Animation: Fix operator properties for redo panel

Animation: Fix operator properties for redo panel After the redo panel is added to animation editors in D14960, many operators must be adjusted to appear and function correctly. A full list of changes is tracked in T98195 --- This patch only includes actual usability fixes. It does not do any changes for the user's convenience, like adding other helpful properties to operators. This can be done in a follow-up patch. Reviewed By: sybren Maniphest Tasks: T98195 Differential Revision: https://developer.blender.org/D14977
Animation: Add redo panel to Dopesheet and NLA
2022-08-01 12:21:05 +02:00 · 2022-08-01 12:20:40 +02:00 · 2022-08-01 10:57:32 +02:00 · 2022-08-01 11:02:18 +10:00 · 2022-08-01 09:46:39 +12:00 · 2022-07-31 11:56:44 -05:00
1778 changed files with 78775 additions and 43242 deletions
--- a/.clang-format
+++ b/.clang-format
@@ -265,6 +265,7 @@ ForEachMacros:
  - SET_SLOT_PROBING_BEGIN
  - MAP_SLOT_PROBING_BEGIN
  - VECTOR_SET_SLOT_PROBING_BEGIN
+  - WL_ARRAY_FOR_EACH

 StatementMacros:
  - PyObject_HEAD
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -222,6 +222,17 @@ if(UNIX AND NOT (APPLE OR HAIKU))

  option(WITH_GHOST_WAYLAND "Enable building Blender against Wayland for windowing (under development)" OFF)
  mark_as_advanced(WITH_GHOST_WAYLAND)
+
+  if (WITH_GHOST_WAYLAND)
+    option(WITH_GHOST_WAYLAND_LIBDECOR "Optionally build with LibDecor window decorations" OFF)
+    mark_as_advanced(WITH_GHOST_WAYLAND_LIBDECOR)
+
+    option(WITH_GHOST_WAYLAND_DBUS "Optionally build with DBUS support (used for Cursor themes). May hang on startup systems where DBUS is not used." OFF)
+    mark_as_advanced(WITH_GHOST_WAYLAND_DBUS)
+
+    option(WITH_GHOST_WAYLAND_DYNLOAD  "Enable runtime dynamic WAYLAND libraries loading" OFF)
+    mark_as_advanced(WITH_GHOST_WAYLAND_DYNLOAD)
+  endif()
 endif()

 if(WITH_GHOST_X11)
@@ -255,19 +266,11 @@ if(WITH_GHOST_X11)
 endif()

 if(UNIX AND NOT APPLE)
-  option(WITH_SYSTEM_GLEW "Use GLEW OpenGL wrapper library provided by the operating system" OFF)
-  option(WITH_SYSTEM_GLES "Use OpenGL ES library provided by the operating system"           ON)
  option(WITH_SYSTEM_FREETYPE "Use the freetype library provided by the operating system" OFF)
-else()
-  # not an option for other OS's
-  set(WITH_SYSTEM_GLEW OFF)
-  set(WITH_SYSTEM_GLES OFF)
-  set(WITH_SYSTEM_FREETYPE OFF)
-endif()
-
-
-if(UNIX AND NOT APPLE)
  option(WITH_SYSTEM_EIGEN3 "Use the systems Eigen3 library" OFF)
+else()
+  set(WITH_SYSTEM_FREETYPE OFF)
+  set(WITH_SYSTEM_EIGEN3 OFF)
 endif()


@@ -444,7 +447,7 @@ endif()
 if(NOT APPLE)
  option(WITH_CYCLES_DEVICE_HIP        "Enable Cycles AMD HIP support" ON)
  option(WITH_CYCLES_HIP_BINARIES      "Build Cycles AMD HIP binaries" OFF)
-  set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 CACHE STRING "AMD HIP architectures to build binaries for")
+  set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx906 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 CACHE STRING "AMD HIP architectures to build binaries for")
  mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
  mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
 endif()
@@ -454,6 +457,21 @@ if(APPLE)
  option(WITH_CYCLES_DEVICE_METAL       "Enable Cycles Apple Metal compute support" ON)
 endif()

+# oneAPI
+if(NOT APPLE)
+  option(WITH_CYCLES_DEVICE_ONEAPI "Enable Cycles oneAPI compute support" OFF)
+  option(WITH_CYCLES_ONEAPI_BINARIES "Enable Ahead-Of-Time compilation for Cycles oneAPI device" OFF)
+  option(WITH_CYCLES_ONEAPI_SYCL_HOST_ENABLED "Enable use of SYCL host (CPU) device execution by oneAPI implementation. This option is for debugging purposes and impacts GPU execution." OFF)
+
+  # https://www.intel.com/content/www/us/en/develop/documentation/oneapi-dpcpp-cpp-compiler-dev-guide-and-reference/top/compilation/ahead-of-time-compilation.html
+  SET (CYCLES_ONEAPI_SPIR64_GEN_DEVICES "dg2" CACHE STRING "oneAPI Intel GPU architectures to build binaries for")
+  SET (CYCLES_ONEAPI_SYCL_TARGETS spir64 spir64_gen CACHE STRING "oneAPI targets to build AOT binaries for")
+
+  mark_as_advanced(WITH_CYCLES_ONEAPI_SYCL_HOST_ENABLED)
+  mark_as_advanced(CYCLES_ONEAPI_SPIR64_GEN_DEVICES)
+  mark_as_advanced(CYCLES_ONEAPI_SYCL_TARGETS)
+endif()
+
 # Draw Manager
 option(WITH_DRAW_DEBUG "Add extra debug capabilities to Draw Manager" OFF)
 mark_as_advanced(WITH_DRAW_DEBUG)
@@ -518,20 +536,48 @@ endif()

 # OpenGL

+# Experimental EGL option.
+option(WITH_GL_EGL "Use the EGL OpenGL system library instead of the platform specific OpenGL system library (CGL, GLX or WGL)" OFF)
+mark_as_advanced(WITH_GL_EGL)
+
+if(WITH_GHOST_WAYLAND)
+  # Wayland can only use EGL to create OpenGL contexts, not GLX.
+  set(WITH_GL_EGL ON)
+endif()
+
+if(UNIX AND NOT APPLE)
+  if(WITH_GL_EGL)
+    # GLEW can only be built with either GLX or EGL support. Most binary distributions are
+    # built with GLX support and we have no automated way to detect this. So always build
+    # GLEW from source to be sure it has EGL support.
+    set(WITH_SYSTEM_GLEW OFF)
+  else()
+    option(WITH_SYSTEM_GLEW "Use GLEW OpenGL wrapper library provided by the operating system" OFF)
+  endif()
+
+  option(WITH_SYSTEM_GLES "Use OpenGL ES library provided by the operating system" ON)
+else()
+  # System GLEW and GLES not an option on other platforms.
+  set(WITH_SYSTEM_GLEW OFF)
+  set(WITH_SYSTEM_GLES OFF)
+endif()
+
 option(WITH_OPENGL              "When off limits visibility of the opengl headers to just bf_gpu and gawain (temporary option for development purposes)" ON)
 option(WITH_GLEW_ES             "Switches to experimental copy of GLEW that has support for OpenGL ES. (temporary option for development purposes)" OFF)
-option(WITH_GL_EGL              "Use the EGL OpenGL system library instead of the platform specific OpenGL system library (CGL, glX, or WGL)"       OFF)
 option(WITH_GL_PROFILE_ES20     "Support using OpenGL ES 2.0. (through either EGL or the AGL/WGL/XGL 'es20' profile)"                               OFF)
-option(WITH_GPU_SHADER_BUILDER  "Shader builder is a developer option enabling linting on GLSL during compilation"                                  OFF)
+option(WITH_GPU_BUILDTIME_SHADER_BUILDER  "Shader builder is a developer option enabling linting on GLSL during compilation"                                  OFF)

 mark_as_advanced(
  WITH_OPENGL
  WITH_GLEW_ES
-  WITH_GL_EGL
  WITH_GL_PROFILE_ES20
-  WITH_GPU_SHADER_BUILDER
+  WITH_GPU_BUILDTIME_SHADER_BUILDER
 )

+if(WITH_HEADLESS)
+  set(WITH_OPENGL OFF)
+endif()
+
 # Metal

 if (APPLE)
--- a/build_files/build_environment/CMakeLists.txt
+++ b/build_files/build_environment/CMakeLists.txt
@@ -29,10 +29,12 @@ cmake_minimum_required(VERSION 3.5)

 include(ExternalProject)
 include(cmake/check_software.cmake)
-include(cmake/versions.cmake)
 include(cmake/options.cmake)
+# versions.cmake needs to be included after options.cmake due to the BLENDER_PLATFORM_ARM variable being needed.
+include(cmake/versions.cmake)
 include(cmake/boost_build_options.cmake)
 include(cmake/download.cmake)
+include(cmake/macros.cmake)

 if(ENABLE_MINGW64)
  include(cmake/setup_mingw64.cmake)
@@ -96,6 +98,15 @@ include(cmake/fmt.cmake)
 include(cmake/robinmap.cmake)
 if(NOT APPLE)
  include(cmake/xr_openxr.cmake)
+  if(NOT WIN32 OR BUILD_MODE STREQUAL Release)
+    include(cmake/dpcpp.cmake)
+    include(cmake/dpcpp_deps.cmake)
+  endif()
+  if(NOT WIN32)
+    include(cmake/igc.cmake)
+    include(cmake/gmmlib.cmake)
+    include(cmake/ocloc.cmake)
+  endif()
 endif()

 # OpenColorIO and dependencies.
@@ -128,6 +139,7 @@ if(NOT WIN32 OR ENABLE_MINGW64)
    include(cmake/vpx.cmake)
    include(cmake/x264.cmake)
    include(cmake/xvidcore.cmake)
+    include(cmake/aom.cmake)
    include(cmake/ffmpeg.cmake)
    include(cmake/fftw.cmake)
    include(cmake/sndfile.cmake)
--- a/build_files/build_environment/cmake/alembic.cmake
+++ b/build_files/build_environment/cmake/alembic.cmake
@@ -42,4 +42,5 @@ endif()
 add_dependencies(
  external_alembic
  external_openexr
+  external_imath
 )
--- a/build_files/build_environment/cmake/aom.cmake
+++ b/build_files/build_environment/cmake/aom.cmake
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+if(WIN32)
+  # The default generator on Windows is MSBuild, which we do not
+  # want to use for this dep, as it needs to build with MinGW.
+  set(AOM_GENERATOR "Ninja")
+  # The default flags are full of MSVC options. Given this will be
+  # building with MinGW, it'll have an unhappy time with those, so
+  # we need to clear them out.
+  set(AOM_CMAKE_FLAGS )
+  # CMake will correctly identify pthreads being available, however
+  # we do not want to use them, as that gains a dependency on
+  # libpthreadswin.dll which we do not want. When pthreads is not
+  # available, aom will use a pthreads emulation layer using win32 threads.
+  set(AOM_EXTRA_ARGS_WIN32 -DCMAKE_HAVE_PTHREAD_H=OFF)
+else()
+  set(AOM_GENERATOR "Unix Makefiles")
+  set(AOM_CMAKE_FLAGS ${DEFAULT_CMAKE_FLAGS})
+endif()
+
+set(AOM_EXTRA_ARGS
+  -DENABLE_TESTDATA=OFF
+  -DENABLE_TESTS=OFF
+  -DENABLE_TOOLS=OFF
+  -DENABLE_EXAMPLES=OFF
+  ${AOM_EXTRA_ARGS_WIN32}
+)
+
+# This is slightly different from all other deps in the way that
+# aom uses cmake as a build system, but still needs the environment setup
+# to include perl so we manually setup the environment and call
+# cmake directly for the configure, build and install commands.
+
+ExternalProject_Add(external_aom
+  URL file://${PACKAGE_DIR}/${AOM_FILE}
+  DOWNLOAD_DIR ${DOWNLOAD_DIR}
+  URL_HASH ${AOM_HASH_TYPE}=${AOM_HASH}
+  PREFIX ${BUILD_DIR}/aom
+  CONFIGURE_COMMAND ${CONFIGURE_ENV} &&
+    cd ${BUILD_DIR}/aom/src/external_aom-build/ &&
+    ${CMAKE_COMMAND} -G "${AOM_GENERATOR}" -DCMAKE_INSTALL_PREFIX=${LIBDIR}/aom ${AOM_CMAKE_FLAGS} ${AOM_EXTRA_ARGS} ${BUILD_DIR}/aom/src/external_aom/
+  BUILD_COMMAND ${CMAKE_COMMAND} --build .
+  INSTALL_COMMAND ${CMAKE_COMMAND} --build . --target install
+  INSTALL_DIR ${LIBDIR}/aom
+)
--- a/build_files/build_environment/cmake/check_software.cmake
+++ b/build_files/build_environment/cmake/check_software.cmake
@@ -56,10 +56,7 @@ if(UNIX)
      "On Debian and Ubuntu:\n"
      "  apt install autoconf automake libtool yasm tcl ninja-build meson python3-mako\n"
      "\n"
-      "On macOS Intel (with homebrew):\n"
-      "  brew install autoconf automake bison libtool pkg-config yasm\n"
-      "\n"
-      "On macOS ARM (with homebrew):\n"
+      "On macOS (with homebrew):\n"
      "  brew install autoconf automake bison flex libtool pkg-config yasm\n"
      "\n"
      "Other platforms:\n"
--- a/build_files/build_environment/cmake/download.cmake
+++ b/build_files/build_environment/cmake/download.cmake
@@ -101,3 +101,19 @@ download_source(ROBINMAP)
 download_source(IMATH)
 download_source(PYSTRING)
 download_source(LEVEL_ZERO)
+download_source(DPCPP)
+download_source(VCINTRINSICS)
+download_source(OPENCLHEADERS)
+download_source(ICDLOADER)
+download_source(MP11)
+download_source(SPIRV_HEADERS)
+download_source(IGC)
+download_source(IGC_LLVM)
+download_source(IGC_OPENCL_CLANG)
+download_source(IGC_VCINTRINSICS)
+download_source(IGC_SPIRV_HEADERS)
+download_source(IGC_SPIRV_TOOLS)
+download_source(IGC_SPIRV_TRANSLATOR)
+download_source(GMMLIB)
+download_source(OCLOC)
+download_source(AOM)
--- a/build_files/build_environment/cmake/dpcpp.cmake
+++ b/build_files/build_environment/cmake/dpcpp.cmake
@@ -0,0 +1,109 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+
+if(WIN32)
+  set(LLVM_GENERATOR "Ninja")
+else()
+  set(LLVM_GENERATOR "Unix Makefiles")
+endif()
+
+set(DPCPP_CONFIGURE_ARGS
+  # When external deps dpcpp needs are not found, it will automatically
+  # download them during the configure stage using FetchContent. Given
+  # we need to keep an archive of all source used during build for compliance
+  # reasons, it CANNOT download anything we do not know about. By setting
+  # this property to ON, all downloads are disabled, and we will have to
+  # provide the missing deps some other way; a build error beats a compliance
+  # violation.
+  --cmake-opt FETCHCONTENT_FULLY_DISCONNECTED=ON
+)
+set(DPCPP_SOURCE_ROOT ${BUILD_DIR}/dpcpp/src/external_dpcpp/)
+set(DPCPP_EXTRA_ARGS
+  # When external deps dpcpp needs are not found, it will automatically
+  # download them during the configure stage using FetchContent. Given
+  # we need to keep an archive of all source used during build for compliance
+  # reasons, it CANNOT download anything we do not know about. By setting
+  # this property to ON, all downloads are disabled, and we will have to
+  # provide the missing deps some other way; a build or configure error
+  # beats a compliance violation.
+  -DFETCHCONTENT_FULLY_DISCONNECTED=ON
+  -DLLVMGenXIntrinsics_SOURCE_DIR=${BUILD_DIR}/vcintrinsics/src/external_vcintrinsics/
+  -DOpenCL_HEADERS=file://${PACKAGE_DIR}/${OPENCLHEADERS_FILE}
+  -DOpenCL_LIBRARY_SRC=file://${PACKAGE_DIR}/${ICDLOADER_FILE}
+  -DBOOST_MP11_SOURCE_DIR=${BUILD_DIR}/mp11/src/external_mp11/
+  -DLEVEL_ZERO_LIBRARY=${LIBDIR}/level-zero/lib/${LIBPREFIX}ze_loader${SHAREDLIBEXT}
+  -DLEVEL_ZERO_INCLUDE_DIR=${LIBDIR}/level-zero/include
+  -DLLVM_EXTERNAL_SPIRV_HEADERS_SOURCE_DIR=${BUILD_DIR}/spirvheaders/src/external_spirvheaders/
+  # Below here is copied from an invocation of buildbot/config.py
+  -DLLVM_ENABLE_ASSERTIONS=ON
+  -DLLVM_TARGETS_TO_BUILD=X86
+  -DLLVM_EXTERNAL_PROJECTS=sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw
+  -DLLVM_EXTERNAL_SYCL_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/sycl
+  -DLLVM_EXTERNAL_LLVM_SPIRV_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/llvm-spirv
+  -DLLVM_EXTERNAL_XPTI_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xpti
+  -DXPTI_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xpti
+  -DLLVM_EXTERNAL_XPTIFW_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xptifw
+  -DLLVM_EXTERNAL_LIBDEVICE_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/libdevice
+  -DLLVM_ENABLE_PROJECTS=clang^^sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw
+  -DLIBCLC_TARGETS_TO_BUILD=
+  -DLIBCLC_GENERATE_REMANGLED_VARIANTS=OFF
+  -DSYCL_BUILD_PI_HIP_PLATFORM=AMD
+  -DLLVM_BUILD_TOOLS=ON
+  -DSYCL_ENABLE_WERROR=OFF
+  -DSYCL_INCLUDE_TESTS=ON
+  -DLLVM_ENABLE_DOXYGEN=OFF
+  -DLLVM_ENABLE_SPHINX=OFF
+  -DBUILD_SHARED_LIBS=OFF
+  -DSYCL_ENABLE_XPTI_TRACING=ON
+  -DLLVM_ENABLE_LLD=OFF
+  -DXPTI_ENABLE_WERROR=OFF
+  -DSYCL_CLANG_EXTRA_FLAGS=
+  -DSYCL_ENABLE_PLUGINS=level_zero
+  -DCMAKE_INSTALL_RPATH=\$ORIGIN
+  -DPython3_ROOT_DIR=${LIBDIR}/python/
+  -DPython3_EXECUTABLE=${PYTHON_BINARY}
+  -DPYTHON_EXECUTABLE=${PYTHON_BINARY}
+  -DLLDB_ENABLE_CURSES=OFF
+  -DLLVM_ENABLE_TERMINFO=OFF
+)
+
+if(WIN32)
+   list(APPEND DPCPP_EXTRA_ARGS -DPython3_FIND_REGISTRY=NEVER)
+endif()
+
+ExternalProject_Add(external_dpcpp
+  URL file://${PACKAGE_DIR}/${DPCPP_FILE}
+  DOWNLOAD_DIR ${DOWNLOAD_DIR}
+  URL_HASH ${DPCPP_HASH_TYPE}=${DPCPP_HASH}
+  PREFIX ${BUILD_DIR}/dpcpp
+  CMAKE_GENERATOR ${LLVM_GENERATOR}
+  SOURCE_SUBDIR llvm
+  LIST_SEPARATOR ^^
+  CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/dpcpp ${DEFAULT_CMAKE_FLAGS} ${DPCPP_EXTRA_ARGS}
+  #CONFIGURE_COMMAND ${PYTHON_BINARY} ${BUILD_DIR}/dpcpp/src/external_dpcpp/buildbot/configure.py ${DPCPP_CONFIGURE_ARGS}
+  #BUILD_COMMAND echo "." #${PYTHON_BINARY} ${BUILD_DIR}/dpcpp/src/external_dpcpp/buildbot/compile.py
+  INSTALL_COMMAND ${CMAKE_COMMAND} --build . -- deploy-sycl-toolchain
+  PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/dpcpp/src/external_dpcpp < ${PATCH_DIR}/dpcpp.diff
+  INSTALL_DIR ${LIBDIR}/dpcpp
+)
+
+add_dependencies(
+  external_dpcpp
+  external_python
+  external_python_site_packages
+  external_vcintrinsics
+  external_openclheaders
+  external_icdloader
+  external_mp11
+  external_level-zero
+  external_spirvheaders
+)
+
+if(BUILD_MODE STREQUAL Release AND WIN32)
+  ExternalProject_Add_Step(external_dpcpp after_install
+      COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang-cl.exe
+      COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang-cpp.exe
+      COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang.exe
+      COMMAND ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/dpcpp ${HARVEST_TARGET}/dpcpp
+  )
+endif()
--- a/build_files/build_environment/cmake/dpcpp_deps.cmake
+++ b/build_files/build_environment/cmake/dpcpp_deps.cmake
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+# These are build-time requirements for dpcpp.
+# We only have to unpack these; dpcpp will build
+# them.
+
+ExternalProject_Add(external_vcintrinsics
+  URL file://${PACKAGE_DIR}/${VCINTRINSICS_FILE}
+  URL_HASH ${VCINTRINSICS_HASH_TYPE}=${VCINTRINSICS_HASH}
+  DOWNLOAD_DIR ${DOWNLOAD_DIR}
+  PREFIX ${BUILD_DIR}/vcintrinsics
+  CONFIGURE_COMMAND echo .
+  BUILD_COMMAND echo .
+  INSTALL_COMMAND echo .
+)
+
+# OpenCL headers do not have to be unpacked, dpcpp will do it,
+# but it wouldn't hurt to do it anyway as an opportunity to validate
+# that the hash is correct.
+ExternalProject_Add(external_openclheaders
+  URL file://${PACKAGE_DIR}/${OPENCLHEADERS_FILE}
+  URL_HASH ${OPENCLHEADERS_HASH_TYPE}=${OPENCLHEADERS_HASH}
+  DOWNLOAD_DIR ${DOWNLOAD_DIR}
+  PREFIX ${BUILD_DIR}/openclheaders
+  CONFIGURE_COMMAND echo .
+  BUILD_COMMAND echo .
+  INSTALL_COMMAND echo .
+)
+
+# icdloader does not have to be unpacked, dpcpp will do it,
+# but it wouldn't hurt to do it anyway as an opportunity to validate
+# that the hash is correct.
+ExternalProject_Add(external_icdloader
+  URL file://${PACKAGE_DIR}/${ICDLOADER_FILE}
+  URL_HASH ${ICDLOADER_HASH_TYPE}=${ICDLOADER_HASH}
+  DOWNLOAD_DIR ${DOWNLOAD_DIR}
+  PREFIX ${BUILD_DIR}/icdloader
+  CONFIGURE_COMMAND echo .
+  BUILD_COMMAND echo .
+  INSTALL_COMMAND echo .
+)
+
+ExternalProject_Add(external_mp11
+  URL file://${PACKAGE_DIR}/${MP11_FILE}
+  URL_HASH ${MP11_HASH_TYPE}=${MP11_HASH}
+  DOWNLOAD_DIR ${DOWNLOAD_DIR}
+  PREFIX ${BUILD_DIR}/mp11
+  CONFIGURE_COMMAND echo .
+  BUILD_COMMAND echo .
+  INSTALL_COMMAND echo .
+)
+
+ExternalProject_Add(external_spirvheaders
+  URL file://${PACKAGE_DIR}/${SPIRV_HEADERS_FILE}
+  URL_HASH ${SPIRV_HEADERS_HASH_TYPE}=${SPIRV_HEADERS_HASH}
+  DOWNLOAD_DIR ${DOWNLOAD_DIR}
+  PREFIX ${BUILD_DIR}/spirvheaders
+  CONFIGURE_COMMAND echo .
+  BUILD_COMMAND echo .
+  INSTALL_COMMAND echo .
+)
--- a/build_files/build_environment/cmake/embree.cmake
+++ b/build_files/build_environment/cmake/embree.cmake
@@ -10,18 +10,12 @@ set(EMBREE_EXTRA_ARGS
  -DEMBREE_RAY_MASK=ON
  -DEMBREE_FILTER_FUNCTION=ON
  -DEMBREE_BACKFACE_CULLING=OFF
-  -DEMBREE_MAX_ISA=AVX2
  -DEMBREE_TASKING_SYSTEM=TBB
  -DEMBREE_TBB_ROOT=${LIBDIR}/tbb
  -DTBB_ROOT=${LIBDIR}/tbb
-  -DTBB_STATIC_LIB=${TBB_STATIC_LIBRARY}
 )

-if(BLENDER_PLATFORM_ARM)
-  set(EMBREE_EXTRA_ARGS
-    ${EMBREE_EXTRA_ARGS}
-    -DEMBREE_MAX_ISA=NEON)
-else()
+if (NOT BLENDER_PLATFORM_ARM)
  set(EMBREE_EXTRA_ARGS
    ${EMBREE_EXTRA_ARGS}
    -DEMBREE_MAX_ISA=AVX2)
@@ -30,23 +24,10 @@ endif()
 if(TBB_STATIC_LIBRARY)
  set(EMBREE_EXTRA_ARGS
    ${EMBREE_EXTRA_ARGS}
-    -DEMBREE_TBB_LIBRARY_NAME=tbb_static
-    -DEMBREE_TBBMALLOC_LIBRARY_NAME=tbbmalloc_static
+    -DEMBREE_TBB_COMPONENT=tbb_static
  )
 endif()

-if(WIN32)
-  set(EMBREE_BUILD_DIR ${BUILD_MODE}/)
-  if(BUILD_MODE STREQUAL Debug)
-    list(APPEND EMBREE_EXTRA_ARGS
-     -DEMBREE_TBBMALLOC_LIBRARY_NAME=tbbmalloc_debug
-     -DEMBREE_TBB_LIBRARY_NAME=tbb_debug
-    )
-  endif()
-else()
-  set(EMBREE_BUILD_DIR)
-endif()
-
 ExternalProject_Add(external_embree
  URL file://${PACKAGE_DIR}/${EMBREE_FILE}
  DOWNLOAD_DIR ${DOWNLOAD_DIR}
--- a/build_files/build_environment/cmake/ffmpeg.cmake
+++ b/build_files/build_environment/cmake/ffmpeg.cmake
@@ -1,9 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0-or-later

-set(FFMPEG_CFLAGS "-I${mingw_LIBDIR}/lame/include -I${mingw_LIBDIR}/openjpeg/include/ -I${mingw_LIBDIR}/ogg/include -I${mingw_LIBDIR}/vorbis/include -I${mingw_LIBDIR}/theora/include -I${mingw_LIBDIR}/opus/include -I${mingw_LIBDIR}/vpx/include -I${mingw_LIBDIR}/x264/include -I${mingw_LIBDIR}/xvidcore/include -I${mingw_LIBDIR}/zlib/include")
-set(FFMPEG_LDFLAGS "-L${mingw_LIBDIR}/lame/lib -L${mingw_LIBDIR}/openjpeg/lib -L${mingw_LIBDIR}/ogg/lib -L${mingw_LIBDIR}/vorbis/lib -L${mingw_LIBDIR}/theora/lib -L${mingw_LIBDIR}/opus/lib -L${mingw_LIBDIR}/vpx/lib -L${mingw_LIBDIR}/x264/lib -L${mingw_LIBDIR}/xvidcore/lib -L${mingw_LIBDIR}/zlib/lib")
+set(FFMPEG_CFLAGS "-I${mingw_LIBDIR}/lame/include -I${mingw_LIBDIR}/openjpeg/include/ -I${mingw_LIBDIR}/ogg/include -I${mingw_LIBDIR}/vorbis/include -I${mingw_LIBDIR}/theora/include -I${mingw_LIBDIR}/opus/include -I${mingw_LIBDIR}/vpx/include -I${mingw_LIBDIR}/x264/include -I${mingw_LIBDIR}/xvidcore/include -I${mingw_LIBDIR}/zlib/include -I${mingw_LIBDIR}/aom/include")
+set(FFMPEG_LDFLAGS "-L${mingw_LIBDIR}/lame/lib -L${mingw_LIBDIR}/openjpeg/lib -L${mingw_LIBDIR}/ogg/lib -L${mingw_LIBDIR}/vorbis/lib -L${mingw_LIBDIR}/theora/lib -L${mingw_LIBDIR}/opus/lib -L${mingw_LIBDIR}/vpx/lib -L${mingw_LIBDIR}/x264/lib -L${mingw_LIBDIR}/xvidcore/lib -L${mingw_LIBDIR}/zlib/lib -L${mingw_LIBDIR}/aom/lib")
 set(FFMPEG_EXTRA_FLAGS --pkg-config-flags=--static --extra-cflags=${FFMPEG_CFLAGS} --extra-ldflags=${FFMPEG_LDFLAGS})
-set(FFMPEG_ENV PKG_CONFIG_PATH=${mingw_LIBDIR}/openjpeg/lib/pkgconfig:${mingw_LIBDIR}/x264/lib/pkgconfig:${mingw_LIBDIR}/vorbis/lib/pkgconfig:${mingw_LIBDIR}/ogg/lib/pkgconfig:${mingw_LIBDIR}:${mingw_LIBDIR}/vpx/lib/pkgconfig:${mingw_LIBDIR}/theora/lib/pkgconfig:${mingw_LIBDIR}/openjpeg/lib/pkgconfig:${mingw_LIBDIR}/opus/lib/pkgconfig:)
+set(FFMPEG_ENV PKG_CONFIG_PATH=${mingw_LIBDIR}/openjpeg/lib/pkgconfig:${mingw_LIBDIR}/x264/lib/pkgconfig:${mingw_LIBDIR}/vorbis/lib/pkgconfig:${mingw_LIBDIR}/ogg/lib/pkgconfig:${mingw_LIBDIR}:${mingw_LIBDIR}/vpx/lib/pkgconfig:${mingw_LIBDIR}/theora/lib/pkgconfig:${mingw_LIBDIR}/openjpeg/lib/pkgconfig:${mingw_LIBDIR}/opus/lib/pkgconfig:${mingw_LIBDIR}/aom/lib/pkgconfig:)

 if(WIN32)
  set(FFMPEG_ENV set ${FFMPEG_ENV} &&)
@@ -79,6 +79,7 @@ ExternalProject_Add(external_ffmpeg
    --disable-librtmp
    --enable-libx264
    --enable-libxvid
+    --enable-libaom
    --disable-libopencore-amrnb
    --disable-libopencore-amrwb
    --disable-libdc1394
@@ -125,6 +126,7 @@ add_dependencies(
  external_vorbis
  external_ogg
  external_lame
+  external_aom
 )
 if(WIN32)
  add_dependencies(
--- a/build_files/build_environment/cmake/gmmlib.cmake
+++ b/build_files/build_environment/cmake/gmmlib.cmake
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+set(GMMLIB_EXTRA_ARGS
+)
+
+ExternalProject_Add(external_gmmlib
+  URL file://${PACKAGE_DIR}/${GMMLIB_FILE}
+  URL_HASH ${GMMLIB_HASH_TYPE}=${GMMLIB_HASH}
+  DOWNLOAD_DIR ${DOWNLOAD_DIR}
+  PREFIX ${BUILD_DIR}/gmmlib
+  CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/gmmlib ${DEFAULT_CMAKE_FLAGS} ${GMMLIB_EXTRA_ARGS}
+  INSTALL_DIR ${LIBDIR}/gmmlib
+)
--- a/build_files/build_environment/cmake/harvest.cmake
+++ b/build_files/build_environment/cmake/harvest.cmake
@@ -25,9 +25,6 @@ if(BUILD_MODE STREQUAL Release)
        # glew-> opengl
        ${CMAKE_COMMAND} -E copy ${LIBDIR}/glew/lib/libglew32.lib ${HARVEST_TARGET}/opengl/lib/glew.lib &&
        ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/glew/include/ ${HARVEST_TARGET}/opengl/include/ &&
-        # tiff
-        ${CMAKE_COMMAND} -E copy ${LIBDIR}/tiff/lib/tiff.lib ${HARVEST_TARGET}/tiff/lib/libtiff.lib &&
-        ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/tiff/include/ ${HARVEST_TARGET}/tiff/include/
    DEPENDS
  )
 endif()
@@ -177,6 +174,7 @@ harvest(opus/lib ffmpeg/lib "*.a")
 harvest(vpx/lib ffmpeg/lib "*.a")
 harvest(x264/lib ffmpeg/lib "*.a")
 harvest(xvidcore/lib ffmpeg/lib "*.a")
+harvest(aom/lib ffmpeg/lib "*.a")
 harvest(webp/lib webp/lib "*.a")
 harvest(webp/include webp/include "*.h")
 harvest(usd/include usd/include "*.h")
@@ -192,6 +190,10 @@ harvest(zstd/lib zstd/lib "*.a")
 if(UNIX AND NOT APPLE)
  harvest(libglu/lib mesa/lib "*.so*")
  harvest(mesa/lib64 mesa/lib "*.so*")
-endif()
+
+  harvest(dpcpp dpcpp "*")
+  harvest(igc dpcpp/lib/igc "*")
+  harvest(ocloc dpcpp/lib/ocloc "*")
+ endif()

 endif()
--- a/build_files/build_environment/cmake/igc.cmake
+++ b/build_files/build_environment/cmake/igc.cmake
@@ -0,0 +1,126 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+unpack_only(igc_vcintrinsics)
+unpack_only(igc_spirv_headers)
+unpack_only(igc_spirv_tools)
+
+#
+# igc_opencl_clang contains patches that need to be applied
+# to external_igc_llvm and igc_spirv_translator, we unpack
+# igc_opencl_clang first, then have the patch stages of
+# external_igc_llvm and igc_spirv_translator apply them.
+#
+
+ExternalProject_Add(external_igc_opencl_clang
+  URL file://${PACKAGE_DIR}/${IGC_OPENCL_CLANG_FILE}
+  DOWNLOAD_DIR ${DOWNLOAD_DIR}
+  URL_HASH ${IGC_OPENCL_CLANG_HASH_TYPE}=${IGC_OPENCL_CLANG_HASH}
+  PREFIX ${BUILD_DIR}/igc_opencl_clang
+  CONFIGURE_COMMAND echo .
+  BUILD_COMMAND echo .
+  INSTALL_COMMAND echo .
+  PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/igc_opencl_clang/src/external_igc_opencl_clang/ < ${PATCH_DIR}/igc_opencl_clang.diff
+)
+
+set(IGC_OPENCL_CLANG_PATCH_DIR ${BUILD_DIR}/igc_opencl_clang/src/external_igc_opencl_clang/patches)
+set(IGC_LLVM_SOURCE_DIR ${BUILD_DIR}/igc_llvm/src/external_igc_llvm)
+set(IGC_SPIRV_TRANSLATOR_SOURCE_DIR ${BUILD_DIR}/igc_spirv_translator/src/external_igc_spirv_translator)
+
+ExternalProject_Add(external_igc_llvm
+  URL file://${PACKAGE_DIR}/${IGC_LLVM_FILE}
+  DOWNLOAD_DIR ${DOWNLOAD_DIR}
+  URL_HASH ${IGC_LLVM_HASH_TYPE}=${IGC_LLVM_HASH}
+  PREFIX ${BUILD_DIR}/igc_llvm
+  CONFIGURE_COMMAND echo .
+  BUILD_COMMAND echo .
+  INSTALL_COMMAND echo .
+  PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0001-OpenCL-3.0-support.patch &&
+    ${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0002-Remove-__IMAGE_SUPPORT__-macro-for-SPIR.patch &&
+    ${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0003-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch &&
+    ${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0004-OpenCL-support-cl_ext_float_atomics.patch &&
+    ${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0005-OpenCL-Add-cl_khr_integer_dot_product.patch &&
+    ${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0001-Memory-leak-fix-for-Managed-Static-Mutex.patch &&
+    ${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0002-Remove-repo-name-in-LLVM-IR.patch
+)
+add_dependencies(
+  external_igc_llvm
+  external_igc_opencl_clang
+)
+
+ExternalProject_Add(external_igc_spirv_translator
+  URL file://${PACKAGE_DIR}/${IGC_SPIRV_TRANSLATOR_FILE}
+  DOWNLOAD_DIR ${DOWNLOAD_DIR}
+  URL_HASH ${IGC_SPIRV_TRANSLATOR_HASH_TYPE}=${IGC_SPIRV_TRANSLATOR_HASH}
+  PREFIX ${BUILD_DIR}/igc_spirv_translator
+  CONFIGURE_COMMAND echo .
+  BUILD_COMMAND echo .
+  INSTALL_COMMAND echo .
+  PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0001-update-SPIR-V-headers-for-SPV_INTEL_split_barrier.patch &&
+    ${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0002-Add-support-for-split-barriers-extension-SPV_INTEL_s.patch &&
+    ${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0003-Support-cl_bf16_conversions.patch
+)
+add_dependencies(
+  external_igc_spirv_translator
+  external_igc_opencl_clang
+)
+
+if(WIN32)
+  set(IGC_GENERATOR "Ninja")
+  set(IGC_TARGET Windows64)
+else()
+  set(IGC_GENERATOR "Unix Makefiles")
+  set(IGC_TARGET Linux64)
+endif()
+
+set(IGC_EXTRA_ARGS
+  -DIGC_OPTION__ARCHITECTURE_TARGET=${IGC_TARGET}
+  -DIGC_OPTION__ARCHITECTURE_HOST=${IGC_TARGET}
+)
+
+if(UNIX AND NOT APPLE)
+  list(APPEND IGC_EXTRA_ARGS
+       -DFLEX_EXECUTABLE=${LIBDIR}/flex/bin/flex
+       -DFLEX_INCLUDE_DIR=${LIBDIR}/flex/include
+  )
+endif()
+
+ExternalProject_Add(external_igc
+  URL file://${PACKAGE_DIR}/${IGC_FILE}
+  DOWNLOAD_DIR ${DOWNLOAD_DIR}
+  URL_HASH ${IGC_HASH_TYPE}=${IGC_HASH}
+  CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/igc ${DEFAULT_CMAKE_FLAGS} ${IGC_EXTRA_ARGS}
+
+  # IGC is pretty set in its ways about where sub-projects ought to live: for some it offers
+  # hooks to supply alternative folders, others are just hardcoded with no way to configure.
+  # We symlink everything here, since it's less work than trying to convince the CMake
+  # scripts to accept alternative locations.
+  #
+  PATCH_COMMAND ${CMAKE_COMMAND} -E create_symlink ${BUILD_DIR}/igc_llvm/src/external_igc_llvm/ ${BUILD_DIR}/igc/src/llvm-project &&
+    ${CMAKE_COMMAND} -E create_symlink ${BUILD_DIR}/igc_opencl_clang/src/external_igc_opencl_clang/ ${BUILD_DIR}/igc/src/llvm-project/llvm/projects/opencl-clang &&
+    ${CMAKE_COMMAND} -E create_symlink ${BUILD_DIR}/igc_spirv_translator/src/external_igc_spirv_translator/ ${BUILD_DIR}/igc/src/llvm-project/llvm/projects/llvm-spirv &&
+    ${CMAKE_COMMAND} -E create_symlink ${BUILD_DIR}/igc_spirv_tools/src/external_igc_spirv_tools/ ${BUILD_DIR}/igc/src/SPIRV-Tools &&
+    ${CMAKE_COMMAND} -E create_symlink ${BUILD_DIR}/igc_spirv_headers/src/external_igc_spirv_headers/ ${BUILD_DIR}/igc/src/SPIRV-Headers &&
+    ${CMAKE_COMMAND} -E create_symlink ${BUILD_DIR}/igc_vcintrinsics/src/external_igc_vcintrinsics/ ${BUILD_DIR}/igc/src/vc-intrinsics
+  PREFIX ${BUILD_DIR}/igc
+  INSTALL_DIR ${LIBDIR}/igc
+  INSTALL_COMMAND ${CMAKE_COMMAND} --install . --strip
+  CMAKE_GENERATOR ${IGC_GENERATOR}
+)
+
+add_dependencies(
+  external_igc
+  external_igc_vcintrinsics
+  external_igc_llvm
+  external_igc_opencl_clang
+  external_igc_vcintrinsics
+  external_igc_spirv_headers
+  external_igc_spirv_tools
+  external_igc_spirv_translator
+)
+
+if(UNIX AND NOT APPLE)
+  add_dependencies(
+    external_igc
+    external_flex
+  )
+endif()
--- a/build_files/build_environment/cmake/ispc.cmake
+++ b/build_files/build_environment/cmake/ispc.cmake
@@ -28,7 +28,7 @@ elseif(UNIX)
  set(ISPC_EXTRA_ARGS_UNIX
    -DCMAKE_C_COMPILER=${LIBDIR}/llvm/bin/clang
    -DCMAKE_CXX_COMPILER=${LIBDIR}/llvm/bin/clang++
-    -DARM_ENABLED=Off
+    -DARM_ENABLED=${BLENDER_PLATFORM_ARM}
    -DFLEX_EXECUTABLE=${LIBDIR}/flex/bin/flex
  )
 endif()
--- a/build_files/build_environment/cmake/llvm.cmake
+++ b/build_files/build_environment/cmake/llvm.cmake
@@ -82,4 +82,3 @@ add_dependencies(
  ll
  external_python
 )
-
--- a/build_files/build_environment/cmake/macros.cmake
+++ b/build_files/build_environment/cmake/macros.cmake
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+# Unpack-only shorthand: declares external_<name>, which hash-checks and extracts
+# the archive named by <NAME>_FILE but runs no real configure/build/install step.
+# Implemented as a function so helper variables stay out of the caller's scope.
+function(unpack_only name)
+  string(TOUPPER "${name}" upper_name)
+  ExternalProject_Add(external_${name}
+    URL file://${PACKAGE_DIR}/${${upper_name}_FILE}
+    URL_HASH ${${upper_name}_HASH_TYPE}=${${upper_name}_HASH}
+    DOWNLOAD_DIR ${DOWNLOAD_DIR}
+    PREFIX ${BUILD_DIR}/${name}
+    # Placeholder commands: the archive only needs to be unpacked.
+    CONFIGURE_COMMAND echo .
+    BUILD_COMMAND echo .
+    INSTALL_COMMAND echo .
+  )
+endfunction()
--- a/build_files/build_environment/cmake/ocloc.cmake
+++ b/build_files/build_environment/cmake/ocloc.cmake
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+set(OCLOC_EXTRA_ARGS
+  -DNEO_SKIP_UNIT_TESTS=1
+  -DNEO_BUILD_WITH_OCL=0
+  -DBUILD_WITH_L0=0
+  -DIGC_DIR=${LIBDIR}/igc
+  -DGMM_DIR=${LIBDIR}/gmmlib
+)
+
+ExternalProject_Add(external_ocloc
+  URL file://${PACKAGE_DIR}/${OCLOC_FILE}
+  URL_HASH ${OCLOC_HASH_TYPE}=${OCLOC_HASH}
+  DOWNLOAD_DIR ${DOWNLOAD_DIR}
+  PREFIX ${BUILD_DIR}/ocloc
+  CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/ocloc ${DEFAULT_CMAKE_FLAGS} ${OCLOC_EXTRA_ARGS}
+  INSTALL_DIR ${LIBDIR}/ocloc
+)
+
+add_dependencies(
+  external_ocloc
+  external_igc
+  external_gmmlib
+)
--- a/build_files/build_environment/cmake/openimageio.cmake
+++ b/build_files/build_environment/cmake/openimageio.cmake
@@ -18,9 +18,15 @@ if(WIN32)
  set(PNG_LIBNAME libpng16_static${LIBEXT})
  set(OIIO_SIMD_FLAGS -DUSE_SIMD=sse2)
  set(OPENJPEG_POSTFIX _msvc)
+  if(BUILD_MODE STREQUAL Debug)
+    set(TIFF_POSTFIX d)
+  else()
+    set(TIFF_POSTFIX)
+  endif()
 else()
  set(PNG_LIBNAME libpng${LIBEXT})
  set(OIIO_SIMD_FLAGS)
+  set(TIFF_POSTFIX)
 endif()

 if(MSVC)
@@ -65,7 +71,7 @@ set(OPENIMAGEIO_EXTRA_ARGS
  -DZLIB_INCLUDE_DIR=${LIBDIR}/zlib/include
  -DPNG_LIBRARY=${LIBDIR}/png/lib/${PNG_LIBNAME}
  -DPNG_PNG_INCLUDE_DIR=${LIBDIR}/png/include
-  -DTIFF_LIBRARY=${LIBDIR}/tiff/lib/${LIBPREFIX}tiff${LIBEXT}
+  -DTIFF_LIBRARY=${LIBDIR}/tiff/lib/${LIBPREFIX}tiff${TIFF_POSTFIX}${LIBEXT}
  -DTIFF_INCLUDE_DIR=${LIBDIR}/tiff/include
  -DJPEG_LIBRARY=${LIBDIR}/jpeg/lib/${JPEG_LIBRARY}
  -DJPEG_INCLUDE_DIR=${LIBDIR}/jpeg/include
--- a/build_files/build_environment/cmake/options.cmake
+++ b/build_files/build_environment/cmake/options.cmake
@@ -38,6 +38,7 @@ message("BUILD_DIR = ${BUILD_DIR}")
 if(WIN32)
  set(PATCH_CMD ${DOWNLOAD_DIR}/mingw/mingw64/msys/1.0/bin/patch.exe)
  set(LIBEXT ".lib")
+  set(SHAREDLIBEXT ".lib")
  set(LIBPREFIX "")

  # For OIIO and OSL
@@ -96,6 +97,7 @@ if(WIN32)
 else()
  set(PATCH_CMD patch)
  set(LIBEXT ".a")
+  set(SHAREDLIBEXT ".so")
  set(LIBPREFIX "lib")

  if(APPLE)
--- a/build_files/build_environment/cmake/tiff.cmake
+++ b/build_files/build_environment/cmake/tiff.cmake
@@ -3,6 +3,8 @@
 set(TIFF_EXTRA_ARGS
  -DZLIB_LIBRARY=${LIBDIR}/zlib/lib/${ZLIB_LIBRARY}
  -DZLIB_INCLUDE_DIR=${LIBDIR}/zlib/include
+  -DJPEG_LIBRARY=${LIBDIR}/jpeg/lib/${JPEG_LIBRARY}
+  -DJPEG_INCLUDE_DIR=${LIBDIR}/jpeg/include
  -DPNG_STATIC=ON
  -DBUILD_SHARED_LIBS=OFF
  -Dlzma=OFF
@@ -24,10 +26,12 @@ add_dependencies(
  external_tiff
  external_zlib
 )
-
-if(WIN32 AND BUILD_MODE STREQUAL Debug)
-  ExternalProject_Add_Step(external_tiff after_install
-    COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/tiff/lib/tiffd${LIBEXT} ${LIBDIR}/tiff/lib/tiff${LIBEXT}
-    DEPENDEES install
-  )
+if(WIN32)
+  if(BUILD_MODE STREQUAL Release)
+    ExternalProject_Add_Step(external_tiff after_install
+      COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/tiff/lib/tiff.lib ${HARVEST_TARGET}/tiff/lib/libtiff.lib &&
+              ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/tiff/include/ ${HARVEST_TARGET}/tiff/include/
+      DEPENDEES install
+    )
+  endif()
 endif()
--- a/build_files/build_environment/cmake/versions.cmake
+++ b/build_files/build_environment/cmake/versions.cmake
@@ -45,15 +45,15 @@ set(PTHREADS_HASH f3bf81bb395840b3446197bcf4ecd653)
 set(PTHREADS_HASH_TYPE MD5)
 set(PTHREADS_FILE pthreads4w-code-${PTHREADS_VERSION}.zip)

-set(OPENEXR_VERSION 3.1.4)
+set(OPENEXR_VERSION 3.1.5)
 set(OPENEXR_URI https://github.com/AcademySoftwareFoundation/openexr/archive/v${OPENEXR_VERSION}.tar.gz)
-set(OPENEXR_HASH e990be1ff765797bc2d93a8060e1c1f2)
+set(OPENEXR_HASH a92f38eedd43e56c0af56d4852506886)
 set(OPENEXR_HASH_TYPE MD5)
 set(OPENEXR_FILE openexr-${OPENEXR_VERSION}.tar.gz)

-set(IMATH_VERSION 3.1.4)
+set(IMATH_VERSION 3.1.5)
-set(IMATH_URI https://github.com/AcademySoftwareFoundation/Imath/archive/v${OPENEXR_VERSION}.tar.gz)
+set(IMATH_URI https://github.com/AcademySoftwareFoundation/Imath/archive/v${IMATH_VERSION}.tar.gz)
-set(IMATH_HASH fddf14ec73e12c34e74c3c175e311a3f)
+set(IMATH_HASH dd375574276c54872b7b3d54053baff0)
 set(IMATH_HASH_TYPE MD5)
 set(IMATH_FILE imath-${IMATH_VERSION}.tar.gz)

@@ -147,7 +147,7 @@ set(OPENIMAGEIO_HASH de45fb38501c4581062b522b53b6141c)
 set(OPENIMAGEIO_HASH_TYPE MD5)
 set(OPENIMAGEIO_FILE OpenImageIO-${OPENIMAGEIO_VERSION}.tar.gz)

-# 8.0.0 is currently oiio's preferred vesion although never versions may be available.
+# 8.0.0 is currently oiio's preferred version although newer versions may be available.
 # the preferred version can be found in oiio's externalpackages.cmake
 set(FMT_VERSION 8.0.0)
 set(FMT_URI https://github.com/fmtlib/fmt/archive/refs/tags/${FMT_VERSION}.tar.gz)
@@ -155,7 +155,7 @@ set(FMT_HASH 7bce0e9e022e586b178b150002e7c2339994e3c2bbe44027e9abb0d60f9cce83)
 set(FMT_HASH_TYPE SHA256)
 set(FMT_FILE fmt-${FMT_VERSION}.tar.gz)

-# 0.6.2 is currently oiio's preferred vesion although never versions may be available.
+# 0.6.2 is currently oiio's preferred version although newer versions may be available.
 # the preferred version can be found in oiio's externalpackages.cmake
 set(ROBINMAP_VERSION v0.6.2)
 set(ROBINMAP_URI https://github.com/Tessil/robin-map/archive/refs/tags/${ROBINMAP_VERSION}.tar.gz)
@@ -163,9 +163,9 @@ set(ROBINMAP_HASH c08ec4b1bf1c85eb0d6432244a6a89862229da1cb834f3f90fba8dc35d8c8e
 set(ROBINMAP_HASH_TYPE SHA256)
 set(ROBINMAP_FILE robinmap-${ROBINMAP_VERSION}.tar.gz)

-set(TIFF_VERSION 4.3.0)
+set(TIFF_VERSION 4.4.0)
 set(TIFF_URI http://download.osgeo.org/libtiff/tiff-${TIFF_VERSION}.tar.gz)
-set(TIFF_HASH 0a2e4744d1426a8fc8211c0cdbc3a1b3)
+set(TIFF_HASH 376f17f189e9d02280dfe709b2b2bbea)
 set(TIFF_HASH_TYPE MD5)
 set(TIFF_FILE tiff-${TIFF_VERSION}.tar.gz)

@@ -410,9 +410,9 @@ set(SQLITE_HASH fb558c49ee21a837713c4f1e7e413309aabdd9c7)
 set(SQLITE_HASH_TYPE SHA1)
 set(SQLITE_FILE sqlite-src-3240000.zip)

-set(EMBREE_VERSION 3.13.3)
+set(EMBREE_VERSION 3.13.4)
 set(EMBREE_URI https://github.com/embree/embree/archive/v${EMBREE_VERSION}.zip)
-set(EMBREE_HASH f62766ba54e48a2f327c3a22596e7133)
+set(EMBREE_HASH 52d0be294d6c88ba7a6c9e046796e7be)
 set(EMBREE_HASH_TYPE MD5)
 set(EMBREE_FILE embree-v${EMBREE_VERSION}.zip)

@@ -502,3 +502,140 @@ set(LEVEL_ZERO_URI https://github.com/oneapi-src/level-zero/archive/refs/tags/${
 set(LEVEL_ZERO_HASH c39bb05a8e5898aa6c444e1704105b93d3f1888b9c333f8e7e73825ffbfb2617)
 set(LEVEL_ZERO_HASH_TYPE SHA256)
 set(LEVEL_ZERO_FILE level-zero-${LEVEL_ZERO_VERSION}.tar.gz)
+
+set(DPCPP_VERSION 20220620)
+set(DPCPP_URI https://github.com/intel/llvm/archive/refs/tags/sycl-nightly/${DPCPP_VERSION}.tar.gz)
+set(DPCPP_HASH a5f41abd5229d28afa92cbd8a5d8d786ee698bf239f722929fd686276bad692c)
+set(DPCPP_HASH_TYPE SHA256)
+set(DPCPP_FILE DPCPP-${DPCPP_VERSION}.tar.gz)
+
+########################
+### DPCPP DEPS BEGIN ###
+########################
+# The following deps are build time requirements for dpcpp. When possible,
+# the file in the dpcpp source tree that the chosen version was taken from
+# is documented by each dep. These only have to be downloaded and unpacked;
+# dpcpp takes care of building them. Unpacking is done in dpcpp_deps.cmake.
+
+# Source llvm/lib/SYCLLowerIR/CMakeLists.txt
+set(VCINTRINSICS_VERSION 984bb27baacce6ee5c716c2e64845f2a1928025b)
+set(VCINTRINSICS_URI https://github.com/intel/vc-intrinsics/archive/${VCINTRINSICS_VERSION}.tar.gz)
+set(VCINTRINSICS_HASH abea415a15a0dd11fdc94dee8fb462910f2548311b787e02f42509789e1b0d7b)
+set(VCINTRINSICS_HASH_TYPE SHA256)
+set(VCINTRINSICS_FILE vc-intrinsics-${VCINTRINSICS_VERSION}.tar.gz)
+
+# Source opencl/CMakeLists.txt
+set(OPENCLHEADERS_VERSION dcd5bede6859d26833cd85f0d6bbcee7382dc9b3)
+set(OPENCLHEADERS_URI https://github.com/KhronosGroup/OpenCL-Headers/archive/${OPENCLHEADERS_VERSION}.tar.gz)
+set(OPENCLHEADERS_HASH ca8090359654e94f2c41e946b7e9d826253d795ae809ce7c83a7d3c859624693)
+set(OPENCLHEADERS_HASH_TYPE SHA256)
+set(OPENCLHEADERS_FILE opencl_headers-${OPENCLHEADERS_VERSION}.tar.gz)
+
+# Source opencl/CMakeLists.txt
+set(ICDLOADER_VERSION aec3952654832211636fc4af613710f80e203b0a)
+set(ICDLOADER_URI https://github.com/KhronosGroup/OpenCL-ICD-Loader/archive/${ICDLOADER_VERSION}.tar.gz)
+set(ICDLOADER_HASH e1880551d67bd8dc31d13de63b94bbfd6b1f315b6145dad1ffcd159b89bda93c)
+set(ICDLOADER_HASH_TYPE SHA256)
+set(ICDLOADER_FILE icdloader-${ICDLOADER_VERSION}.tar.gz)
+
+# Source sycl/cmake/modules/AddBoostMp11Headers.cmake
+# Using external MP11 here, getting AddBoostMp11Headers.cmake to recognize
+# our copy in boost directly was more trouble than it was worth.
+set(MP11_VERSION 7bc4e1ae9b36ec8ee635c3629b59ec525bbe82b9)
+set(MP11_URI https://github.com/boostorg/mp11/archive/${MP11_VERSION}.tar.gz)
+set(MP11_HASH 071ee2bd3952ec89882edb3af25dd1816f6b61723f66e42eea32f4d02ceef426)
+set(MP11_HASH_TYPE SHA256)
+set(MP11_FILE mp11-${MP11_VERSION}.tar.gz)
+
+# Source llvm-spirv/CMakeLists.txt (repo)
+# Source llvm-spirv/spirv-headers-tag.conf (hash)
+set(SPIRV_HEADERS_VERSION 36c0c1596225e728bd49abb7ef56a3953e7ed468)
+set(SPIRV_HEADERS_URI https://github.com/KhronosGroup/SPIRV-Headers/archive/${SPIRV_HEADERS_VERSION}.tar.gz)
+set(SPIRV_HEADERS_HASH 7a5c89633f8740456fe8adee052033e134476d267411d1336c0cb1e587a9229a)
+set(SPIRV_HEADERS_HASH_TYPE SHA256)
+set(SPIRV_HEADERS_FILE SPIR-V-Headers-${SPIRV_HEADERS_VERSION}.tar.gz)
+
+######################
+### DPCPP DEPS END ###
+######################
+
+##########################################
+### Intel Graphics Compiler DEPS BEGIN ###
+##########################################
+# The following deps are build time requirements for the intel graphics
+# compiler, the versions used are taken from the following location
+# https://github.com/intel/intel-graphics-compiler/releases
+
+set(IGC_VERSION 1.0.11222)
+set(IGC_URI https://github.com/intel/intel-graphics-compiler/archive/refs/tags/igc-${IGC_VERSION}.tar.gz)
+set(IGC_HASH d92f0608dcbb52690855685f9447282e5c09c0ba98ae35fabf114fcf8b1e9fcf)
+set(IGC_HASH_TYPE SHA256)
+set(IGC_FILE igc-${IGC_VERSION}.tar.gz)
+
+set(IGC_LLVM_VERSION llvmorg-11.1.0)
+set(IGC_LLVM_URI https://github.com/llvm/llvm-project/archive/refs/tags/${IGC_LLVM_VERSION}.tar.gz)
+set(IGC_LLVM_HASH 53a0719f3f4b0388013cfffd7b10c7d5682eece1929a9553c722348d1f866e79)
+set(IGC_LLVM_HASH_TYPE SHA256)
+set(IGC_LLVM_FILE ${IGC_LLVM_VERSION}.tar.gz)
+
+# WARNING WARNING WARNING
+#
+# IGC_OPENCL_CLANG contains patches for some of its dependencies.
+#
+# Whenever IGC_OPENCL_CLANG_VERSION changes, one *MUST* inspect
+# IGC_OPENCL_CLANG's patches folder and update igc.cmake to account for
+# any added or removed patches.
+#
+# WARNING WARNING WARNING
+
+set(IGC_OPENCL_CLANG_VERSION bbdd1587f577397a105c900be114b56755d1f7dc)
+set(IGC_OPENCL_CLANG_URI https://github.com/intel/opencl-clang/archive/${IGC_OPENCL_CLANG_VERSION}.tar.gz)
+set(IGC_OPENCL_CLANG_HASH d08315f1b0d8a6fef33de2b3e6aa7356534c324910634962c72523d970773efc)
+set(IGC_OPENCL_CLANG_HASH_TYPE SHA256)
+set(IGC_OPENCL_CLANG_FILE opencl-clang-${IGC_OPENCL_CLANG_VERSION}.tar.gz)
+
+set(IGC_VCINTRINSICS_VERSION v0.4.0)
+set(IGC_VCINTRINSICS_URI https://github.com/intel/vc-intrinsics/archive/refs/tags/${IGC_VCINTRINSICS_VERSION}.tar.gz)
+set(IGC_VCINTRINSICS_HASH c8b92682ad5031cf9d5b82a40e7d5c0e763cd9278660adbcaa69aab988e4b589)
+set(IGC_VCINTRINSICS_HASH_TYPE SHA256)
+set(IGC_VCINTRINSICS_FILE vc-intrinsics-${IGC_VCINTRINSICS_VERSION}.tar.gz)
+
+set(IGC_SPIRV_HEADERS_VERSION sdk-1.3.204.1)
+set(IGC_SPIRV_HEADERS_URI https://github.com/KhronosGroup/SPIRV-Headers/archive/refs/tags/${IGC_SPIRV_HEADERS_VERSION}.tar.gz)
+set(IGC_SPIRV_HEADERS_HASH 262864053968c217d45b24b89044a7736a32361894743dd6cfe788df258c746c)
+set(IGC_SPIRV_HEADERS_HASH_TYPE SHA256)
+set(IGC_SPIRV_HEADERS_FILE SPIR-V-Headers-${IGC_SPIRV_HEADERS_VERSION}.tar.gz)
+
+set(IGC_SPIRV_TOOLS_VERSION sdk-1.3.204.1)
+set(IGC_SPIRV_TOOLS_URI https://github.com/KhronosGroup/SPIRV-Tools/archive/refs/tags/${IGC_SPIRV_TOOLS_VERSION}.tar.gz)
+set(IGC_SPIRV_TOOLS_HASH 6e19900e948944243024aedd0a201baf3854b377b9cc7a386553bc103b087335)
+set(IGC_SPIRV_TOOLS_HASH_TYPE SHA256)
+set(IGC_SPIRV_TOOLS_FILE SPIR-V-Tools-${IGC_SPIRV_TOOLS_VERSION}.tar.gz)
+
+set(IGC_SPIRV_TRANSLATOR_VERSION 99420daab98998a7e36858befac9c5ed109d4920)
+set(IGC_SPIRV_TRANSLATOR_URI https://github.com/KhronosGroup/SPIRV-LLVM-Translator/archive/${IGC_SPIRV_TRANSLATOR_VERSION}.tar.gz)
+set(IGC_SPIRV_TRANSLATOR_HASH 77dfb4ddb6bfb993535562c02ddea23f0a0d1c5a0258c1afe7e27c894ff783a8)
+set(IGC_SPIRV_TRANSLATOR_HASH_TYPE SHA256)
+set(IGC_SPIRV_TRANSLATOR_FILE SPIR-V-Translator-${IGC_SPIRV_TRANSLATOR_VERSION}.tar.gz)
+
+########################################
+### Intel Graphics Compiler DEPS END ###
+########################################
+
+set(GMMLIB_VERSION intel-gmmlib-22.1.2)
+set(GMMLIB_URI https://github.com/intel/gmmlib/archive/refs/tags/${GMMLIB_VERSION}.tar.gz)
+set(GMMLIB_HASH 3b9a6d5e7e3f5748b3d0a2fb0e980ae943907fece0980bd9c0508e71c838e334)
+set(GMMLIB_HASH_TYPE SHA256)
+set(GMMLIB_FILE ${GMMLIB_VERSION}.tar.gz)
+
+set(OCLOC_VERSION 22.20.23198)
+set(OCLOC_URI https://github.com/intel/compute-runtime/archive/refs/tags/${OCLOC_VERSION}.tar.gz)
+set(OCLOC_HASH ab22b8bf2560a57fdd3def0e35a62ca75991406f959c0263abb00cd6cd9ae998)
+set(OCLOC_HASH_TYPE SHA256)
+set(OCLOC_FILE ocloc-${OCLOC_VERSION}.tar.gz)
+
+set(AOM_VERSION 3.4.0)
+set(AOM_URI https://storage.googleapis.com/aom-releases/libaom-${AOM_VERSION}.tar.gz)
+set(AOM_HASH bd754b58c3fa69f3ffd29da77de591bd9c26970e3b18537951336d6c0252e354)
+set(AOM_HASH_TYPE SHA256)
+set(AOM_FILE libaom-${AOM_VERSION}.tar.gz)
--- a/build_files/build_environment/cmake/vpx.cmake
+++ b/build_files/build_environment/cmake/vpx.cmake
@@ -1,11 +1,13 @@
 # SPDX-License-Identifier: GPL-2.0-or-later

 if(WIN32)
-  if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
-    set(VPX_EXTRA_FLAGS --target=x86_64-win64-gcc --disable-multithread)
-  else()
-    set(VPX_EXTRA_FLAGS --target=x86-win32-gcc --disable-multithread)
-  endif()
+  # VPX is determined to use pthreads which it will tell ffmpeg to dynamically
+  # link, which is not something we're super into distribution wise. However
+  # if it cannot find pthread.h it'll happily provide a pthread emulation
+  # layer using win32 threads. So all this patch does is make it not find
+  # pthread.h
+  set(VPX_PATCH ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/vpx/src/external_vpx < ${PATCH_DIR}/vpx_windows.diff)
+  set(VPX_EXTRA_FLAGS --target=x86_64-win64-gcc )
 else()
  if(APPLE)
    if("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64")
@@ -18,6 +20,16 @@ else()
  endif()
 endif()

+if(NOT BLENDER_PLATFORM_ARM)
+  list(APPEND VPX_EXTRA_FLAGS
+    --enable-sse4_1
+    --enable-sse3
+    --enable-ssse3
+    --enable-avx
+    --enable-avx2
+  )
+endif()
+
 ExternalProject_Add(external_vpx
  URL file://${PACKAGE_DIR}/${VPX_FILE}
  DOWNLOAD_DIR ${DOWNLOAD_DIR}
@@ -30,11 +42,6 @@ ExternalProject_Add(external_vpx
      --enable-static
      --disable-install-bins
      --disable-install-srcs
-      --disable-sse4_1
-      --disable-sse3
-      --disable-ssse3
-      --disable-avx
-      --disable-avx2
      --disable-unit-tests
      --disable-examples
      --enable-vp8
@@ -42,6 +49,7 @@ ExternalProject_Add(external_vpx
      ${VPX_EXTRA_FLAGS}
  BUILD_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/vpx/src/external_vpx/ && make -j${MAKE_THREADS}
  INSTALL_COMMAND ${CONFIGURE_ENV} && cd ${BUILD_DIR}/vpx/src/external_vpx/ && make install
+  PATCH_COMMAND ${VPX_PATCH}
  INSTALL_DIR ${LIBDIR}/vpx
 )

--- a/build_files/build_environment/install_deps.sh
+++ b/build_files/build_environment/install_deps.sh
@@ -465,7 +465,7 @@ TBB_VERSION="2020"
 TBB_VERSION_SHORT="2020"
 TBB_VERSION_UPDATE="_U3"  # Used for source packages...
 TBB_VERSION_MIN="2018"
-TBB_VERSION_MEX="2022"
+TBB_VERSION_MEX="2021"  # 2021 introduces 'oneTBB', which has lots of compatibility breakage with previous versions
 TBB_FORCE_BUILD=false
 TBB_FORCE_REBUILD=false
 TBB_SKIP=false
@@ -478,7 +478,7 @@ OCIO_FORCE_BUILD=false
 OCIO_FORCE_REBUILD=false
 OCIO_SKIP=false

-IMATH_VERSION="3.1.4"
+IMATH_VERSION="3.1.5"
 IMATH_VERSION_SHORT="3.1"
 IMATH_VERSION_MIN="3.0"
 IMATH_VERSION_MEX="4.0"
@@ -487,7 +487,7 @@ IMATH_FORCE_REBUILD=false
 IMATH_SKIP=false
 _with_built_imath=false

-OPENEXR_VERSION="3.1.4"
+OPENEXR_VERSION="3.1.5"
 OPENEXR_VERSION_SHORT="3.1"
 OPENEXR_VERSION_MIN="3.0"
 OPENEXR_VERSION_MEX="4.0"
@@ -567,7 +567,7 @@ OPENCOLLADA_FORCE_BUILD=false
 OPENCOLLADA_FORCE_REBUILD=false
 OPENCOLLADA_SKIP=false

-EMBREE_VERSION="3.13.3"
+EMBREE_VERSION="3.13.4"
 EMBREE_VERSION_SHORT="3.13"
 EMBREE_VERSION_MIN="3.13"
 EMBREE_VERSION_MEX="4.0"
@@ -627,6 +627,9 @@ WEBP_DEV=""
 VPX_USE=false
 VPX_VERSION_MIN=0.9.7
 VPX_DEV=""
+AOM_USE=false
+AOM_VERSION_MIN=3.3.0
+AOM_DEV=""
 OPUS_USE=false
 OPUS_VERSION_MIN=1.1.1
 OPUS_DEV=""
@@ -635,9 +638,6 @@ MP3LAME_DEV=""
 OPENJPEG_USE=false
 OPENJPEG_DEV=""

-# Whether to use system GLEW or not (OpenSubDiv needs recent glew to work).
-NO_SYSTEM_GLEW=false
-
 # Switch to english language, else some things (like check_package_DEB()) won't work!
 LANG_BACK=$LANG
 LANG=""
@@ -1193,7 +1193,7 @@ Those libraries should be available as packages in all recent distributions (opt
    * libx11, libxcursor, libxi, libxrandr, libxinerama (and other libx... as needed).
    * libwayland-client0, libwayland-cursor0, libwayland-egl1, libxkbcommon0, libdbus-1-3, libegl1 (Wayland)
    * libsqlite3, libzstd, libbz2, libssl, libfftw3, libxml2, libtinyxml, yasm, libyaml-cpp, flex.
-    * libsdl2, libglew, libpugixml, libpotrace, [libgmp], [libglewmx], fontconfig, [libharu/libhpdf].\""
+    * libsdl2, libglew, libpugixml, libpotrace, [libgmp], fontconfig, [libharu/libhpdf].\""

 DEPS_SPECIFIC_INFO="\"BUILDABLE DEPENDENCIES:

@@ -1212,7 +1212,7 @@ You may also want to build them yourself (optional ones are [between brackets]):
    ** [NumPy $PYTHON_NUMPY_VERSION] (use pip).
    * Boost $BOOST_VERSION (from $BOOST_SOURCE, modules: $BOOST_BUILD_MODULES).
    * TBB $TBB_VERSION (from $TBB_SOURCE).
-    * [FFMpeg $FFMPEG_VERSION (needs libvorbis, libogg, libtheora, libx264, libmp3lame, libxvidcore, libvpx, libwebp, ...)] (from $FFMPEG_SOURCE).
+    * [FFMpeg $FFMPEG_VERSION (needs libvorbis, libogg, libtheora, libx264, libmp3lame, libxvidcore, libvpx, libaom, libwebp, ...)] (from $FFMPEG_SOURCE).
    * [OpenColorIO $OCIO_VERSION] (from $OCIO_SOURCE).
    * Imath $IMATH_VERSION (from $IMATH_SOURCE).
    * OpenEXR $OPENEXR_VERSION (from $OPENEXR_SOURCE).
@@ -1687,7 +1687,7 @@ compile_TBB() {
  fi

  # To be changed each time we make edits that would modify the compiled result!
-  tbb_magic=0
+  tbb_magic=1
  _init_tbb

  # Force having own builds for the dependencies.
@@ -2696,14 +2696,13 @@ compile_OSD() {
    mkdir build
    cd build

-    if [ -d $INST/tbb ]; then
-      cmake_d="$cmake_d $cmake_d -D TBB_LOCATION=$INST/tbb"
-    fi
    cmake_d="-D CMAKE_BUILD_TYPE=Release"
+    if [ -d $INST/tbb ]; then
+      cmake_d="$cmake_d -D TBB_LOCATION=$INST/tbb"
+    fi
    cmake_d="$cmake_d -D CMAKE_INSTALL_PREFIX=$_inst"
-    # ptex is only needed when nicholas bishop is ready
    cmake_d="$cmake_d -D NO_PTEX=1"
-    cmake_d="$cmake_d -D NO_CLEW=1 -D NO_CUDA=1 -D NO_OPENCL=1"
+    cmake_d="$cmake_d -D NO_CLEW=1 -D NO_CUDA=1 -D NO_OPENCL=1 -D NO_GLEW=1"
    # maya plugin, docs, tutorials, regression tests and examples are not needed
    cmake_d="$cmake_d -D NO_MAYA=1 -D NO_DOC=1 -D NO_TUTORIALS=1 -D NO_REGRESSION=1 -DNO_EXAMPLES=1"

@@ -3004,7 +3003,7 @@ compile_ALEMBIC() {
  fi

  # To be changed each time we make edits that would modify the compiled result!
-  alembic_magic=2
+  alembic_magic=3
  _init_alembic

  # Force having own builds for the dependencies.
@@ -3052,7 +3051,7 @@ compile_ALEMBIC() {
    fi
    if [ "$_with_built_openexr" = true ]; then
      cmake_d="$cmake_d -D USE_ARNOLD=OFF"
-      cmake_d="$cmake_d -D USE_BINARIES=OFF"
+      cmake_d="$cmake_d -D USE_BINARIES=ON"  # Tests use some Alembic binaries...
      cmake_d="$cmake_d -D USE_EXAMPLES=OFF"
      cmake_d="$cmake_d -D USE_HDF5=OFF"
      cmake_d="$cmake_d -D USE_MAYA=OFF"
@@ -3326,7 +3325,7 @@ compile_Embree() {
  fi

  # To be changed each time we make edits that would modify the compiled results!
-  embree_magic=10
+  embree_magic=11
  _init_embree

  # Force having own builds for the dependencies.
@@ -3386,7 +3385,7 @@ compile_Embree() {

    cmake_d="$cmake_d -D EMBREE_TASKING_SYSTEM=TBB"
    if [ -d $INST/tbb ]; then
-      make_d="$make_d EMBREE_TBB_ROOT=$INST/tbb"
+      cmake_d="$cmake_d -D EMBREE_TBB_ROOT=$INST/tbb"
    fi

    cmake $cmake_d ../
@@ -3525,7 +3524,7 @@ compile_OIDN() {
  install_ISPC

  # To be changed each time we make edits that would modify the compiled results!
-  oidn_magic=9
+  oidn_magic=10
  _init_oidn

  # Force having own builds for the dependencies.
@@ -3581,7 +3580,7 @@ compile_OIDN() {
    cmake_d="$cmake_d -D ISPC_DIR_HINT=$_ispc_path_bin"

    if [ -d $INST/tbb ]; then
-      make_d="$make_d TBB_ROOT=$INST/tbb"
+      cmake_d="$cmake_d -D TBB_ROOT=$INST/tbb"
    fi

    cmake $cmake_d ../
@@ -3638,7 +3637,7 @@ compile_FFmpeg() {
  fi

  # To be changed each time we make edits that would modify the compiled result!
-  ffmpeg_magic=5
+  ffmpeg_magic=6
  _init_ffmpeg

  # Force having own builds for the dependencies.
@@ -3691,6 +3690,10 @@ compile_FFmpeg() {
      extra="$extra --enable-libvpx"
    fi

+    if [ "$AOM_USE" = true ]; then
+      extra="$extra --enable-libaom"
+    fi
+
    if [ "$WEBP_USE" = true ]; then
      extra="$extra --enable-libwebp"
    fi
@@ -4062,7 +4065,6 @@ install_DEB() {
             libopenal-dev libglew-dev yasm \
             libsdl2-dev libfftw3-dev patch bzip2 libxml2-dev libtinyxml-dev libjemalloc-dev \
             libgmp-dev libpugixml-dev libpotrace-dev libhpdf-dev libzstd-dev libpystring-dev"
-             # libglewmx-dev  (broken in deb testing currently...)

  VORBIS_USE=true
  OGG_USE=true
@@ -4145,33 +4147,37 @@ install_DEB() {
    WEBP_USE=true
  fi

-  if [ "$WITH_ALL" = true ]; then
-    XVID_DEV="libxvidcore-dev"
-    check_package_DEB $XVID_DEV
-    if [ $? -eq 0 ]; then
-      XVID_USE=true
-    fi
-
-    MP3LAME_DEV="libmp3lame-dev"
-    check_package_DEB $MP3LAME_DEV
-    if [ $? -eq 0 ]; then
-      MP3LAME_USE=true
-    fi
-
-    VPX_DEV="libvpx-dev"
-    check_package_version_ge_DEB $VPX_DEV $VPX_VERSION_MIN
-    if [ $? -eq 0 ]; then
-      VPX_USE=true
-    fi
-
-    OPUS_DEV="libopus-dev"
-    check_package_version_ge_DEB $OPUS_DEV $OPUS_VERSION_MIN
-    if [ $? -eq 0 ]; then
-      OPUS_USE=true
-    fi
+  XVID_DEV="libxvidcore-dev"
+  check_package_DEB $XVID_DEV
+  if [ $? -eq 0 ]; then
+    XVID_USE=true
  fi

-  # Check cmake/glew versions and disable features for older distros.
+  MP3LAME_DEV="libmp3lame-dev"
+  check_package_DEB $MP3LAME_DEV
+  if [ $? -eq 0 ]; then
+    MP3LAME_USE=true
+  fi
+
+  VPX_DEV="libvpx-dev"
+  check_package_version_ge_DEB $VPX_DEV $VPX_VERSION_MIN
+  if [ $? -eq 0 ]; then
+    VPX_USE=true
+  fi
+
+  AOM_DEV="libaom-dev"
+  check_package_version_ge_DEB $AOM_DEV $AOM_VERSION_MIN
+  if [ $? -eq 0 ]; then
+    AOM_USE=true
+  fi
+
+  OPUS_DEV="libopus-dev"
+  check_package_version_ge_DEB $OPUS_DEV $OPUS_VERSION_MIN
+  if [ $? -eq 0 ]; then
+    OPUS_USE=true
+  fi
+
+  # Check cmake version and disable features for older distros.
  # This is so Blender can at least compile.
  PRINT ""
  _cmake=`get_package_version_DEB cmake`
@@ -4188,28 +4194,6 @@ install_DEB() {
    fi
  fi

-  PRINT ""
-  _glew=`get_package_version_DEB libglew-dev`
-  if [ -z $_glew ]; then
-    # Stupid virtual package in Ubuntu 12.04 doesn't show version number...
-    _glew=`apt-cache showpkg libglew-dev|tail -n1|awk '{print $2}'|sed 's/-.*//'`
-  fi
-  version_ge $_glew "1.9.0"
-  if [ $? -eq 1 ]; then
-    version_ge $_glew "1.7.0"
-    if [ $? -eq 1 ]; then
-      WARNING "OpenSubdiv disabled because GLEW-$_glew is not enough"
-      WARNING "Blender will not use system GLEW library"
-      OSD_SKIP=true
-      NO_SYSTEM_GLEW=true
-    else
-      WARNING "OpenSubdiv will compile with GLEW-$_glew but with limited capability"
-      WARNING "Blender will not use system GLEW library"
-      NO_SYSTEM_GLEW=true
-    fi
-  fi
-
-
  PRINT ""
  _do_compile_python=false
  if [ "$PYTHON_SKIP" = true ]; then
@@ -4573,6 +4557,9 @@ install_DEB() {
    if [ "$VPX_USE" = true ]; then
      _packages="$_packages $VPX_DEV"
    fi
+    if [ "$AOM_USE" = true ]; then
+      _packages="$_packages $AOM_DEV"
+    fi
    if [ "$OPUS_USE" = true ]; then
      _packages="$_packages $OPUS_DEV"
    fi
@@ -4873,21 +4860,27 @@ install_RPM() {
    WEBP_USE=true
  fi

-  if [ "$WITH_ALL" = true ]; then
-    VPX_DEV="libvpx-devel"
-    check_package_version_ge_RPM $VPX_DEV $VPX_VERSION_MIN
-    if [ $? -eq 0 ]; then
-      VPX_USE=true
-    fi
+  VPX_DEV="libvpx-devel"
+  check_package_version_ge_RPM $VPX_DEV $VPX_VERSION_MIN
+  if [ $? -eq 0 ]; then
+    VPX_USE=true
+  fi

+  AOM_DEV="libaom-devel"
+  check_package_version_ge_RPM $AOM_DEV $AOM_VERSION_MIN
+  if [ $? -eq 0 ]; then
+    AOM_USE=true
+  fi
+
+  OPUS_DEV="libopus-devel"
+  check_package_version_ge_RPM $OPUS_DEV $OPUS_VERSION_MIN
+  if [ $? -eq 0 ]; then
+    OPUS_USE=true
+  fi
+
+  if [ "$WITH_ALL" = true ]; then
    PRINT ""
    install_packages_RPM libspnav-devel
-
-    OPUS_DEV="libopus-devel"
-    check_package_version_ge_RPM $OPUS_DEV $OPUS_VERSION_MIN
-    if [ $? -eq 0 ]; then
-      OPUS_USE=true
-    fi
  fi

  PRINT ""
@@ -5272,6 +5265,9 @@ install_RPM() {
    if [ "$VPX_USE" = true ]; then
      _packages="$_packages $VPX_DEV"
    fi
+    if [ "$AOM_USE" = true ]; then
+      _packages="$_packages $AOM_DEV"
+    fi
    if [ "$OPUS_USE" = true ]; then
      _packages="$_packages $OPUS_DEV"
    fi
@@ -5461,30 +5457,34 @@ install_ARCH() {
    WEBP_USE=true
  fi

-  if [ "$WITH_ALL" = true ]; then
-    XVID_DEV="xvidcore"
-    check_package_ARCH $XVID_DEV
-    if [ $? -eq 0 ]; then
-      XVID_USE=true
-    fi
+  XVID_DEV="xvidcore"
+  check_package_ARCH $XVID_DEV
+  if [ $? -eq 0 ]; then
+    XVID_USE=true
+  fi

-    MP3LAME_DEV="lame"
-    check_package_ARCH $MP3LAME_DEV
-    if [ $? -eq 0 ]; then
-      MP3LAME_USE=true
-    fi
+  MP3LAME_DEV="lame"
+  check_package_ARCH $MP3LAME_DEV
+  if [ $? -eq 0 ]; then
+    MP3LAME_USE=true
+  fi

-    VPX_DEV="libvpx"
-    check_package_version_ge_ARCH $VPX_DEV $VPX_VERSION_MIN
-    if [ $? -eq 0 ]; then
-      VPX_USE=true
-    fi
+  VPX_DEV="libvpx"
+  check_package_version_ge_ARCH $VPX_DEV $VPX_VERSION_MIN
+  if [ $? -eq 0 ]; then
+    VPX_USE=true
+  fi

-    OPUS_DEV="opus"
-    check_package_version_ge_ARCH $OPUS_DEV $OPUS_VERSION_MIN
-    if [ $? -eq 0 ]; then
-      OPUS_USE=true
-    fi
+  AOM_DEV="libaom"
+  check_package_version_ge_ARCH $AOM_DEV $AOM_VERSION_MIN
+  if [ $? -eq 0 ]; then
+    AOM_USE=true
+  fi
+
+  OPUS_DEV="opus"
+  check_package_version_ge_ARCH $OPUS_DEV $OPUS_VERSION_MIN
+  if [ $? -eq 0 ]; then
+    OPUS_USE=true
  fi


@@ -5862,6 +5862,9 @@ install_ARCH() {
    if [ "$VPX_USE" = true ]; then
      _packages="$_packages $VPX_DEV"
    fi
+    if [ "$AOM_USE" = true ]; then
+      _packages="$_packages $AOM_DEV"
+    fi
    if [ "$OPUS_USE" = true ]; then
      _packages="$_packages $OPUS_DEV"
    fi
@@ -6290,12 +6293,6 @@ print_info() {
    fi
  fi

-  if [ "$NO_SYSTEM_GLEW" = true ]; then
-    _1="-D WITH_SYSTEM_GLEW=OFF"
-    PRINT "  $_1"
-    _buildargs="$_buildargs $_1"
-  fi
-
  if [ "$FFMPEG_SKIP" = false ]; then
    _1="-D WITH_CODEC_FFMPEG=ON"
    PRINT "  $_1"
--- a/build_files/build_environment/patches/dpcpp.diff
+++ b/build_files/build_environment/patches/dpcpp.diff
@@ -0,0 +1,54 @@
+diff -Naur external_dpcpp.orig/sycl/source/CMakeLists.txt external_dpcpp/sycl/source/CMakeLists.txt
+--- external_dpcpp.orig/sycl/source/CMakeLists.txt      2022-05-20 04:19:45.067771362 +0000
+++ external_dpcpp/sycl/source/CMakeLists.txt   2022-05-20 04:21:49.708025048 +0000
+@@ -66,10 +66,10 @@
+     target_compile_options(${LIB_OBJ_NAME} PUBLIC
+                            -fvisibility=hidden -fvisibility-inlines-hidden)
+     set(linker_script "${CMAKE_CURRENT_SOURCE_DIR}/ld-version-script.txt")
+-    set(abi_linker_script "${CMAKE_CURRENT_SOURCE_DIR}/abi_replacements_linux.txt")
+-    target_link_libraries(
+-      ${LIB_NAME} PRIVATE "-Wl,${abi_linker_script}")
+-    set_target_properties(${LIB_NAME} PROPERTIES LINK_DEPENDS ${abi_linker_script})
+#    set(abi_linker_script "${CMAKE_CURRENT_SOURCE_DIR}/abi_replacements_linux.txt")
+#    target_link_libraries(
+#      ${LIB_NAME} PRIVATE "-Wl,${abi_linker_script}")
+#    set_target_properties(${LIB_NAME} PROPERTIES LINK_DEPENDS ${abi_linker_script})
+     target_link_libraries(
+         ${LIB_NAME} PRIVATE "-Wl,--version-script=${linker_script}")
+     set_target_properties(${LIB_NAME} PROPERTIES LINK_DEPENDS ${linker_script})
+diff -Naur llvm-sycl-nightly-20220501.orig\opencl/CMakeLists.txt llvm-sycl-nightly-20220501\opencl/CMakeLists.txt
+--- llvm-sycl-nightly-20220501.orig/opencl/CMakeLists.txt       2022-04-29 13:47:11 -0600
+++ llvm-sycl-nightly-20220501/opencl/CMakeLists.txt    2022-05-21 15:25:06 -0600
+@@ -11,6 +11,11 @@
+   )
+ endif()
+
+# Blender: the code below is determined to use FetchContent_Declare,
+# so temporarily allow it (but feed it our downloaded tarball
+# in the OpenCL_HEADERS variable).
+set(FETCHCONTENT_FULLY_DISCONNECTED OFF)
+
+ # Repo URLs
+
+ set(OCL_HEADERS_REPO
+@@ -77,5 +82,6 @@
+
+ FetchContent_MakeAvailable(ocl-icd)
+ add_library(OpenCL-ICD ALIAS OpenCL)
+set(FETCHCONTENT_FULLY_DISCONNECTED ON)
+
+ add_subdirectory(opencl-aot)
+diff -Naur llvm-sycl-nightly-20220208.orig/libdevice/cmake/modules/SYCLLibdevice.cmake llvm-sycl-nightly-20220208/libdevice/cmake/modules/SYCLLibdevice.cmake
+--- llvm-sycl-nightly-20220208.orig/libdevice/cmake/modules/SYCLLibdevice.cmake	2022-02-08 09:17:24 -0700
+++ llvm-sycl-nightly-20220208/libdevice/cmake/modules/SYCLLibdevice.cmake	2022-05-24 11:35:51 -0600
+@@ -36,7 +36,9 @@
+ add_custom_target(libsycldevice-obj)
+ add_custom_target(libsycldevice-spv)
+ 
+-add_custom_target(libsycldevice DEPENDS
+# Blender: add ALL here otherwise this target will not build
+# and cause an error due to missing files during the install phase.
+add_custom_target(libsycldevice ALL DEPENDS
+   libsycldevice-obj
+   libsycldevice-spv)
+ 
--- a/build_files/build_environment/patches/embree.diff
+++ b/build_files/build_environment/patches/embree.diff
@@ -1,30 +1,37 @@
-diff -Naur orig/common/sys/platform.h external_embree/common/sys/platform.h
--- orig/common/sys/platform.h	2020-05-13 23:08:53 -0600
-+++ external_embree/common/sys/platform.h	2020-06-13 17:40:26 -0600
-@@ -84,8 +84,8 @@
- ////////////////////////////////////////////////////////////////////////////////
+diff -Naur org/kernels/rtcore_config.h.in embree-3.13.4/kernels/rtcore_config.h.in
+--- org/kernels/rtcore_config.h.in      2022-06-14 22:13:52 -0600
+++ embree-3.13.4/kernels/rtcore_config.h.in    2022-06-24 15:20:12 -0600
+@@ -14,6 +14,7 @@
+ #cmakedefine01 EMBREE_MIN_WIDTH
+ #define RTC_MIN_WIDTH EMBREE_MIN_WIDTH
+
+#cmakedefine EMBREE_STATIC_LIB
+ #cmakedefine EMBREE_API_NAMESPACE
+
+ #if defined(EMBREE_API_NAMESPACE)
+diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt
+index 7c2f43d..106b1d5 100644
+--- a/kernels/CMakeLists.txt
+++ b/kernels/CMakeLists.txt
+@@ -201,6 +201,12 @@ embree_files(EMBREE_LIBRARY_FILES_AVX512 ${AVX512})
+ #message("AVX2: ${EMBREE_LIBRARY_FILES_AVX2}")
+ #message("AVX512: ${EMBREE_LIBRARY_FILES_AVX512}")
 
- #ifdef __WIN32__
-#define dll_export __declspec(dllexport)
-#define dll_import __declspec(dllimport)
-+#define dll_export 
-+#define dll_import 
- #else
- #define dll_export __attribute__ ((visibility ("default")))
- #define dll_import 
-diff --git orig/common/tasking/CMakeLists.txt external_embree/common/tasking/CMakeLists.txt
--- orig/common/tasking/CMakeLists.txt
-+++ external_embree/common/tasking/CMakeLists.txt
-@@ -27,7 +27,11 @@
-     else()
-       # If not found try getting older TBB via module (FindTBB.cmake)
-       unset(TBB_DIR CACHE)
-      find_package(TBB 4.1 REQUIRED tbb)
-+      if (TBB_STATIC_LIB)
-+        find_package(TBB 4.1 REQUIRED tbb_static)
-+      else()
-+        find_package(TBB 4.1 REQUIRED tbb)
-+      endif()
-       if (TBB_FOUND)
-         TARGET_LINK_LIBRARIES(tasking PUBLIC TBB)
-         TARGET_INCLUDE_DIRECTORIES(tasking PUBLIC "${TBB_INCLUDE_DIRS}")
+# Bundle Neon2x into the main static library.
+IF(EMBREE_ISA_NEON2X AND EMBREE_STATIC_LIB)
+  LIST(APPEND EMBREE_LIBRARY_FILES ${EMBREE_LIBRARY_FILES_AVX2})
+  LIST(REMOVE_DUPLICATES EMBREE_LIBRARY_FILES)
+ENDIF()
+
+ # replaces all .cpp files with a dummy file that includes that .cpp file
+ # this is to work around an ICC name mangling issue related to lambda functions under windows
+ MACRO (CreateISADummyFiles list isa)
+@@ -277,7 +283,7 @@ IF (EMBREE_ISA_AVX  AND EMBREE_LIBRARY_FILES_AVX)
+   ENDIF()
+ ENDIF()
+ 
+-IF (EMBREE_ISA_AVX2 AND EMBREE_LIBRARY_FILES_AVX2)
+IF (EMBREE_ISA_AVX2 AND EMBREE_LIBRARY_FILES_AVX2 AND NOT (EMBREE_ISA_NEON2X AND EMBREE_STATIC_LIB))
+   DISABLE_STACK_PROTECTOR_FOR_INTERSECTORS(${EMBREE_LIBRARY_FILES_AVX2})
+   ADD_LIBRARY(embree_avx2 STATIC ${EMBREE_LIBRARY_FILES_AVX2})
+   TARGET_LINK_LIBRARIES(embree_avx2 PRIVATE tasking)
--- a/build_files/build_environment/patches/igc_opencl_clang.diff
+++ b/build_files/build_environment/patches/igc_opencl_clang.diff
@@ -0,0 +1,44 @@
+diff -Naur external_igc_opencl_clang.orig/CMakeLists.txt external_igc_opencl_clang/CMakeLists.txt
+--- external_igc_opencl_clang.orig/CMakeLists.txt	2022-03-16 05:51:10 -0600
+++ external_igc_opencl_clang/CMakeLists.txt	2022-05-23 10:40:09 -0600
+@@ -126,22 +126,24 @@
+         )
+     endif()
+ 
+-
+-    set(SPIRV_BASE_REVISION llvm_release_110)
+-    set(TARGET_BRANCH "ocl-open-110")
+-    get_filename_component(LLVM_MONOREPO_DIR ${LLVM_SOURCE_DIR} DIRECTORY)
+-    set(LLVM_PATCHES_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/patches/llvm
+-                          ${CMAKE_CURRENT_SOURCE_DIR}/patches/clang)
+-    apply_patches(${LLVM_MONOREPO_DIR}
+-                  "${LLVM_PATCHES_DIRS}"
+-                  ${LLVM_BASE_REVISION}
+-                  ${TARGET_BRANCH}
+-                  ret)
+-    apply_patches(${SPIRV_SOURCE_DIR}
+-                  ${CMAKE_CURRENT_SOURCE_DIR}/patches/spirv
+-                  ${SPIRV_BASE_REVISION}
+-                  ${TARGET_BRANCH}
+-                  ret)
+    #
+    # Blender: We apply these manually in igc.cmake
+    #
+    #set(SPIRV_BASE_REVISION llvm_release_110)
+    #set(TARGET_BRANCH "ocl-open-110")
+    #get_filename_component(LLVM_MONOREPO_DIR ${LLVM_SOURCE_DIR} DIRECTORY)
+    #set(LLVM_PATCHES_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/patches/llvm
+    #                      ${CMAKE_CURRENT_SOURCE_DIR}/patches/clang)
+    #apply_patches(${LLVM_MONOREPO_DIR}
+    #              "${LLVM_PATCHES_DIRS}"
+    #              ${LLVM_BASE_REVISION}
+    #              ${TARGET_BRANCH}
+    #              ret)
+    #apply_patches(${SPIRV_SOURCE_DIR}
+    #              ${CMAKE_CURRENT_SOURCE_DIR}/patches/spirv
+    #              ${SPIRV_BASE_REVISION}
+    #              ${TARGET_BRANCH}
+    #              ret)
+ endif(NOT USE_PREBUILT_LLVM)
+ 
+ #
--- a/build_files/build_environment/patches/vpx_windows.diff
+++ b/build_files/build_environment/patches/vpx_windows.diff
@@ -0,0 +1,11 @@
+diff -Naur orig/configure external_vpx/configure
+--- orig/configure	2022-07-06 09:22:04 -0600
+++ external_vpx/configure	2022-07-06 09:24:12 -0600
+@@ -270,7 +270,6 @@
+ HAVE_LIST="
+     ${ARCH_EXT_LIST}
+     vpx_ports
+-    pthread_h
+     unistd_h
+ "
+ EXPERIMENT_LIST="
--- a/build_files/cmake/Modules/FindLevelZero.cmake
+++ b/build_files/cmake/Modules/FindLevelZero.cmake
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2021-2022 Intel Corporation
+
+# - Find Level Zero library
+# Find Level Zero headers and libraries needed by oneAPI implementation
+# This module defines
+#  LEVEL_ZERO_LIBRARY, libraries to link against in order to use L0.
+#  LEVEL_ZERO_INCLUDE_DIR, directories where L0 headers can be found.
+#  LEVEL_ZERO_ROOT_DIR, The base directory to search for L0 files.
+#                 This can also be an environment variable.
+#  LEVEL_ZERO_FOUND, If false, then don't try to use L0.
+
+IF(NOT LEVEL_ZERO_ROOT_DIR AND NOT "$ENV{LEVEL_ZERO_ROOT_DIR}" STREQUAL "")
+  SET(LEVEL_ZERO_ROOT_DIR "$ENV{LEVEL_ZERO_ROOT_DIR}")  # Fall back to the environment variable.
+ENDIF()
+
+SET(_level_zero_search_dirs
+  ${LEVEL_ZERO_ROOT_DIR}
+  /usr/lib
+  /usr/local/lib
+)
+
+FIND_LIBRARY(_LEVEL_ZERO_LIBRARY
+  NAMES
+    ze_loader
+  HINTS
+    ${_level_zero_search_dirs}
+  PATH_SUFFIXES
+    lib64 lib
+)
+
+FIND_PATH(_LEVEL_ZERO_INCLUDE_DIR
+  NAMES
+    level_zero/ze_api.h
+  HINTS
+    ${_level_zero_search_dirs}
+  PATH_SUFFIXES
+    include
+)
+
+INCLUDE(FindPackageHandleStandardArgs)
+
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(LevelZero DEFAULT_MSG _LEVEL_ZERO_LIBRARY _LEVEL_ZERO_INCLUDE_DIR)
+
+# Publish the documented result variables from the internal search results.
+IF(LevelZero_FOUND)
+  SET(LEVEL_ZERO_LIBRARY ${_LEVEL_ZERO_LIBRARY})
+  SET(LEVEL_ZERO_INCLUDE_DIR ${_LEVEL_ZERO_INCLUDE_DIR})
+  SET(LEVEL_ZERO_FOUND TRUE)
+ELSE()
+  SET(LEVEL_ZERO_FOUND FALSE)
+ENDIF()
+
+# The cache entries created by FIND_LIBRARY/FIND_PATH above are the
+# underscore-prefixed names, so those are the ones to mark as advanced.
+MARK_AS_ADVANCED(_LEVEL_ZERO_LIBRARY _LEVEL_ZERO_INCLUDE_DIR)
--- a/build_files/cmake/Modules/FindSYCL.cmake
+++ b/build_files/cmake/Modules/FindSYCL.cmake
@@ -0,0 +1,88 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2021-2022 Intel Corporation
+
+# - Find SYCL library
+# Find the native SYCL header and libraries needed by oneAPI implementation
+# This module defines
+#  SYCL_COMPILER, compiler which will be used for compilation of SYCL code
+#  SYCL_LIBRARY, libraries to link against in order to use SYCL.
+#  SYCL_INCLUDE_DIR, directories where SYCL headers can be found
+#  SYCL_ROOT_DIR, The base directory to search for SYCL files.
+#                 This can also be an environment variable.
+#  SYCL_FOUND, If false, then don't try to use SYCL.
+
+IF(NOT SYCL_ROOT_DIR AND NOT "$ENV{SYCL_ROOT_DIR}" STREQUAL "")
+  SET(SYCL_ROOT_DIR "$ENV{SYCL_ROOT_DIR}")  # Fall back to the environment variable.
+ENDIF()
+
+SET(_sycl_search_dirs
+  ${SYCL_ROOT_DIR}
+  /usr/lib
+  /usr/local/lib
+  /opt/intel/oneapi/compiler/latest/linux/
+  C:/Program\ Files\ \(x86\)/Intel/oneAPI/compiler/latest/windows
+)
+
+# Find DPC++ compiler.
+# Since the compiler name is possibly conflicting with the system-wide
+# CLang start with looking for either dpcpp or clang binary in the given
+# list of search paths only. If that fails, try to look for a system-wide
+# dpcpp binary.
+FIND_PROGRAM(SYCL_COMPILER
+  NAMES
+    dpcpp
+    clang++
+  HINTS
+    ${_sycl_search_dirs}
+  PATH_SUFFIXES
+    bin
+  NO_CMAKE_FIND_ROOT_PATH
+  NAMES_PER_DIR
+)
+
+# NOTE: No clang++ here so that we do not pick up a system-wide CLang
+# compiler.
+if(NOT SYCL_COMPILER)
+  FIND_PROGRAM(SYCL_COMPILER
+   NAMES
+      dpcpp
+    HINTS
+      ${_sycl_search_dirs}
+    PATH_SUFFIXES
+      bin
+  )
+endif()
+
+FIND_LIBRARY(SYCL_LIBRARY
+  NAMES
+    sycl
+  HINTS
+    ${_sycl_search_dirs}
+  PATH_SUFFIXES
+    lib64 lib
+)
+
+FIND_PATH(SYCL_INCLUDE_DIR
+  NAMES
+    CL/sycl.hpp
+  HINTS
+    ${_sycl_search_dirs}
+  PATH_SUFFIXES
+    include
+    include/sycl
+)
+
+INCLUDE(FindPackageHandleStandardArgs)
+
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(SYCL DEFAULT_MSG SYCL_LIBRARY SYCL_INCLUDE_DIR)
+
+# On success, also expose the parent of the detected include directory.
+IF(SYCL_FOUND)
+  get_filename_component(_SYCL_INCLUDE_PARENT_DIR ${SYCL_INCLUDE_DIR} DIRECTORY)
+  SET(SYCL_INCLUDE_DIR ${SYCL_INCLUDE_DIR} ${_SYCL_INCLUDE_PARENT_DIR})
+ELSE()
+  SET(SYCL_FOUND FALSE)
+ENDIF()
+
+# Helper variable used only inside this module.
+MARK_AS_ADVANCED(_SYCL_INCLUDE_PARENT_DIR)
--- a/build_files/cmake/config/blender_release.cmake
+++ b/build_files/cmake/config/blender_release.cmake
@@ -70,7 +70,7 @@ if(NOT WIN32)
  set(WITH_JACK                ON  CACHE BOOL "" FORCE)
 endif()
 if(WIN32)
-  set(WITH_WASAPI              ON  CACHE BOOL "" FORCE)
+  set(WITH_WASAPI               ON  CACHE BOOL "" FORCE)
 endif()
 if(UNIX AND NOT APPLE)
  set(WITH_DOC_MANPAGE         ON  CACHE BOOL "" FORCE)
@@ -86,4 +86,8 @@ if(NOT APPLE)
  set(WITH_CYCLES_CUDA_BINARIES   ON  CACHE BOOL "" FORCE)
  set(WITH_CYCLES_CUBIN_COMPILER  OFF CACHE BOOL "" FORCE)
  set(WITH_CYCLES_HIP_BINARIES    ON  CACHE BOOL "" FORCE)
+  set(WITH_CYCLES_DEVICE_ONEAPI   ON  CACHE BOOL "" FORCE)
+
+  # Disable AoT kernels compilations until buildbot can deliver them in a reasonable time.
+  set(WITH_CYCLES_ONEAPI_BINARIES OFF CACHE BOOL "" FORCE)
 endif()
--- a/build_files/cmake/platform/platform_apple.cmake
+++ b/build_files/cmake/platform/platform_apple.cmake
@@ -162,6 +162,9 @@ if(WITH_CODEC_FFMPEG)
    mp3lame ogg opus swresample swscale
    theora theoradec theoraenc vorbis vorbisenc
    vorbisfile vpx x264 xvidcore)
+  if(EXISTS ${LIBDIR}/ffmpeg/lib/libaom.a)
+    list(APPEND FFMPEG_FIND_COMPONENTS aom)
+  endif()
  find_package(FFmpeg)
 endif()

@@ -467,8 +470,9 @@ string(APPEND CMAKE_CXX_FLAGS " -ftemplate-depth=1024")

 # Avoid conflicts with Luxrender, and other plug-ins that may use the same
 # libraries as Blender with a different version or build options.
+set(PLATFORM_SYMBOLS_MAP ${CMAKE_SOURCE_DIR}/source/creator/symbols_apple.map)
 string(APPEND PLATFORM_LINKFLAGS
-  " -Wl,-unexported_symbols_list,'${CMAKE_SOURCE_DIR}/source/creator/osx_locals.map'"
+  " -Wl,-unexported_symbols_list,'${PLATFORM_SYMBOLS_MAP}'"
 )

 string(APPEND CMAKE_CXX_FLAGS " -stdlib=libc++")
--- a/build_files/cmake/platform/platform_unix.cmake
+++ b/build_files/cmake/platform/platform_unix.cmake
@@ -38,9 +38,15 @@ if(EXISTS ${LIBDIR})
  message(STATUS "Using pre-compiled LIBDIR: ${LIBDIR}")

  file(GLOB LIB_SUBDIRS ${LIBDIR}/*)
+
  # Ignore Mesa software OpenGL libraries, they are not intended to be
  # linked against but to optionally override at runtime.
  list(REMOVE_ITEM LIB_SUBDIRS ${LIBDIR}/mesa)
+
+  # Ignore DPC++ as it contains its own copy of LLVM/CLang which we do
+  # not need to be ever discovered for the Blender linking.
+  list(REMOVE_ITEM LIB_SUBDIRS ${LIBDIR}/dpcpp)
+
  # NOTE: Make sure "proper" compiled zlib comes first before the one
  # which is a part of OpenCollada. They have different ABI, and we
  # do need to use the official one.
@@ -196,6 +202,9 @@ if(WITH_CODEC_FFMPEG)
      vpx
      x264
      xvidcore)
+    if(EXISTS ${LIBDIR}/ffmpeg/lib/libaom.a)
+      list(APPEND FFMPEG_FIND_COMPONENTS aom)
+    endif()
  elseif(FFMPEG)
    # Old cache variable used for root dir, convert to new standard.
    set(FFMPEG_ROOT_DIR ${FFMPEG})
@@ -271,6 +280,18 @@ if(WITH_CYCLES AND WITH_CYCLES_OSL)
  endif()
 endif()

+if(WITH_CYCLES_DEVICE_ONEAPI)
+  set(CYCLES_LEVEL_ZERO ${LIBDIR}/level-zero CACHE PATH "Path to Level Zero installation")
+  if(EXISTS ${CYCLES_LEVEL_ZERO} AND NOT LEVEL_ZERO_ROOT_DIR)
+    set(LEVEL_ZERO_ROOT_DIR ${CYCLES_LEVEL_ZERO})
+  endif()
+
+  set(CYCLES_SYCL ${LIBDIR}/dpcpp CACHE PATH "Path to DPC++ and SYCL installation")
+  if(EXISTS ${CYCLES_SYCL} AND NOT SYCL_ROOT_DIR)
+    set(SYCL_ROOT_DIR ${CYCLES_SYCL})
+  endif()
+endif()
+
 if(WITH_OPENVDB)
  find_package_wrapper(OpenVDB)
  find_package_wrapper(Blosc)
@@ -613,17 +634,42 @@ if(WITH_GHOST_WAYLAND)
  pkg_check_modules(wayland-scanner REQUIRED wayland-scanner)
  pkg_check_modules(xkbcommon REQUIRED xkbcommon)
  pkg_check_modules(wayland-cursor REQUIRED wayland-cursor)
-  pkg_check_modules(dbus REQUIRED dbus-1)

-  set(WITH_GL_EGL ON)
+  if(WITH_GHOST_WAYLAND_DBUS)
+    pkg_check_modules(dbus REQUIRED dbus-1)
+  endif()
+
+  if(WITH_GHOST_WAYLAND_LIBDECOR)
+    pkg_check_modules(libdecor REQUIRED libdecor-0>=0.1)
+  endif()

  list(APPEND PLATFORM_LINKLIBS
-    ${wayland-client_LINK_LIBRARIES}
-    ${wayland-egl_LINK_LIBRARIES}
    ${xkbcommon_LINK_LIBRARIES}
-    ${wayland-cursor_LINK_LIBRARIES}
-    ${dbus_LINK_LIBRARIES}
  )
+
+  if(NOT WITH_GHOST_WAYLAND_DYNLOAD)
+    list(APPEND PLATFORM_LINKLIBS
+      ${wayland-client_LINK_LIBRARIES}
+      ${wayland-egl_LINK_LIBRARIES}
+      ${wayland-cursor_LINK_LIBRARIES}
+    )
+  endif()
+
+  if(WITH_GHOST_WAYLAND_DBUS)
+    list(APPEND PLATFORM_LINKLIBS
+      ${dbus_LINK_LIBRARIES}
+    )
+    add_definitions(-DWITH_GHOST_WAYLAND_DBUS)
+  endif()
+
+  if(WITH_GHOST_WAYLAND_LIBDECOR)
+    if(NOT WITH_GHOST_WAYLAND_DYNLOAD)
+      # Link by full library path (pkg-config `_LINK_LIBRARIES`), as done for
+      # the other Wayland libraries in this section.
+      list(APPEND PLATFORM_LINKLIBS ${libdecor_LINK_LIBRARIES})
+    endif()
+    add_definitions(-DWITH_GHOST_WAYLAND_LIBDECOR)
+  endif()
 endif()

 if(WITH_GHOST_X11)
@@ -842,8 +888,9 @@ unset(_IS_LINKER_DEFAULT)

 # Avoid conflicts with Mesa llvmpipe, Luxrender, and other plug-ins that may
 # use the same libraries as Blender with a different version or build options.
+set(PLATFORM_SYMBOLS_MAP ${CMAKE_SOURCE_DIR}/source/creator/symbols_unix.map)
 set(PLATFORM_LINKFLAGS
-  "${PLATFORM_LINKFLAGS} -Wl,--version-script='${CMAKE_SOURCE_DIR}/source/creator/blender.map'"
+  "${PLATFORM_LINKFLAGS} -Wl,--version-script='${PLATFORM_SYMBOLS_MAP}'"
 )

 # Don't use position independent executable for portable install since file
--- a/build_files/cmake/platform/platform_win32.cmake
+++ b/build_files/cmake/platform/platform_win32.cmake
@@ -950,3 +950,6 @@ endif()

 set(ZSTD_INCLUDE_DIRS ${LIBDIR}/zstd/include)
 set(ZSTD_LIBRARIES ${LIBDIR}/zstd/lib/zstd_static.lib)
+
+set(LEVEL_ZERO_ROOT_DIR ${LIBDIR}/level_zero)
+set(SYCL_ROOT_DIR ${LIBDIR}/dpcpp)
--- a/build_files/config/pipeline_config.yaml
+++ b/build_files/config/pipeline_config.yaml
@@ -54,6 +54,8 @@ buildbot:
        version: '10.1.243'
    cuda11:
        version: '11.4.1'
+    hip:
+        version: '5.2.21440'
    optix:
        version: '7.3.0'
    cmake:
--- a/doc/doxygen/Doxyfile
+++ b/doc/doxygen/Doxyfile
@@ -38,7 +38,7 @@ PROJECT_NAME           = Blender
 # could be handy for archiving the generated documentation or if some version
 # control system is used.

-PROJECT_NUMBER         = V3.3
+PROJECT_NUMBER         = V3.4

 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
--- a/doc/python_api/sphinx_changelog_gen.py
+++ b/doc/python_api/sphinx_changelog_gen.py
@@ -1,59 +1,111 @@
 # SPDX-License-Identifier: GPL-2.0-or-later

 """
-Dump the python API into a text file so we can generate changelogs.
+---------------

-output from this tool should be added into "doc/python_api/rst/change_log.rst"
+Dump the python API into a JSON file, or generate changelogs from those JSON API dumps.

-# dump api blender_version.py in CWD
-blender --background --python doc/python_api/sphinx_changelog_gen.py -- --dump
+Typically, changelog output from this tool should be added into "doc/python_api/rst/change_log.rst"

-# create changelog
+API dump files are saved together with the generated API doc on the server, with a general index file.
+This way the changelog generation simply needs to re-download the previous version's dump for the diffing process.
+
+---------------
+
+# Dump api blender_version.json in CWD:
+blender --background  --factory-startup --python doc/python_api/sphinx_changelog_gen.py -- \
+        --indexpath="path/to/api/docs/api_dump_index.json" \
+        dump --filepath-out="path/to/api/docs/<version>/api_dump.json"
+
+# Create changelog:
 blender --background --factory-startup --python doc/python_api/sphinx_changelog_gen.py -- \
-        --api_from blender_2_63_0.py \
-        --api_to   blender_2_64_0.py \
-        --api_out changes.rst
+        --indexpath="path/to/api/docs/api_dump_index.json" \
+        changelog --filepath-out doc/python_api/rst/change_log.rst

-
-# Api comparison can also run without blender
+# Api comparison can also run without blender,
+# will by default generate changelog between the last two available versions listed in the index,
+# unless input files are provided explicitly:
 python doc/python_api/sphinx_changelog_gen.py -- \
-        --api_from blender_api_2_63_0.py \
-        --api_to   blender_api_2_64_0.py \
-        --api_out changes.rst
+        --indexpath="path/to/api/docs/api_dump_index.json" \
+        changelog --filepath-in-from blender_api_2_63_0.json \
+                  --filepath-in-to   blender_api_2_64_0.json \
+                  --filepath-out changes.rst

-# Save the latest API dump in this folder, renaming it with its revision.
-# This way the next person updating it doesn't need to build an old Blender only for that
+--------------
+
+API dump index format:
+
+{[version_main, version_sub]: "<version>/api_dump.json", ...
+}
+
+API dump format:
+
+[
+    [version_main, version_sub, version_path],
+    {"module.name":
+        {"parent.class":
+            {"basic_type", "member_name":
+                ["Name", type, range, length, default, descr, f_args, f_arg_types, f_ret_types]}, ...
+        }, ...
+    }
+]

 """

-# format
-'''
-{"module.name":
-    {"parent.class":
-        {"basic_type", "member_name":
-            ("Name", type, range, length, default, descr, f_args, f_arg_types, f_ret_types)}, ...
-    }, ...
-}
-'''
+import json
+import os
+

 api_names = "basic_type" "name", "type", "range", "length", "default", "descr", "f_args", "f_arg_types", "f_ret_types"
-
 API_BASIC_TYPE = 0
 API_F_ARGS = 7


-def api_dunp_fname():
-    import bpy
-    return "blender_api_%s.py" % "_".join([str(i) for i in bpy.app.version])
+def api_version():
+    try:
+        import bpy
+    except:
+        return None, None
+    version = tuple(bpy.app.version[:2])
+    version_key = "%d.%d" % (version[0], version[1])
+    return version, version_key


-def api_dump():
-    dump = {}
-    dump_module = dump["bpy.types"] = {}
+def api_version_previous_in_index(index, version):
+    print("Searching for previous version to %s in %r" % (version, index))
+    version_prev = (version[0], version[1])
+    while True:
+        version_prev = (version_prev[0], version_prev[1] - 1)
+        if version_prev[1] < 0:
+            version_prev = (version_prev[0] - 1, 99)
+        if version_prev[0] < 0:
+            return None, None
+        version_prev_key = "%d.%d" % (version_prev[0], version_prev[1])
+        if version_prev_key in index:
+            print("Found previous version %s: %r" % (version_prev, index[version_prev_key]))
+            return version_prev, version_prev_key

+
+class JSONEncoderAPIDump(json.JSONEncoder):
+    def default(self, o):
+        if o is ...:
+            return "..."
+        if isinstance(o, set):
+            return tuple(o)
+        return json.JSONEncoder.default(self, o)
+
+
+def api_dump(args):
    import rna_info
    import inspect

+    version, version_key = api_version()
+    if version is None:
+        raise(ValueError("API dumps can only be generated from within Blender."))
+
+    dump = {}
+    dump_module = dump["bpy.types"] = {}
+
    struct = rna_info.BuildRNAInfo()[0]
    for struct_id, struct_info in sorted(struct.items()):

@@ -155,17 +207,25 @@ def api_dump():
            )
        del funcs

-    import pprint
+    filepath_out = args.filepath_out
+    with open(filepath_out, 'w', encoding='utf-8') as file_handle:
+        json.dump((version, dump), file_handle, cls=JSONEncoderAPIDump)

-    filename = api_dunp_fname()
-    filehandle = open(filename, 'w', encoding='utf-8')
-    tot = filehandle.write(pprint.pformat(dump, width=1))
-    filehandle.close()
-    print("%s, %d bytes written" % (filename, tot))
+    indexpath = args.indexpath
+    rootpath = os.path.dirname(indexpath)
+    if os.path.exists(indexpath):
+        with open(indexpath, 'r', encoding='utf-8') as file_handle:
+            index = json.load(file_handle)
+    else:
+        index = {}
+    index[version_key] = os.path.relpath(filepath_out, rootpath)
+    with open(indexpath, 'w', encoding='utf-8') as file_handle:
+        json.dump(index, file_handle)
+
+    print("API version %s dumped into %r, and index %r has been updated" % (version_key, filepath_out, indexpath))


 def compare_props(a, b, fuzz=0.75):
-
    # must be same basic_type, function != property
    if a[0] != b[0]:
        return False
@@ -180,15 +240,44 @@ def compare_props(a, b, fuzz=0.75):
    return ((tot / totlen) >= fuzz)


-def api_changelog(api_from, api_to, api_out):
+def api_changelog(args):
+    indexpath = args.indexpath
+    filepath_in_from = args.filepath_in_from
+    filepath_in_to = args.filepath_in_to
+    filepath_out = args.filepath_out

-    file_handle = open(api_from, 'r', encoding='utf-8')
-    dict_from = eval(file_handle.read())
-    file_handle.close()
+    rootpath = os.path.dirname(indexpath)

-    file_handle = open(api_to, 'r', encoding='utf-8')
-    dict_to = eval(file_handle.read())
-    file_handle.close()
+    version, version_key = api_version()
+    if version is None and (filepath_in_from is None or filepath_in_to is None):
+        raise(ValueError("API dumps files must be given when ran outside of Blender."))
+
+    with open(indexpath, 'r', encoding='utf-8') as file_handle:
+        index = json.load(file_handle)
+
+    if filepath_in_to is None:
+        filepath_in_to = index.get(version_key, None)
+    if filepath_in_to is None:
+        raise(ValueError("Cannot find API dump file for Blender version " + str(version) + " in index file."))
+
+    print("Found to file: %r" % filepath_in_to)
+
+    if filepath_in_from is None:
+        version_from, version_from_key = api_version_previous_in_index(index, version)
+        if version_from is None:
+            raise(ValueError("No previous version of Blender could be found in the index."))
+        filepath_in_from = index.get(version_from_key, None)
+    if filepath_in_from is None:
+        raise(ValueError("Cannot find API dump file for previous Blender version " + str(version_from) + " in index file."))
+
+    print("Found from file: %r" % filepath_in_from)
+    with open(os.path.join(rootpath, filepath_in_from), 'r', encoding='utf-8') as file_handle:
+        dump_version_from, dict_from = json.load(file_handle)
+    version_from_key = "%d.%d" % tuple(dump_version_from[:2])  # Also set when the "from" path is explicit.
+    with open(os.path.join(rootpath, filepath_in_to), 'r', encoding='utf-8') as file_handle:
+        dump_version, dict_to = json.load(file_handle)
+        assert(version is None or tuple(dump_version) == version)  # `version` is None outside Blender.
+    version_key = version_key or "%d.%d" % tuple(dump_version[:2])  # Label from the dump outside Blender.

    api_changes = []

@@ -249,63 +338,66 @@ def api_changelog(api_from, api_to, api_out):

    # also document function argument changes

-    fout = open(api_out, 'w', encoding='utf-8')
-    fw = fout.write
-    # print(api_changes)
+    with open(filepath_out, 'w', encoding='utf-8') as fout:
+        fw = fout.write

-    # :class:`bpy_struct.id_data`
+        # Write header.
+        fw(""
+           ":tocdepth: 2\n"
+           "\n"
+           "Blender API Change Log\n"
+           "**********************\n"
+           "\n"
+           ".. note, this document is auto generated by sphinx_changelog_gen.py\n"
+           "\n"
+           "\n"
+           "%s to %s\n"
+           "============\n"
+           "\n" % (version_from_key, version_key))

-    def write_title(title, title_char):
-        fw("%s\n%s\n\n" % (title, title_char * len(title)))
+        def write_title(title, title_char):
+            fw("%s\n%s\n\n" % (title, title_char * len(title)))

-    for mod_id, class_id, props_moved, props_new, props_old, func_args in api_changes:
-        class_name = class_id.split(".")[-1]
-        title = mod_id + "." + class_name
-        write_title(title, "-")
+        for mod_id, class_id, props_moved, props_new, props_old, func_args in api_changes:
+            class_name = class_id.split(".")[-1]
+            title = mod_id + "." + class_name
+            write_title(title, "-")

-        if props_new:
-            write_title("Added", "^")
-            for prop_id in props_new:
-                fw("* :class:`%s.%s.%s`\n" % (mod_id, class_name, prop_id))
-            fw("\n")
+            if props_new:
+                write_title("Added", "^")
+                for prop_id in props_new:
+                    fw("* :class:`%s.%s.%s`\n" % (mod_id, class_name, prop_id))
+                fw("\n")

-        if props_old:
-            write_title("Removed", "^")
-            for prop_id in props_old:
-                fw("* **%s**\n" % prop_id)  # can't link to removed docs
-            fw("\n")
+            if props_old:
+                write_title("Removed", "^")
+                for prop_id in props_old:
+                    fw("* **%s**\n" % prop_id)  # can't link to removed docs
+                fw("\n")

-        if props_moved:
-            write_title("Renamed", "^")
-            for prop_id_old, prop_id in props_moved:
-                fw("* **%s** -> :class:`%s.%s.%s`\n" % (prop_id_old, mod_id, class_name, prop_id))
-            fw("\n")
+            if props_moved:
+                write_title("Renamed", "^")
+                for prop_id_old, prop_id in props_moved:
+                    fw("* **%s** -> :class:`%s.%s.%s`\n" % (prop_id_old, mod_id, class_name, prop_id))
+                fw("\n")

-        if func_args:
-            write_title("Function Arguments", "^")
-            for func_id, args_old, args_new in func_args:
-                args_new = ", ".join(args_new)
-                args_old = ", ".join(args_old)
-                fw("* :class:`%s.%s.%s` (%s), *was (%s)*\n" % (mod_id, class_name, func_id, args_new, args_old))
-            fw("\n")
+            if func_args:
+                write_title("Function Arguments", "^")
+                for func_id, args_old, args_new in func_args:
+                    args_new = ", ".join(args_new)
+                    args_old = ", ".join(args_old)
+                    fw("* :class:`%s.%s.%s` (%s), *was (%s)*\n" % (mod_id, class_name, func_id, args_new, args_old))
+                fw("\n")

-    fout.close()
-
-    print("Written: %r" % api_out)
+    print("Written: %r" % filepath_out)


-def main():
+def main(argv=None):
    import sys
-    import os
+    import argparse

-    try:
-        import argparse
-    except ImportError:
-        print("Old Blender, just dumping")
-        api_dump()
-        return
-
-    argv = sys.argv
+    if argv is None:
+        argv = sys.argv

    if "--" not in argv:
        argv = []  # as if no args are passed
@@ -316,42 +408,42 @@ def main():
    usage_text = "Run blender in background mode with this script: "
    "blender --background --factory-startup --python %s -- [options]" % os.path.basename(__file__)

-    epilog = "Run this before releases"
-
-    parser = argparse.ArgumentParser(description=usage_text, epilog=epilog)
-
+    parser = argparse.ArgumentParser(description=usage_text,
+                                     epilog=__doc__,
+                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
-        "--dump", dest="dump", action='store_true',
-        help="When set the api will be dumped into blender_version.py")
+        "--indexpath", dest="indexpath", metavar='FILE', required=True,
+        help="Path of the JSON file containing the index of all available API dumps.")

-    parser.add_argument(
-        "--api_from", dest="api_from", metavar='FILE',
-        help="File to compare from (previous version)")
-    parser.add_argument(
-        "--api_to", dest="api_to", metavar='FILE',
-        help="File to compare from (current)")
-    parser.add_argument(
-        "--api_out", dest="api_out", metavar='FILE',
-        help="Output sphinx changelog")
+    parser_commands = parser.add_subparsers(required=True)

-    args = parser.parse_args(argv)  # In this example we won't use the args
+    parser_dump = parser_commands.add_parser('dump', help="Dump the current Blender Python API into a JSON file.")
+    parser_dump.add_argument(
+        "--filepath-out", dest="filepath_out", metavar='FILE', required=True,
+        help="Path of the JSON file containing the dump of the API.")
+    parser_dump.set_defaults(func=api_dump)

-    if not argv:
-        print("No args given!")
-        parser.print_help()
-        return
+    parser_changelog = parser_commands.add_parser(
+        'changelog',
+        help="Generate the RST changelog page based on two Blender Python API JSON dumps.",
+    )

-    if args.dump:
-        api_dump()
-    else:
-        if args.api_from and args.api_to and args.api_out:
-            api_changelog(args.api_from, args.api_to, args.api_out)
-        else:
-            print("Error: --api_from/api_to/api_out args needed")
-            parser.print_help()
-            return
+    parser_changelog.add_argument(
+        "--filepath-in-from", dest="filepath_in_from", metavar='FILE', default=None,
+        help="JSON dump file to compare from (typically, previous version). "
+             "If not given, will be automatically determined from current Blender version and index file.")
+    parser_changelog.add_argument(
+        "--filepath-in-to", dest="filepath_in_to", metavar='FILE', default=None,
+        help="JSON dump file to compare to (typically, current version). "
+             "If not given, will be automatically determined from current Blender version and index file.")
+    parser_changelog.add_argument(
+        "--filepath-out", dest="filepath_out", metavar='FILE', required=True,
+        help="Output sphinx changelog RST file.")
+    parser_changelog.set_defaults(func=api_changelog)

-    print("batch job finished, exiting")
+    args = parser.parse_args(argv)
+
+    args.func(args)


 if __name__ == "__main__":
--- a/doc/python_api/sphinx_doc_gen.py
+++ b/doc/python_api/sphinx_doc_gen.py
@@ -141,6 +141,26 @@ def handle_args():
        required=False,
    )

+    parser.add_argument(
+        "--api-changelog-generate",
+        dest="changelog",
+        default=False,
+        action='store_true',
+        help="Generate the API changelog RST file "
+        "(default=False, requires `--api-dump-index-path` parameter)",
+        required=False,
+    )
+
+    parser.add_argument(
+        "--api-dump-index-path",
+        dest="api_dump_index_path",
+        metavar='FILE',
+        default=None,
+        help="Path to the API dump index JSON file "
+        "(required when `--api-changelog-generate` is True)",
+        required=False,
+    )
+
    parser.add_argument(
        "-o", "--output",
        dest="output_dir",
@@ -514,6 +534,42 @@ if ARGS.sphinx_build_pdf:
        sphinx_make_pdf_log = os.path.join(ARGS.output_dir, ".latex_make.log")
        SPHINX_MAKE_PDF_STDOUT = open(sphinx_make_pdf_log, "w", encoding="utf-8")

+
+# --------------------------------CHANGELOG GENERATION--------------------------------------
+
+def generate_changelog():
+    import importlib.util
+    spec = importlib.util.spec_from_file_location(
+        "sphinx_changelog_gen",
+        os.path.abspath(os.path.join(SCRIPT_DIR, "sphinx_changelog_gen.py")),
+    )
+    sphinx_changelog_gen = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(sphinx_changelog_gen)
+
+    API_DUMP_INDEX_FILEPATH = ARGS.api_dump_index_path
+    API_DUMP_ROOT = os.path.dirname(API_DUMP_INDEX_FILEPATH)
+    API_DUMP_FILEPATH = os.path.abspath(os.path.join(API_DUMP_ROOT, BLENDER_VERSION_DOTS, "api_dump.json"))
+    API_CHANGELOG_FILEPATH = os.path.abspath(os.path.join(SPHINX_IN_TMP, "change_log.rst"))
+
+    sphinx_changelog_gen.main((
+        "--",
+        "--indexpath",
+        API_DUMP_INDEX_FILEPATH,
+        "dump",
+        "--filepath-out",
+        API_DUMP_FILEPATH,
+    ))
+
+    sphinx_changelog_gen.main((
+        "--",
+        "--indexpath",
+        API_DUMP_INDEX_FILEPATH,
+        "changelog",
+        "--filepath-out",
+        API_CHANGELOG_FILEPATH,
+    ))
+
+
 # --------------------------------API DUMP--------------------------------------

 # Lame, python won't give some access.
@@ -1075,6 +1131,7 @@ def pymodule2sphinx(basepath, module_name, module, title, module_all_extra):
 # Changes In Blender will force errors here.
 context_type_map = {
    # context_member: (RNA type, is_collection)
+    "active_action": ("Action", False),
    "active_annotation_layer": ("GPencilLayer", False),
    "active_bone": ("EditBone", False),
    "active_file": ("FileSelectEntry", False),
@@ -1473,7 +1530,8 @@ def pyrna2sphinx(basepath):
        else:
            fw(".. class:: %s\n\n" % struct_id)

-        fw("   %s\n\n" % struct.description)
+        write_indented_lines("   ", fw, struct.description, False)
+        fw("\n")

        # Properties sorted in alphabetical order.
        sorted_struct_properties = struct.properties[:]
@@ -2473,6 +2531,9 @@ def main():

    rna2sphinx(SPHINX_IN_TMP)

+    if ARGS.changelog:
+        generate_changelog()
+
    if ARGS.full_rebuild:
        # Only for full updates.
        shutil.rmtree(SPHINX_IN, True)
--- a/extern/audaspace/bindings/C/AUD_Special.cpp
+++ b/extern/audaspace/bindings/C/AUD_Special.cpp
@@ -270,7 +270,7 @@ AUD_API int AUD_readSound(AUD_Sound* sound, float* buffer, int length, int sampl
 	return length;
 }

-AUD_API const char* AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate, void(*callback)(float, void*), void* data)
+AUD_API int AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate, void(*callback)(float, void*), void* data, char* error, size_t errorsize)
 {
 	try
 	{
@@ -282,15 +282,20 @@ AUD_API const char* AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned i
 		std::shared_ptr<IWriter> writer = FileWriter::createWriter(filename, convCToDSpec(specs), static_cast<Container>(format), static_cast<Codec>(codec), bitrate);
 		FileWriter::writeReader(reader, writer, length, buffersize, callback, data);

-		return nullptr;
+		return true;
 	}
 	catch(Exception& e)
 	{
-		return e.getMessage().c_str();
+		if(error && errorsize)
+		{
+			std::strncpy(error, e.getMessage().c_str(), errorsize);
+			error[errorsize - 1] = '\0';
+		}
+		return false;
 	}
 }

-AUD_API const char* AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate, void(*callback)(float, void*), void* data)
+AUD_API int AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate, void(*callback)(float, void*), void* data, char* error, size_t errorsize)
 {
 	try
 	{
@@ -328,11 +333,16 @@ AUD_API const char* AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start
 		reader->seek(start);
 		FileWriter::writeReader(reader, writers, length, buffersize, callback, data);

-		return nullptr;
+		return true;
 	}
 	catch(Exception& e)
 	{
-		return e.getMessage().c_str();
+		if(error && errorsize)
+		{
+			std::strncpy(error, e.getMessage().c_str(), errorsize);
+			error[errorsize - 1] = '\0';
+		}
+		return false;
 	}
 }

--- a/extern/audaspace/bindings/C/AUD_Special.h
+++ b/extern/audaspace/bindings/C/AUD_Special.h
@@ -70,13 +70,15 @@ extern AUD_API int AUD_readSound(AUD_Sound* sound, float* buffer, int length, in
 * \param bitrate The bitrate for encoding.
 * \param callback A callback function that is called periodically during mixdown, reporting progress if length > 0. Can be NULL.
 * \param data Pass through parameter that is passed to the callback.
- * \return An error message or NULL in case of success.
+ * \param error String buffer to copy the error message to in case of failure.
+ * \param errorsize The size of the error buffer.
+ * \return Whether or not the operation succeeded.
 */
-extern AUD_API const char* AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int length,
+extern AUD_API int AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int length,
 							   unsigned int buffersize, const char* filename,
 							   AUD_DeviceSpecs specs, AUD_Container format,
 							   AUD_Codec codec, unsigned int bitrate,
-							   void(*callback)(float, void*), void* data);
+							   void(*callback)(float, void*), void* data, char* error, size_t errorsize);

 /**
 * Mixes a sound down into multiple files.
@@ -91,13 +93,15 @@ extern AUD_API const char* AUD_mixdown(AUD_Sound* sound, unsigned int start, uns
 * \param bitrate The bitrate for encoding.
 * \param callback A callback function that is called periodically during mixdown, reporting progress if length > 0. Can be NULL.
 * \param data Pass through parameter that is passed to the callback.
- * \return An error message or NULL in case of success.
+ * \param error String buffer to copy the error message to in case of failure.
+ * \param errorsize The size of the error buffer.
+ * \return Whether or not the operation succeeded.
 */
-extern AUD_API const char* AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsigned int length,
+extern AUD_API int AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsigned int length,
 										   unsigned int buffersize, const char* filename,
 										   AUD_DeviceSpecs specs, AUD_Container format,
 										   AUD_Codec codec, unsigned int bitrate,
-										   void(*callback)(float, void*), void* data);
+										   void(*callback)(float, void*), void* data, char* error, size_t errorsize);

 /**
 * Opens a read device and prepares it for mixdown of the sound scene.
--- a/extern/audaspace/plugins/pulseaudio/PulseAudioDevice.cpp
+++ b/extern/audaspace/plugins/pulseaudio/PulseAudioDevice.cpp
@@ -41,7 +41,7 @@ double PulseAudioDevice::PulseAudioSynchronizer::getPosition(std::shared_ptr<IHa

 void PulseAudioDevice::updateRingBuffer()
 {
-	unsigned int samplesize = AUD_SAMPLE_SIZE(m_specs);
+	unsigned int samplesize = AUD_DEVICE_SAMPLE_SIZE(m_specs);

 	std::unique_lock<std::mutex> lock(m_mixingLock);

--- a/extern/curve_fit_nd/README.blender
+++ b/extern/curve_fit_nd/README.blender
@@ -1,5 +1,5 @@
 Project: Curve-Fit-nD
 URL: https://github.com/ideasman42/curve-fit-nd
 License: BSD 3-Clause
-Upstream version: ddcd5bd (Last Release)
+Upstream version: ae32da9de264c3ed399673e2bc1bc09003799416 (Last Release)
 Local modifications: None
--- a/extern/curve_fit_nd/curve_fit_nd.h
+++ b/extern/curve_fit_nd/curve_fit_nd.h
@@ -39,7 +39,7 @@
 * Takes a flat array of points and evaluates that to calculate a bezier spline.
 *
 * \param points, points_len: The array of points to calculate a cubics from.
- * \param dims: The number of dimensions for for each element in \a points.
+ * \param dims: The number of dimensions for each element in \a points.
 * \param error_threshold: the error threshold to allow for,
 * the curve will be within this distance from \a points.
 * \param corners, corners_len: indices for points which will not have aligned tangents (optional).
@@ -47,10 +47,10 @@
 * to evaluate a line to detect corner indices.
 *
 * \param r_cubic_array, r_cubic_array_len: Resulting array of tangents and knots, formatted as follows:
- * ``r_cubic_array[r_cubic_array_len][3][dims]``,
+ * `r_cubic_array[r_cubic_array_len][3][dims]`,
 * where each point has 0 and 2 for the tangents and the middle index 1 for the knot.
- * The size of the *flat* array will be ``r_cubic_array_len * 3 * dims``.
- * \param r_corner_index_array, r_corner_index_len: Corner indices in in \a r_cubic_array (optional).
+ * The size of the *flat* array will be `r_cubic_array_len * 3 * dims`.
+ * \param r_corner_index_array, r_corner_index_len: Corner indices in \a r_cubic_array (optional).
 * This allows you to access corners on the resulting curve.
 *
 * \returns zero on success, nonzero is reserved for error values.
@@ -85,7 +85,7 @@ int curve_fit_cubic_to_points_fl(
 * Takes a flat array of points and evaluates that to calculate handle lengths.
 *
 * \param points, points_len: The array of points to calculate a cubics from.
- * \param dims: The number of dimensions for for each element in \a points.
+ * \param dims: The number of dimensions for each element in \a points.
 * \param points_length_cache: Optional pre-calculated lengths between points.
 * \param error_threshold: the error threshold to allow for,
 * \param tan_l, tan_r: Normalized tangents the handles will be aligned to.
@@ -166,7 +166,7 @@ int curve_fit_cubic_to_points_refit_fl(
 * A helper function that takes a line and outputs its corner indices.
 *
 * \param points, points_len: Curve to evaluate.
- * \param dims: The number of dimensions for for each element in \a points.
+ * \param dims: The number of dimensions for each element in \a points.
 * \param radius_min: Corners on the curve between points below this radius are ignored.
 * \param radius_max: Corners on the curve above this radius are ignored.
 * \param samples_max: Prevent testing corners beyond this many points
--- a/extern/curve_fit_nd/intern/curve_fit_cubic.c
+++ b/extern/curve_fit_nd/intern/curve_fit_cubic.c
@@ -43,20 +43,24 @@

 #include "../curve_fit_nd.h"

-/* Take curvature into account when calculating the least square solution isn't usable. */
+/** Take curvature into account when calculating the least square solution isn't usable. */
 #define USE_CIRCULAR_FALLBACK

-/* Use the maximum distance of any points from the direct line between 2 points
+/**
+ * Use the maximum distance of any points from the direct line between 2 points
 * to calculate how long the handles need to be.
 * Can do a 'perfect' reversal of subdivision when for curve has symmetrical handles and doesn't change direction
- * (as with an 'S' shape). */
+ * (as with an 'S' shape).
+ */
 #define USE_OFFSET_FALLBACK

-/* avoid re-calculating lengths multiple times */
+/** Avoid re-calculating lengths multiple times. */
 #define USE_LENGTH_CACHE

-/* store the indices in the cubic data so we can return the original indices,
- * useful when the caller has data associated with the curve. */
+/**
+ * Store the indices in the cubic data so we can return the original indices,
+ * useful when the caller has data associated with the curve.
+ */
 #define USE_ORIG_INDEX_DATA

 typedef unsigned int uint;
@@ -95,13 +99,15 @@ typedef unsigned int uint;
 * \{ */

 typedef struct Cubic {
-	/* single linked lists */
+	/** Single linked lists. */
 	struct Cubic *next;
 #ifdef USE_ORIG_INDEX_DATA
 	uint orig_span;
 #endif
-	/* 0: point_0, 1: handle_0, 2: handle_1, 3: point_1,
-	 * each one is offset by 'dims' */
+	/**
+	 * 0: point_0, 1: handle_0, 2: handle_1, 3: point_1,
+	 * each one is offset by 'dims'.
+	 */
 	double pt_data[0];
 } Cubic;

@@ -195,7 +201,7 @@ static double *cubic_list_as_array(
 	bool use_orig_index = (r_orig_index != NULL);
 #endif

-	/* fill the array backwards */
+	/* Fill the array backwards. */
 	const size_t array_chunk = 3 * dims;
 	double *array_iter = array + array_flat_len;
 	for (Cubic *citer = clist->items; citer; citer = citer->next) {
@@ -221,15 +227,15 @@ static double *cubic_list_as_array(
 	}
 #endif

-	/* flip tangent for first and last (we could leave at zero, but set to something useful) */
+	/* Flip tangent for first and last (we could leave at zero, but set to something useful). */

-	/* first */
+	/* First. */
 	array_iter -= array_chunk;
 	memcpy(&array_iter[dims], handle_prev, sizeof(double) * 2 * dims);
 	flip_vn_vnvn(&array_iter[0 * dims], &array_iter[1 * dims], &array_iter[2 * dims], dims);
 	assert(array == array_iter);

-	/* last */
+	/* Last. */
 	array_iter += array_flat_len - (3 * dims);
 	flip_vn_vnvn(&array_iter[2 * dims], &array_iter[1 * dims], &array_iter[0 * dims], dims);

@@ -455,7 +461,7 @@ static double points_calc_circumference_factor(
 	const double dot = dot_vnvn(tan_l, tan_r, dims);
 	const double len_tangent = dot < 0.0 ? len_vnvn(tan_l, tan_r, dims) : len_negated_vnvn(tan_l, tan_r, dims);
 	if (len_tangent > DBL_EPSILON) {
-		/* only clamp to avoid precision error */
+		/* Only clamp to avoid precision error. */
 		double angle = acos(max(-fabs(dot), -1.0));
 		/* Angle may be less than the length when the tangents define >180 degrees of the circle,
 		 * (tangents that point away from each other).
@@ -466,7 +472,7 @@ static double points_calc_circumference_factor(
 		return factor;
 	}
 	else {
-		/* tangents are exactly aligned (think two opposite sides of a circle). */
+		/* Tangents are exactly aligned (think two opposite sides of a circle). */
 		return (M_PI / 2);
 	}
 }
@@ -485,18 +491,18 @@ static double points_calc_circle_tangent_factor(
 	const double eps = 1e-8;
 	const double tan_dot = dot_vnvn(tan_l, tan_r, dims);
 	if (tan_dot > 1.0 - eps) {
-		/* no angle difference (use fallback, length wont make any difference) */
+		/* No angle difference (use fallback, length won't make any difference). */
 		return (1.0 / 3.0) * 0.75;
 	}
 	else if (tan_dot < -1.0 + eps) {
-		/* parallel tangents (half-circle) */
+		/* Parallel tangents (half-circle). */
 		return (1.0 / 2.0);
 	}
 	else {
-		/* non-aligned tangents, calculate handle length */
+		/* Non-aligned tangents, calculate handle length. */
 		const double angle = acos(tan_dot) / 2.0;

-		/* could also use 'angle_sin = len_vnvn(tan_l, tan_r, dims) / 2.0' */
+		/* Could also use `angle_sin = len_vnvn(tan_l, tan_r, dims) / 2.0`. */
 		const double angle_sin = sin(angle);
 		const double angle_cos = cos(angle);
 		return ((1.0 - angle_cos) / (angle_sin * 2.0)) / angle_sin;
@@ -516,15 +522,15 @@ static double points_calc_cubic_scale(
 	const double len_direct = len_vnvn(v_l, v_r, dims);
 	const double len_circle_factor = points_calc_circle_tangent_factor(tan_l, tan_r, dims);

-	/* if this curve is a circle, this value doesn't need modification */
+	/* If this curve is a circle, this value doesn't need modification. */
 	const double len_circle_handle = (len_direct * (len_circle_factor / 0.75));

-	/* scale by the difference from the circumference distance */
+	/* Scale by the difference from the circumference distance. */
 	const double len_circle = len_direct * points_calc_circumference_factor(tan_l, tan_r, dims);
 	double scale_handle = (coords_length / len_circle);

 	/* Could investigate an accurate calculation here,
-	 * though this gives close results */
+	 * though this gives close results. */
 	scale_handle = ((scale_handle - 1.0) * 1.75) + 1.0;

 	return len_circle_handle * scale_handle;
@@ -554,9 +560,8 @@ static void cubic_from_points_fallback(
 	r_cubic->orig_span = (points_offset_len - 1);
 #endif

-	/* p1 = p0 - (tan_l * alpha);
-	 * p2 = p3 + (tan_r * alpha);
-	 */
+	/* `p1 = p0 - (tan_l * alpha);`
+	 * `p2 = p3 + (tan_r * alpha);` */
 	msub_vn_vnvn_fl(p1, p0, tan_l, alpha, dims);
 	madd_vn_vnvn_fl(p2, p3, tan_r, alpha, dims);
 }
@@ -594,7 +599,7 @@ static void cubic_from_points_offset_fallback(
 	project_plane_vn_vnvn_normalized(a[0], tan_l, dir_unit, dims);
 	project_plane_vn_vnvn_normalized(a[1], tan_r, dir_unit, dims);

-	/* only for better accuracy, not essential */
+	/* Only for better accuracy, not essential. */
 	normalize_vn(a[0], dims);
 	normalize_vn(a[1], dims);

@@ -620,7 +625,7 @@ static void cubic_from_points_offset_fallback(
 	 *
 	 * The 'dists[..] + dir_dirs' limit is just a rough approximation.
 	 * While a more exact value could be calculated,
-	 * in this case the error values approach divide by zero (inf)
+	 * in this case the error values approach divide by zero (infinite)
 	 * so there is no need to be too precise when checking if limits have been exceeded. */

 	double alpha_l = (dists[0] / 0.75) / fabs(dot_vnvn(tan_l, a[0], dims));
@@ -644,9 +649,8 @@ static void cubic_from_points_offset_fallback(
 	r_cubic->orig_span = (points_offset_len - 1);
 #endif

-	/* p1 = p0 - (tan_l * alpha_l);
-	 * p2 = p3 + (tan_r * alpha_r);
-	 */
+	/* `p1 = p0 - (tan_l * alpha_l);`
+	 * `p2 = p3 + (tan_r * alpha_r);` */
 	msub_vn_vnvn_fl(p1, p0, tan_l, alpha_l, dims);
 	madd_vn_vnvn_fl(p2, p3, tan_r, alpha_r, dims);
 }
@@ -674,7 +678,7 @@ static void cubic_from_points(
 	const double *p0 = &points_offset[0];
 	const double *p3 = &points_offset[(points_offset_len - 1) * dims];

-	/* Point Pairs */
+	/* Point Pairs. */
 	double alpha_l, alpha_r;
 #ifdef USE_VLA
 	double a[2][dims];
@@ -696,7 +700,7 @@ static void cubic_from_points(
 			const double b0_plus_b1 = B0plusB1(u_prime[i]);
 			const double b2_plus_b3 = B2plusB3(u_prime[i]);

-			/* inline dot product */
+			/* Inline dot product. */
 			for (uint j = 0; j < dims; j++) {
 				const double tmp = (pt[j] - (p0[j] * b0_plus_b1)) + (p3[j] * b2_plus_b3);

@@ -719,7 +723,7 @@ static void cubic_from_points(
 			det_C0_C1 = c[0][0] * c[1][1] * 10e-12;
 		}

-		/* may still divide-by-zero, check below will catch nan values */
+		/* May still divide-by-zero, check below will catch NAN values. */
 		alpha_l = det_X_C1 / det_C0_C1;
 		alpha_r = det_C_0X / det_C0_C1;
 	}
@@ -736,7 +740,7 @@ static void cubic_from_points(

 	bool use_clamp = true;

-	/* flip check to catch nan values */
+	/* Flip check to catch NAN values. */
 	if (!(alpha_l >= 0.0) ||
 	    !(alpha_r >= 0.0))
 	{
@@ -750,7 +754,7 @@ static void cubic_from_points(
 		alpha_l = alpha_r = len_vnvn(p0, p3, dims) / 3.0;
 #endif

-		/* skip clamping when we're using default handles */
+		/* Skip clamping when we're using default handles. */
 		use_clamp = false;
 	}

@@ -764,9 +768,8 @@ static void cubic_from_points(
 	r_cubic->orig_span = (points_offset_len - 1);
 #endif

-	/* p1 = p0 - (tan_l * alpha_l);
-	 * p2 = p3 + (tan_r * alpha_r);
-	 */
+	/* `p1 = p0 - (tan_l * alpha_l);`
+	 * `p2 = p3 + (tan_r * alpha_r);` */
 	msub_vn_vnvn_fl(p1, p0, tan_l, alpha_l, dims);
 	madd_vn_vnvn_fl(p2, p3, tan_r, alpha_r, dims);

@@ -781,7 +784,7 @@ static void cubic_from_points(
 #endif
 		points_calc_center_weighted(points_offset, points_offset_len, dims, center);

-		const double clamp_scale = 3.0;  /* clamp to 3x */
+		const double clamp_scale = 3.0;  /* Clamp to 3x. */
 		double dist_sq_max = 0.0;

 		{
@@ -790,7 +793,7 @@ static void cubic_from_points(
 #if 0
 				double dist_sq_test = sq(len_vnvn(center, pt, dims) * clamp_scale);
 #else
-				/* do inline */
+				/* Do inline. */
 				double dist_sq_test = 0.0;
 				for (uint j = 0; j < dims; j++) {
 					dist_sq_test += sq((pt[j] - center[j]) * clamp_scale);
@@ -816,10 +819,8 @@ static void cubic_from_points(
 			alpha_l = alpha_r = len_vnvn(p0, p3, dims) / 3.0;
 #endif

-			/*
-			 * p1 = p0 - (tan_l * alpha_l);
-			 * p2 = p3 + (tan_r * alpha_r);
-			 */
+			/* `p1 = p0 - (tan_l * alpha_l);`
+			 * `p2 = p3 + (tan_r * alpha_r);` */
 			for (uint j = 0; j < dims; j++) {
 				p1[j] = p0[j] - (tan_l[j] * alpha_l);
 				p2[j] = p3[j] + (tan_r[j] * alpha_r);
@@ -829,7 +830,7 @@ static void cubic_from_points(
 			p2_dist_sq = len_squared_vnvn(center, p2, dims);
 		}

-		/* clamp within the 3x radius */
+		/* Clamp within the 3x radius. */
 		if (p1_dist_sq > dist_sq_max) {
 			isub_vnvn(p1, center, dims);
 			imul_vn_fl(p1, sqrt(dist_sq_max) / sqrt(p1_dist_sq), dims);
@@ -841,7 +842,7 @@ static void cubic_from_points(
 			iadd_vnvn(p2, center, dims);
 		}
 	}
-	/* end clamping */
+	/* End clamping. */
 }

 #ifdef USE_LENGTH_CACHE
@@ -917,7 +918,7 @@ static double cubic_find_root(
        const uint dims)
 {
 	/* Newton-Raphson Method. */
-	/* all vectors */
+	/* All vectors. */
 #ifdef USE_VLA
 	double q0_u[dims];
 	double q1_u[dims];
@@ -932,8 +933,8 @@ static double cubic_find_root(
 	cubic_calc_speed(cubic, u, dims, q1_u);
 	cubic_calc_acceleration(cubic, u, dims, q2_u);

-	/* may divide-by-zero, caller must check for that case */
-	/* u - ((q0_u - p) * q1_u) / (q1_u.length_squared() + (q0_u - p) * q2_u) */
+	/* May divide-by-zero, caller must check for that case. */
+	/* `u - ((q0_u - p) * q1_u) / (q1_u.length_squared() + (q0_u - p) * q2_u)` */
 	isub_vnvn(q0_u, p, dims);
 	return u - dot_vnvn(q0_u, q1_u, dims) /
 	       (len_squared_vn(q1_u, dims) + dot_vnvn(q0_u, q2_u, dims));
@@ -1032,7 +1033,7 @@ static bool fit_cubic_to_points(
 	double error_max_sq;
 	uint split_index;

-	/* Parameterize points, and attempt to fit curve */
+	/* Parameterize points, and attempt to fit curve. */
 	cubic_from_points(
 	        points_offset, points_offset_len,
 #ifdef USE_CIRCULAR_FALLBACK
@@ -1040,7 +1041,7 @@ static bool fit_cubic_to_points(
 #endif
 	        u, tan_l, tan_r, dims, r_cubic);

-	/* Find max deviation of points to fitted curve */
+	/* Find max deviation of points to fitted curve. */
 	error_max_sq = cubic_calc_error(
 	        r_cubic, points_offset, points_offset_len, u, dims,
 	        &split_index);
@@ -1062,7 +1063,7 @@ static bool fit_cubic_to_points(
 		        cubic_test, points_offset, points_offset_len, u, dims,
 		        &split_index);

-		/* intentionally use the newly calculated 'split_index',
+		/* Intentionally use the newly calculated 'split_index',
 		 * even if the 'error_max_sq_test' is worse. */
 		if (error_max_sq > error_max_sq_test) {
 			error_max_sq = error_max_sq_test;
@@ -1071,7 +1072,7 @@ static bool fit_cubic_to_points(
 	}
 #endif

-	/* Test the offset fallback */
+	/* Test the offset fallback. */
 #ifdef USE_OFFSET_FALLBACK
 	if (!(error_max_sq < error_threshold_sq)) {
 		/* Using the offset from the curve to calculate cubic handle length may give better results
@@ -1095,7 +1096,7 @@ static bool fit_cubic_to_points(
 	if (!(error_max_sq < error_threshold_sq)) {
 		cubic_copy(cubic_test, r_cubic, dims);

-		/* If error not too large, try some reparameterization and iteration */
+		/* If error not too large, try some re-parameterization and iteration. */
 		double *u_prime = malloc(sizeof(double) * points_offset_len);
 		for (uint iter = 0; iter < iteration_max; iter++) {
 			if (!cubic_reparameterize(
@@ -1123,7 +1124,7 @@ static bool fit_cubic_to_points(
 			}

 			if (!(error_max_sq < error_threshold_sq)) {
-				/* continue */
+				/* Continue. */
 			}
 			else {
 				assert((error_max_sq < error_threshold_sq));
@@ -1156,7 +1157,7 @@ static void fit_cubic_to_points_recursive(
        const double  error_threshold_sq,
        const uint    calc_flag,
        const uint    dims,
-        /* fill in the list */
+        /* Fill in the list. */
        CubicList *clist)
 {
 	Cubic *cubic = cubic_alloc(dims);
@@ -1180,7 +1181,7 @@ static void fit_cubic_to_points_recursive(
 	cubic_free(cubic);


-	/* Fitting failed -- split at max error point and fit recursively */
+	/* Fitting failed -- split at max error point and fit recursively. */

 	/* Check splinePoint is not an endpoint?
 	 *
@@ -1212,7 +1213,7 @@ static void fit_cubic_to_points_recursive(
 #endif
 		const double *pt   = &points_offset[split_index * dims];

-		/* tan_center = ((pt_a - pt).normalized() + (pt - pt_b).normalized()).normalized() */
+		/* `tan_center = ((pt_a - pt).normalized() + (pt - pt_b).normalized()).normalized()`. */
 		normalize_vn_vnvn(tan_center_a, pt_a, pt, dims);
 		normalize_vn_vnvn(tan_center_b, pt, pt_b, dims);
 		add_vn_vnvn(tan_center, tan_center_a, tan_center_b, dims);
@@ -1306,9 +1307,8 @@ int curve_fit_cubic_to_points_db(
 			const double *pt_l_next = pt_l + dims;
 			const double *pt_r_prev = pt_r - dims;

-			/* tan_l = (pt_l - pt_l_next).normalized()
-			 * tan_r = (pt_r_prev - pt_r).normalized()
-			 */
+			/* `tan_l = (pt_l - pt_l_next).normalized();`
+			 * `tan_r = (pt_r_prev - pt_r).normalized();` */
 			normalize_vn_vnvn(tan_l, pt_l, pt_l_next, dims);
 			normalize_vn_vnvn(tan_r, pt_r_prev, pt_r, dims);

@@ -1362,7 +1362,7 @@ int curve_fit_cubic_to_points_db(
 	*r_cubic_orig_index = NULL;
 #endif

-	/* allocate a contiguous array and free the linked list */
+	/* Allocate a contiguous array and free the linked list. */
 	*r_cubic_array = cubic_list_as_array(
 	        &clist
 #ifdef USE_ORIG_INDEX_DATA
@@ -1454,7 +1454,7 @@ int curve_fit_cubic_to_points_single_db(
 {
 	Cubic *cubic = alloca(cubic_alloc_size(dims));

-	/* in this instance theres no advantage in using length cache,
+	/* In this instance there is no advantage in using length cache,
 	 * since we're not recursively calculating values. */
 #ifdef USE_LENGTH_CACHE
 	double *points_length_cache_alloc = NULL;
--- a/extern/curve_fit_nd/intern/curve_fit_cubic_refit.c
+++ b/extern/curve_fit_nd/intern/curve_fit_cubic_refit.c
@@ -1490,3 +1490,4 @@ int curve_fit_cubic_to_points_refit_fl(

 	return result;
 }
+
--- a/extern/curve_fit_nd/intern/generic_alloc_impl.h
+++ b/extern/curve_fit_nd/intern/generic_alloc_impl.h
@@ -37,7 +37,7 @@
 * - #TPOOL_STRUCT: Name for pool struct name.
 * - #TPOOL_CHUNK_SIZE: Chunk size (optional), use 64kb when not defined.
 *
- * \note #TPOOL_ALLOC_TYPE must be at least ``sizeof(void *)``.
+ * \note #TPOOL_ALLOC_TYPE must be at least `sizeof(void *)`.
 *
 * Defines the API, uses #TPOOL_IMPL_PREFIX to prefix each function.
 *
--- a/extern/curve_fit_nd/intern/generic_heap.c
+++ b/extern/curve_fit_nd/intern/generic_heap.c
@@ -305,5 +305,3 @@ void *HEAP_node_ptr(HeapNode *node)
 {
 	return node->ptr;
 }
-
-/** \} */
--- a/extern/gflags/CMakeLists.txt
+++ b/extern/gflags/CMakeLists.txt
@@ -1,6 +1,13 @@
 # SPDX-License-Identifier: GPL-2.0-or-later
 # Copyright 2016 Blender Foundation. All rights reserved.

+# Too noisy for code we don't maintain.
+if(CMAKE_COMPILER_IS_GNUCC)
+  if(NOT "${CMAKE_CXX_COMPILER_VERSION}" VERSION_LESS "8.0")
+    add_cxx_flag("-Wno-cast-function-type")
+  endif()
+endif()
+
 set(INC
  src
  src/gflags
--- a/extern/mantaflow/README.blender
+++ b/extern/mantaflow/README.blender
@@ -2,4 +2,5 @@ Project: Mantaflow
 URL: http://mantaflow.com/
 License: Apache 2.0
 Upstream version: 0.13
-Local modifications: None
+Local modifications:
+* ./patches/local_namespace.diff to support loading MANTA variables into an isolated __main__ name-space.
--- a/extern/mantaflow/helper/pwrapper/registry.cpp
+++ b/extern/mantaflow/helper/pwrapper/registry.cpp
@@ -115,7 +115,7 @@ class WrapperRegistry {
  void construct(const std::string &scriptname, const vector<string> &args);
  void cleanup();
  void renameObjects();
-  void runPreInit();
+  void runPreInit(PyObject *name_space);
  PyObject *initModule();
  ClassData *lookup(const std::string &name);
  bool canConvert(ClassData *from, ClassData *to);
@@ -505,7 +505,7 @@ void WrapperRegistry::addConstants(PyObject *module)
  }
 }

-void WrapperRegistry::runPreInit()
+void WrapperRegistry::runPreInit(PyObject *name_space)
 {
  // add python directories to path
  PyObject *sys_path = PySys_GetObject((char *)"path");
@@ -518,7 +518,15 @@ void WrapperRegistry::runPreInit()
  }
  if (!mCode.empty()) {
    mCode = "from manta import *\n" + mCode;
-    PyRun_SimpleString(mCode.c_str());
+    PyObject *return_value = PyRun_String(mCode.c_str(), Py_file_input, name_space, name_space);
+    if (return_value == nullptr) {
+      if (PyErr_Occurred()) {
+        PyErr_Print();
+      }
+    }
+    else {
+      Py_DECREF(return_value);
+    }
  }
 }

@@ -698,16 +706,23 @@ PyObject *WrapperRegistry::initModule()
 //******************************************************
 // Register members and exposed functions

-void setup(const std::string &filename, const std::vector<std::string> &args)
+void setup(const bool python_lifecycle,
+           const std::string &filename,
+           const std::vector<std::string> &args,
+           PyObject *name_space)
 {
  WrapperRegistry::instance().construct(filename, args);
-  Py_Initialize();
-  WrapperRegistry::instance().runPreInit();
+  if (python_lifecycle) {
+    Py_Initialize();
+  }
+  WrapperRegistry::instance().runPreInit(name_space);
 }

-void finalize()
+void finalize(const bool python_lifecycle)
 {
-  Py_Finalize();
+  if (python_lifecycle) {
+    Py_Finalize();
+  }
  WrapperRegistry::instance().cleanup();
 }

--- a/extern/mantaflow/helper/pwrapper/registry.h
+++ b/extern/mantaflow/helper/pwrapper/registry.h
@@ -48,8 +48,11 @@ template<class T> struct Namify {
 namespace Pb {

 // internal registry access
-void setup(const std::string &filename, const std::vector<std::string> &args);
-void finalize();
+void setup(bool python_lifecycle,
+           const std::string &filename,
+           const std::vector<std::string> &args,
+           PyObject *name_space);
+void finalize(bool python_lifecycle);
 bool canConvert(PyObject *obj, const std::string &to);
 Manta::PbClass *objFromPy(PyObject *obj);
 Manta::PbClass *createPy(const std::string &classname,
--- a/extern/mantaflow/patches/local_namespace.diff
+++ b/extern/mantaflow/patches/local_namespace.diff
@@ -0,0 +1,86 @@
+diff --git a/extern/mantaflow/helper/pwrapper/registry.cpp b/extern/mantaflow/helper/pwrapper/registry.cpp
+index 5196c0409f8..b4206a41dea 100644
+--- a/extern/mantaflow/helper/pwrapper/registry.cpp
+++ b/extern/mantaflow/helper/pwrapper/registry.cpp
+@@ -115,7 +115,7 @@ class WrapperRegistry {
+   void construct(const std::string &scriptname, const vector<string> &args);
+   void cleanup();
+   void renameObjects();
+-  void runPreInit();
+  void runPreInit(PyObject *name_space);
+   PyObject *initModule();
+   ClassData *lookup(const std::string &name);
+   bool canConvert(ClassData *from, ClassData *to);
+@@ -505,7 +505,7 @@ void WrapperRegistry::addConstants(PyObject *module)
+   }
+ }
+ 
+-void WrapperRegistry::runPreInit()
+void WrapperRegistry::runPreInit(PyObject *name_space)
+ {
+   // add python directories to path
+   PyObject *sys_path = PySys_GetObject((char *)"path");
+@@ -518,7 +518,15 @@ void WrapperRegistry::runPreInit()
+   }
+   if (!mCode.empty()) {
+     mCode = "from manta import *\n" + mCode;
+-    PyRun_SimpleString(mCode.c_str());
+    PyObject *return_value = PyRun_String(mCode.c_str(), Py_file_input, name_space, name_space);
+    if (return_value == nullptr) {
+      if (PyErr_Occurred()) {
+        PyErr_Print();
+      }
+    }
+    else {
+      Py_DECREF(return_value);
+    }
+   }
+ }
+ 
+@@ -698,16 +706,23 @@ PyObject *WrapperRegistry::initModule()
+ //******************************************************
+ // Register members and exposed functions
+ 
+-void setup(const std::string &filename, const std::vector<std::string> &args)
+void setup(const bool python_lifecycle,
+           const std::string &filename,
+           const std::vector<std::string> &args,
+           PyObject *name_space)
+ {
+   WrapperRegistry::instance().construct(filename, args);
+-  Py_Initialize();
+-  WrapperRegistry::instance().runPreInit();
+  if (python_lifecycle) {
+    Py_Initialize();
+  }
+  WrapperRegistry::instance().runPreInit(name_space);
+ }
+ 
+-void finalize()
+void finalize(const bool python_lifecycle)
+ {
+-  Py_Finalize();
+  if (python_lifecycle) {
+    Py_Finalize();
+  }
+   WrapperRegistry::instance().cleanup();
+ }
+ 
+diff --git a/extern/mantaflow/helper/pwrapper/registry.h b/extern/mantaflow/helper/pwrapper/registry.h
+index d9d2bbb624b..2273d0b9bb1 100644
+--- a/extern/mantaflow/helper/pwrapper/registry.h
+++ b/extern/mantaflow/helper/pwrapper/registry.h
+@@ -48,8 +48,11 @@ template<class T> struct Namify {
+ namespace Pb {
+ 
+ // internal registry access
+-void setup(const std::string &filename, const std::vector<std::string> &args);
+-void finalize();
+void setup(bool python_lifecycle,
+           const std::string &filename,
+           const std::vector<std::string> &args,
+           PyObject *name_space);
+void finalize(bool python_lifecycle);
+ bool canConvert(PyObject *obj, const std::string &to);
+ Manta::PbClass *objFromPy(PyObject *obj);
+ Manta::PbClass *createPy(const std::string &classname,
--- a/intern/CMakeLists.txt
+++ b/intern/CMakeLists.txt
@@ -67,3 +67,10 @@ endif()
 if(UNIX AND NOT APPLE)
  add_subdirectory(libc_compat)
 endif()
+
+if(UNIX AND NOT APPLE)
+  # It is important that this comes after "ghost", as it uses includes defined by GHOST's CMake.
+  if(WITH_GHOST_WAYLAND AND WITH_GHOST_WAYLAND_DYNLOAD)
+    add_subdirectory(wayland_dynload)
+  endif()
+endif()
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -36,8 +36,13 @@ if(WITH_CYCLES_NATIVE_ONLY)
  )

  if(NOT MSVC)
-    string(APPEND CMAKE_CXX_FLAGS " -march=native")
-    set(CYCLES_KERNEL_FLAGS "-march=native")
+    ADD_CHECK_CXX_COMPILER_FLAG(CMAKE_CXX_FLAGS _has_march_native "-march=native")
+    if(_has_march_native)
+      set(CYCLES_KERNEL_FLAGS "-march=native")
+    else()
+      set(CYCLES_KERNEL_FLAGS "")
+    endif()
+    unset(_has_march_native)
  else()
    if(NOT MSVC_NATIVE_ARCH_FLAGS)
        TRY_RUN(
@@ -263,6 +268,10 @@ if(WITH_CYCLES_DEVICE_OPTIX)
  endif()
 endif()

+if (WITH_CYCLES_DEVICE_ONEAPI)
+  add_definitions(-DWITH_ONEAPI)
+endif()
+
 if(WITH_CYCLES_EMBREE)
  add_definitions(-DWITH_EMBREE)
  include_directories(
--- a/intern/cycles/blender/CMakeLists.txt
+++ b/intern/cycles/blender/CMakeLists.txt
@@ -128,10 +128,6 @@ if(WITH_OPENIMAGEDENOISE)
  )
 endif()

-if(WITH_EXPERIMENTAL_FEATURES)
-  add_definitions(-DWITH_NEW_CURVES_TYPE)
-endif()
-
 blender_add_lib(bf_intern_cycles "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")

 add_dependencies(bf_intern_cycles bf_rna)
--- a/intern/cycles/blender/addon/presets.py
+++ b/intern/cycles/blender/addon/presets.py
@@ -84,10 +84,36 @@ class AddPresetViewportSampling(AddPresetBase, Operator):
    preset_subdir = "cycles/viewport_sampling"


+class AddPresetPerformance(AddPresetBase, Operator):
+    '''Add a Performance Preset'''
+    bl_idname = "render.cycles_performance_preset_add"
+    bl_label = "Add Performance Preset"
+    preset_menu = "CYCLES_PT_performance_presets"
+
+    preset_defines = [  # Two entries; adjacent literals without a comma would be concatenated.
+        "render = bpy.context.scene.render",
+        "cycles = bpy.context.scene.cycles"
+    ]
+
+    preset_values = [
+        "render.threads_mode",
+        "render.use_persistent_data",
+        "cycles.debug_use_spatial_splits",
+        "cycles.debug_use_compact_bvh",
+        "cycles.debug_use_hair_bvh",
+        "cycles.debug_bvh_time_steps",
+        "cycles.use_auto_tile",
+        "cycles.tile_size",
+    ]
+
+    preset_subdir = "cycles/performance"
+
+
 classes = (
    AddPresetIntegrator,
    AddPresetSampling,
    AddPresetViewportSampling,
+    AddPresetPerformance,
 )


--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -118,7 +118,8 @@ enum_device_type = (
    ('CUDA', "CUDA", "CUDA", 1),
    ('OPTIX', "OptiX", "OptiX", 3),
    ('HIP', "HIP", "HIP", 4),
-    ('METAL', "Metal", "Metal", 5)
+    ('METAL', "Metal", "Metal", 5),
+    ('ONEAPI', "oneAPI", "oneAPI", 6)
 )

 enum_texture_limit = (
@@ -692,7 +693,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
    debug_use_compact_bvh: BoolProperty(
        name="Use Compact BVH",
        description="Use compact BVH structure (uses less ram but renders slower)",
-        default=True,
+        default=False,
    )
    debug_bvh_time_steps: IntProperty(
        name="BVH Time Steps",
@@ -1397,7 +1398,8 @@ class CyclesPreferences(bpy.types.AddonPreferences):

    def get_device_types(self, context):
        import _cycles
-        has_cuda, has_optix, has_hip, has_metal = _cycles.get_device_types()
+        has_cuda, has_optix, has_hip, has_metal, has_oneapi = _cycles.get_device_types()
+
        list = [('NONE', "None", "Don't use compute device", 0)]
        if has_cuda:
            list.append(('CUDA', "CUDA", "Use CUDA for GPU acceleration", 1))
@@ -1407,6 +1409,8 @@ class CyclesPreferences(bpy.types.AddonPreferences):
            list.append(('HIP', "HIP", "Use HIP for GPU acceleration", 4))
        if has_metal:
            list.append(('METAL', "Metal", "Use Metal for GPU acceleration", 5))
+        if has_oneapi:
+            list.append(('ONEAPI', "oneAPI", "Use oneAPI for GPU acceleration", 6))

        return list

@@ -1438,7 +1442,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):

    def update_device_entries(self, device_list):
        for device in device_list:
-            if not device[1] in {'CUDA', 'OPTIX', 'CPU', 'HIP', 'METAL'}:
+            if not device[1] in {'CUDA', 'OPTIX', 'CPU', 'HIP', 'METAL', 'ONEAPI'}:
                continue
            # Try to find existing Device entry
            entry = self.find_existing_device_entry(device)
@@ -1482,7 +1486,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
        import _cycles
        # Ensure `self.devices` is not re-allocated when the second call to
        # get_devices_for_type is made, freeing items from the first list.
-        for device_type in ('CUDA', 'OPTIX', 'HIP', 'METAL'):
+        for device_type in ('CUDA', 'OPTIX', 'HIP', 'METAL', 'ONEAPI'):
            self.update_device_entries(_cycles.available_devices(device_type))

    # Deprecated: use refresh_devices instead.
@@ -1545,18 +1549,31 @@ class CyclesPreferences(bpy.types.AddonPreferences):
            elif device_type == 'HIP':
                import sys
                if sys.platform[:3] == "win":
-                    col.label(text="Requires discrete AMD GPU with RDNA architecture", icon='BLANK1')
+                    col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
                    col.label(text="and AMD Radeon Pro 21.Q4 driver or newer", icon='BLANK1')
                elif sys.platform.startswith("linux"):
-                    col.label(text="Requires discrete AMD GPU with RDNA architecture", icon='BLANK1')
+                    col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
                    col.label(text="and AMD driver version 22.10 or newer", icon='BLANK1')
+            elif device_type == 'ONEAPI':
+                import sys
+                col.label(text="Requires Intel GPU with Xe-HPG architecture", icon='BLANK1')
+                if sys.platform.startswith("win"):
+                    col.label(text="and Windows driver version 101.1660 or newer", icon='BLANK1')
+                elif sys.platform.startswith("linux"):
+                    col.label(text="and Linux driver version xx.xx.23570 or newer", icon='BLANK1')
            elif device_type == 'METAL':
                col.label(text="Requires Apple Silicon with macOS 12.2 or newer", icon='BLANK1')
                col.label(text="or AMD with macOS 12.3 or newer", icon='BLANK1')
            return

        for device in devices:
-            box.prop(device, "use", text=device.name)
+            import unicodedata
+            box.prop(
+                device, "use", text=device.name
+                .replace('(TM)', unicodedata.lookup('TRADE MARK SIGN'))
+                .replace('(R)', unicodedata.lookup('REGISTERED SIGN'))
+                .replace('(C)', unicodedata.lookup('COPYRIGHT SIGN'))
+            )

    def draw_impl(self, layout, context):
        row = layout.row()
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -43,6 +43,12 @@ class CYCLES_PT_integrator_presets(CyclesPresetPanel):
    preset_add_operator = "render.cycles_integrator_preset_add"


+class CYCLES_PT_performance_presets(CyclesPresetPanel):
+    bl_label = "Performance Presets"
+    preset_subdir = "cycles/performance"
+    preset_add_operator = "render.cycles_performance_preset_add"
+
+
 class CyclesButtonsPanel:
    bl_space_type = "PROPERTIES"
    bl_region_type = "WINDOW"
@@ -111,6 +117,12 @@ def use_optix(context):
    return (get_device_type(context) == 'OPTIX' and cscene.device == 'GPU')


+def use_oneapi(context):
+    cscene = context.scene.cycles
+
+    return (get_device_type(context) == 'ONEAPI' and cscene.device == 'GPU')
+
+
 def use_multi_device(context):
    cscene = context.scene.cycles
    if cscene.device != 'GPU':
@@ -618,6 +630,9 @@ class CYCLES_RENDER_PT_performance(CyclesButtonsPanel, Panel):
    bl_label = "Performance"
    bl_options = {'DEFAULT_CLOSED'}

+    def draw_header_preset(self, context):
+        CYCLES_PT_performance_presets.draw_panel_header(self.layout)
+
    def draw(self, context):
        pass

@@ -937,6 +952,8 @@ class CYCLES_CAMERA_PT_dof(CyclesButtonsPanel, Panel):

        col = split.column()
        col.prop(dof, "focus_object", text="Focus Object")
+        if dof.focus_object and dof.focus_object.type == 'ARMATURE':
+            col.prop_search(dof, "focus_subtarget", dof.focus_object.data, "bones", text="Focus Bone")

        sub = col.row()
        sub.active = dof.focus_object is None
@@ -1196,7 +1213,7 @@ class CYCLES_OBJECT_PT_lightgroup(CyclesButtonsPanel, Panel):
        sub.prop_search(ob, "lightgroup", view_layer, "lightgroups", text="Light Group", results_are_suggestions=True)

        sub = row.column(align=True)
-        sub.active = bool(ob.lightgroup) and not any(lg.name == ob.lightgroup for lg in view_layer.lightgroups)
+        sub.enabled = bool(ob.lightgroup) and not any(lg.name == ob.lightgroup for lg in view_layer.lightgroups)
        sub.operator("scene.view_layer_add_lightgroup", icon='ADD', text="").name = ob.lightgroup


@@ -1634,7 +1651,7 @@ class CYCLES_WORLD_PT_settings_light_group(CyclesButtonsPanel, Panel):
        )

        sub = row.column(align=True)
-        sub.active = bool(world.lightgroup) and not any(lg.name == world.lightgroup for lg in view_layer.lightgroups)
+        sub.enabled = bool(world.lightgroup) and not any(lg.name == world.lightgroup for lg in view_layer.lightgroups)
        sub.operator("scene.view_layer_add_lightgroup", icon='ADD', text="").name = world.lightgroup


@@ -2263,6 +2280,7 @@ classes = (
    CYCLES_PT_sampling_presets,
    CYCLES_PT_viewport_sampling_presets,
    CYCLES_PT_integrator_presets,
+    CYCLES_PT_performance_presets,
    CYCLES_RENDER_PT_sampling,
    CYCLES_RENDER_PT_sampling_viewport,
    CYCLES_RENDER_PT_sampling_viewport_denoise,
--- a/intern/cycles/blender/camera.cpp
+++ b/intern/cycles/blender/camera.cpp
@@ -143,11 +143,20 @@ static float blender_camera_focal_distance(BL::RenderEngine &b_engine,
  if (!b_dof_object)
    return b_camera.dof().focus_distance();

+  Transform dofmat = get_transform(b_dof_object.matrix_world());
+
+  string focus_subtarget = b_camera.dof().focus_subtarget();
+  if (b_dof_object.pose() && !focus_subtarget.empty()) {
+    BL::PoseBone b_bone = b_dof_object.pose().bones[focus_subtarget];
+    if (b_bone) {
+      dofmat = dofmat * get_transform(b_bone.matrix());
+    }
+  }
+
  /* for dof object, return distance along camera Z direction */
  BL::Array<float, 16> b_ob_matrix;
  b_engine.camera_model_matrix(b_ob, bcam->use_spherical_stereo, b_ob_matrix);
  Transform obmat = transform_clear_scale(get_transform(b_ob_matrix));
-  Transform dofmat = get_transform(b_dof_object.matrix_world());
  float3 view_dir = normalize(transform_get_column(&obmat, 2));
  float3 dof_dir = transform_get_column(&obmat, 3) - transform_get_column(&dofmat, 3);
  return fabsf(dot(view_dir, dof_dir));
--- a/intern/cycles/blender/curves.cpp
+++ b/intern/cycles/blender/curves.cpp
@@ -55,7 +55,7 @@ static bool ObtainCacheParticleData(
    return false;

  Transform tfm = get_transform(b_ob->matrix_world());
-  Transform itfm = transform_quick_inverse(tfm);
+  Transform itfm = transform_inverse(tfm);

  for (BL::Modifier &b_mod : b_ob->modifiers) {
    if ((b_mod.type() == b_mod.type_PARTICLE_SYSTEM) &&
@@ -613,8 +613,6 @@ void BlenderSync::sync_particle_hair(
  }
 }

-#ifdef WITH_NEW_CURVES_TYPE
-
 static std::optional<BL::FloatAttribute> find_curves_radius_attribute(BL::Curves b_curves)
 {
  for (BL::Attribute &b_attribute : b_curves.attributes) {
@@ -632,6 +630,25 @@ static std::optional<BL::FloatAttribute> find_curves_radius_attribute(BL::Curves
  return std::nullopt;
 }

+static BL::FloatVectorAttribute find_curves_position_attribute(BL::Curves b_curves)
+{
+  for (BL::Attribute &b_attribute : b_curves.attributes) {
+    if (b_attribute.name() != "position") {
+      continue;
+    }
+    if (b_attribute.domain() != BL::Attribute::domain_POINT) {
+      continue;
+    }
+    if (b_attribute.data_type() != BL::Attribute::data_type_FLOAT_VECTOR) {
+      continue;
+    }
+    return BL::FloatVectorAttribute{b_attribute};
+  }
+  /* The position attribute must exist. */
+  assert(false);
+  return BL::FloatVectorAttribute{b_curves.attributes[0]};
+}
+
 template<typename TypeInCycles, typename GetValueAtIndex>
 static void fill_generic_attribute(BL::Curves &b_curves,
                                   TypeInCycles *data,
@@ -795,16 +812,16 @@ static void attr_create_generic(Scene *scene,
  }
 }

-static float4 hair_point_as_float4(BL::Curves b_curves,
+static float4 hair_point_as_float4(BL::FloatVectorAttribute b_attr_position,
                                   std::optional<BL::FloatAttribute> b_attr_radius,
                                   const int index)
 {
-  float4 mP = float3_to_float4(get_float3(b_curves.position_data[index].vector()));
+  float4 mP = float3_to_float4(get_float3(b_attr_position.data[index].vector()));
  mP.w = b_attr_radius ? b_attr_radius->data[index].value() : 0.0f;
  return mP;
 }

-static float4 interpolate_hair_points(BL::Curves b_curves,
+static float4 interpolate_hair_points(BL::FloatVectorAttribute b_attr_position,
                                      std::optional<BL::FloatAttribute> b_attr_radius,
                                      const int first_point_index,
                                      const int num_points,
@@ -814,8 +831,8 @@ static float4 interpolate_hair_points(BL::Curves b_curves,
  const int point_a = clamp((int)curve_t, 0, num_points - 1);
  const int point_b = min(point_a + 1, num_points - 1);
  const float t = curve_t - (float)point_a;
-  return lerp(hair_point_as_float4(b_curves, b_attr_radius, first_point_index + point_a),
-              hair_point_as_float4(b_curves, b_attr_radius, first_point_index + point_b),
+  return lerp(hair_point_as_float4(b_attr_position, b_attr_radius, first_point_index + point_a),
+              hair_point_as_float4(b_attr_position, b_attr_radius, first_point_index + point_b),
              t);
 }

@@ -848,6 +865,7 @@ static void export_hair_curves(Scene *scene,

  hair->reserve_curves(num_curves, num_keys);

+  BL::FloatVectorAttribute b_attr_position = find_curves_position_attribute(b_curves);
  std::optional<BL::FloatAttribute> b_attr_radius = find_curves_radius_attribute(b_curves);

  /* Export curves and points. */
@@ -866,9 +884,9 @@ static void export_hair_curves(Scene *scene,

    /* Position and radius. */
    for (int i = 0; i < num_points; i++) {
-      const float3 co = get_float3(b_curves.position_data[first_point_index + i].vector());
+      const float3 co = get_float3(b_attr_position.data[first_point_index + i].vector());
      const float radius = b_attr_radius ? b_attr_radius->data[first_point_index + i].value() :
-                                           0.0f;
+                                           0.005f;
      hair->add_curve_key(co, radius);

      if (attr_intercept) {
@@ -923,6 +941,7 @@ static void export_hair_curves_motion(Hair *hair, BL::Curves b_curves, int motio
  int num_motion_keys = 0;
  int curve_index = 0;

+  BL::FloatVectorAttribute b_attr_position = find_curves_position_attribute(b_curves);
  std::optional<BL::FloatAttribute> b_attr_radius = find_curves_radius_attribute(b_curves);

  for (int i = 0; i < num_curves; i++) {
@@ -938,7 +957,7 @@ static void export_hair_curves_motion(Hair *hair, BL::Curves b_curves, int motio
        int point_index = first_point_index + i;

        if (point_index < num_keys) {
-          mP[num_motion_keys] = hair_point_as_float4(b_curves, b_attr_radius, point_index);
+          mP[num_motion_keys] = hair_point_as_float4(b_attr_position, b_attr_radius, point_index);
          num_motion_keys++;

          if (!have_motion) {
@@ -958,7 +977,7 @@ static void export_hair_curves_motion(Hair *hair, BL::Curves b_curves, int motio
      for (int i = 0; i < curve.num_keys; i++) {
        const float step = i * step_size;
        mP[num_motion_keys] = interpolate_hair_points(
-            b_curves, b_attr_radius, first_point_index, num_points, step);
+            b_attr_position, b_attr_radius, first_point_index, num_points, step);
        num_motion_keys++;
      }
      have_motion = true;
@@ -990,15 +1009,6 @@ void BlenderSync::sync_hair(Hair *hair, BObjectInfo &b_ob_info, bool motion, int
    export_hair_curves(scene, hair, b_curves, need_motion, motion_scale);
  }
 }
-#else
-void BlenderSync::sync_hair(Hair *hair, BObjectInfo &b_ob_info, bool motion, int motion_step)
-{
-  (void)hair;
-  (void)b_ob_info;
-  (void)motion;
-  (void)motion_step;
-}
-#endif

 void BlenderSync::sync_hair(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, Hair *hair)
 {
@@ -1010,14 +1020,11 @@ void BlenderSync::sync_hair(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, H
  new_hair.set_used_shaders(used_shaders);

  if (view_layer.use_hair) {
-#ifdef WITH_NEW_CURVES_TYPE
    if (b_ob_info.object_data.is_a(&RNA_Curves)) {
      /* Hair object. */
      sync_hair(&new_hair, b_ob_info, false);
    }
-    else
-#endif
-    {
+    else {
      /* Particle hair. */
      bool need_undeformed = new_hair.need_attribute(scene, ATTR_STD_GENERATED);
      BL::Mesh b_mesh = object_to_mesh(
@@ -1064,15 +1071,12 @@ void BlenderSync::sync_hair_motion(BL::Depsgraph b_depsgraph,

  /* Export deformed coordinates. */
  if (ccl::BKE_object_is_deform_modified(b_ob_info, b_scene, preview)) {
-#ifdef WITH_NEW_CURVES_TYPE
    if (b_ob_info.object_data.is_a(&RNA_Curves)) {
      /* Hair object. */
      sync_hair(hair, b_ob_info, true, motion_step);
      return;
    }
-    else
-#endif
-    {
+    else {
      /* Particle hair. */
      BL::Mesh b_mesh = object_to_mesh(
          b_data, b_ob_info, b_depsgraph, false, Mesh::SUBDIVISION_NONE);
--- a/intern/cycles/blender/device.cpp
+++ b/intern/cycles/blender/device.cpp
@@ -15,6 +15,7 @@ enum ComputeDevice {
  COMPUTE_DEVICE_OPTIX = 3,
  COMPUTE_DEVICE_HIP = 4,
  COMPUTE_DEVICE_METAL = 5,
+  COMPUTE_DEVICE_ONEAPI = 6,

  COMPUTE_DEVICE_NUM
 };
@@ -76,6 +77,9 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
      else if (compute_device == COMPUTE_DEVICE_METAL) {
        mask |= DEVICE_MASK_METAL;
      }
+      else if (compute_device == COMPUTE_DEVICE_ONEAPI) {
+        mask |= DEVICE_MASK_ONEAPI;
+      }
      vector<DeviceInfo> devices = Device::available_devices(mask);

      /* Match device preferences and available devices. */
--- a/intern/cycles/blender/geometry.cpp
+++ b/intern/cycles/blender/geometry.cpp
@@ -18,11 +18,7 @@ CCL_NAMESPACE_BEGIN

 static Geometry::Type determine_geom_type(BObjectInfo &b_ob_info, bool use_particle_hair)
 {
-#ifdef WITH_NEW_CURVES_TYPE
  if (b_ob_info.object_data.is_a(&RNA_Curves) || use_particle_hair) {
-#else
-  if (use_particle_hair) {
-#endif
    return Geometry::HAIR;
  }

@@ -217,11 +213,7 @@ void BlenderSync::sync_geometry_motion(BL::Depsgraph &b_depsgraph,
    if (progress.get_cancel())
      return;

-#ifdef WITH_NEW_CURVES_TYPE
    if (b_ob_info.object_data.is_a(&RNA_Curves) || use_particle_hair) {
-#else
-    if (use_particle_hair) {
-#endif
      Hair *hair = static_cast<Hair *>(geom);
      sync_hair_motion(b_depsgraph, b_ob_info, hair, motion_step);
    }
--- a/intern/cycles/blender/pointcloud.cpp
+++ b/intern/cycles/blender/pointcloud.cpp
@@ -1,8 +1,10 @@
 /* SPDX-License-Identifier: Apache-2.0
 * Copyright 2011-2022 Blender Foundation */

-#include "scene/pointcloud.h"
+#include <optional>
+
 #include "scene/attribute.h"
+#include "scene/pointcloud.h"
 #include "scene/scene.h"

 #include "blender/sync.h"
@@ -138,6 +140,36 @@ static void copy_attributes(PointCloud *pointcloud,
  }
 }

+static std::optional<BL::FloatAttribute> find_radius_attribute(BL::PointCloud b_pointcloud)
+{
+  for (BL::Attribute &b_attribute : b_pointcloud.attributes) {
+    if (b_attribute.name() != "radius") {
+      continue;
+    }
+    if (b_attribute.data_type() != BL::Attribute::data_type_FLOAT) {
+      continue;
+    }
+    return BL::FloatAttribute{b_attribute};
+  }
+  return std::nullopt;
+}
+
+static BL::FloatVectorAttribute find_position_attribute(BL::PointCloud b_pointcloud)
+{
+  for (BL::Attribute &b_attribute : b_pointcloud.attributes) {
+    if (b_attribute.name() != "position") {
+      continue;
+    }
+    if (b_attribute.data_type() != BL::Attribute::data_type_FLOAT_VECTOR) {
+      continue;
+    }
+    return BL::FloatVectorAttribute{b_attribute};
+  }
+  /* The position attribute must exist. */
+  assert(false);
+  return BL::FloatVectorAttribute{b_pointcloud.attributes[0]};
+}
+
 static void export_pointcloud(Scene *scene,
                              PointCloud *pointcloud,
                              BL::PointCloud b_pointcloud,
@@ -156,18 +188,18 @@ static void export_pointcloud(Scene *scene,
  const int num_points = b_pointcloud.points.length();
  pointcloud->reserve(num_points);

+  BL::FloatVectorAttribute b_attr_position = find_position_attribute(b_pointcloud);
+  std::optional<BL::FloatAttribute> b_attr_radius = find_radius_attribute(b_pointcloud);
+
  /* Export points. */
-  BL::PointCloud::points_iterator b_point_iter;
-  for (b_pointcloud.points.begin(b_point_iter); b_point_iter != b_pointcloud.points.end();
-       ++b_point_iter) {
-    BL::Point b_point = *b_point_iter;
-    const float3 co = get_float3(b_point.co());
-    const float radius = b_point.radius();
+  for (int i = 0; i < num_points; i++) {
+    const float3 co = get_float3(b_attr_position.data[i].vector());
+    const float radius = b_attr_radius ? b_attr_radius->data[i].value() : 0.0f;
    pointcloud->add_point(co, radius);

    /* Random number per point. */
    if (attr_random != NULL) {
-      attr_random->add(hash_uint2_to_float(b_point.index(), 0));
+      attr_random->add(hash_uint2_to_float(i, 0));
    }
  }

@@ -195,14 +227,15 @@ static void export_pointcloud_motion(PointCloud *pointcloud,
  int num_motion_points = 0;
  const array<float3> &pointcloud_points = pointcloud->get_points();

-  BL::PointCloud::points_iterator b_point_iter;
-  for (b_pointcloud.points.begin(b_point_iter); b_point_iter != b_pointcloud.points.end();
-       ++b_point_iter) {
-    BL::Point b_point = *b_point_iter;
+  BL::FloatVectorAttribute b_attr_position = find_position_attribute(b_pointcloud);
+  std::optional<BL::FloatAttribute> b_attr_radius = find_radius_attribute(b_pointcloud);

+  for (int i = 0; i < num_points; i++) {
    if (num_motion_points < num_points) {
-      float3 P = get_float3(b_point.co());
-      P.w = b_point.radius();
+      const float3 co = get_float3(b_attr_position.data[i].vector());
+      const float radius = b_attr_radius ? b_attr_radius->data[i].value() : 0.0f;
+      float3 P = co;
+      P.w = radius;
      mP[num_motion_points] = P;
      have_motion = have_motion || (P != pointcloud_points[num_motion_points]);
      num_motion_points++;
--- a/intern/cycles/blender/python.cpp
+++ b/intern/cycles/blender/python.cpp
@@ -871,18 +871,20 @@ static PyObject *enable_print_stats_func(PyObject * /*self*/, PyObject * /*args*
 static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/)
 {
  vector<DeviceType> device_types = Device::available_types();
-  bool has_cuda = false, has_optix = false, has_hip = false, has_metal = false;
+  bool has_cuda = false, has_optix = false, has_hip = false, has_metal = false, has_oneapi = false;
  foreach (DeviceType device_type, device_types) {
    has_cuda |= (device_type == DEVICE_CUDA);
    has_optix |= (device_type == DEVICE_OPTIX);
    has_hip |= (device_type == DEVICE_HIP);
    has_metal |= (device_type == DEVICE_METAL);
+    has_oneapi |= (device_type == DEVICE_ONEAPI);
  }
-  PyObject *list = PyTuple_New(4);
+  PyObject *list = PyTuple_New(5);
  PyTuple_SET_ITEM(list, 0, PyBool_FromLong(has_cuda));
  PyTuple_SET_ITEM(list, 1, PyBool_FromLong(has_optix));
  PyTuple_SET_ITEM(list, 2, PyBool_FromLong(has_hip));
  PyTuple_SET_ITEM(list, 3, PyBool_FromLong(has_metal));
+  PyTuple_SET_ITEM(list, 4, PyBool_FromLong(has_oneapi));
  return list;
 }

@@ -914,6 +916,9 @@ static PyObject *set_device_override_func(PyObject * /*self*/, PyObject *arg)
  else if (override == "METAL") {
    BlenderSession::device_override = DEVICE_MASK_METAL;
  }
+  else if (override == "ONEAPI") {
+    BlenderSession::device_override = DEVICE_MASK_ONEAPI;
+  }
  else {
    printf("\nError: %s is not a valid Cycles device.\n", override.c_str());
    Py_RETURN_FALSE;
--- a/intern/cycles/blender/shader.cpp
+++ b/intern/cycles/blender/shader.cpp
@@ -928,8 +928,22 @@ static ShaderNode *add_node(Scene *scene,
    sky->set_sun_disc(b_sky_node.sun_disc());
    sky->set_sun_size(b_sky_node.sun_size());
    sky->set_sun_intensity(b_sky_node.sun_intensity());
-    sky->set_sun_elevation(b_sky_node.sun_elevation());
-    sky->set_sun_rotation(b_sky_node.sun_rotation());
+    /* Patch sun position to be able to animate daylight cycle while keeping the shading code
+     * simple. */
+    float sun_rotation = b_sky_node.sun_rotation();
+    /* Wrap into [-2PI..2PI] range. */
+    float sun_elevation = fmodf(b_sky_node.sun_elevation(), M_2PI_F);
+    /* Wrap into [-PI..PI] range. */
+    if (fabsf(sun_elevation) >= M_PI_F) {
+      sun_elevation -= copysignf(2.0f, sun_elevation) * M_PI_F;
+    }
+    /* Wrap into [-PI/2..PI/2] range while keeping the same absolute position. */
+    if (sun_elevation >= M_PI_2_F || sun_elevation <= -M_PI_2_F) {
+      sun_elevation = copysignf(M_PI_F, sun_elevation) - sun_elevation;
+      sun_rotation += M_PI_F;
+    }
+    sky->set_sun_elevation(sun_elevation);
+    sky->set_sun_rotation(sun_rotation);
    sky->set_altitude(b_sky_node.altitude());
    sky->set_air_density(b_sky_node.air_density());
    sky->set_dust_density(b_sky_node.dust_density());
--- a/intern/cycles/blender/sync.h
+++ b/intern/cycles/blender/sync.h
@@ -7,6 +7,7 @@
 #include "MEM_guardedalloc.h"
 #include "RNA_access.h"
 #include "RNA_blender_cpp.h"
+#include "RNA_path.h"
 #include "RNA_types.h"

 #include "blender/id_map.h"
--- a/intern/cycles/bvh/embree.cpp
+++ b/intern/cycles/bvh/embree.cpp
@@ -21,13 +21,9 @@

 #  include "bvh/embree.h"

-/* Kernel includes are necessary so that the filter function for Embree can access the packed BVH.
- */
-#  include "kernel/bvh/embree.h"
-#  include "kernel/bvh/util.h"
+#  include "kernel/device/cpu/bvh.h"
 #  include "kernel/device/cpu/compat.h"
 #  include "kernel/device/cpu/globals.h"
-#  include "kernel/sample/lcg.h"

 #  include "scene/hair.h"
 #  include "scene/mesh.h"
@@ -46,265 +42,6 @@ static_assert(Object::MAX_MOTION_STEPS <= RTC_MAX_TIME_STEP_COUNT,
 static_assert(Object::MAX_MOTION_STEPS == Geometry::MAX_MOTION_STEPS,
              "Object and Geometry max motion steps inconsistent");

-#  define IS_HAIR(x) (x & 1)
-
-/* This gets called by Embree at every valid ray/object intersection.
- * Things like recording subsurface or shadow hits for later evaluation
- * as well as filtering for volume objects happen here.
- * Cycles' own BVH does that directly inside the traversal calls.
- */
-static void rtc_filter_intersection_func(const RTCFilterFunctionNArguments *args)
-{
-  /* Current implementation in Cycles assumes only single-ray intersection queries. */
-  assert(args->N == 1);
-
-  RTCHit *hit = (RTCHit *)args->hit;
-  CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
-  const KernelGlobalsCPU *kg = ctx->kg;
-  const Ray *cray = ctx->ray;
-
-  if (kernel_embree_is_self_intersection(kg, hit, cray)) {
-    *args->valid = 0;
-  }
-}
-
-/* This gets called by Embree at every valid ray/object intersection.
- * Things like recording subsurface or shadow hits for later evaluation
- * as well as filtering for volume objects happen here.
- * Cycles' own BVH does that directly inside the traversal calls.
- */
-static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
-{
-  /* Current implementation in Cycles assumes only single-ray intersection queries. */
-  assert(args->N == 1);
-
-  const RTCRay *ray = (RTCRay *)args->ray;
-  RTCHit *hit = (RTCHit *)args->hit;
-  CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
-  const KernelGlobalsCPU *kg = ctx->kg;
-  const Ray *cray = ctx->ray;
-
-  switch (ctx->type) {
-    case CCLIntersectContext::RAY_SHADOW_ALL: {
-      Intersection current_isect;
-      kernel_embree_convert_hit(kg, ray, hit, &current_isect);
-      if (intersection_skip_self_shadow(cray->self, current_isect.object, current_isect.prim)) {
-        *args->valid = 0;
-        return;
-      }
-      /* If no transparent shadows or max number of hits exceeded, all light is blocked. */
-      const int flags = intersection_get_shader_flags(kg, current_isect.prim, current_isect.type);
-      if (!(flags & (SD_HAS_TRANSPARENT_SHADOW)) || ctx->num_hits >= ctx->max_hits) {
-        ctx->opaque_hit = true;
-        return;
-      }
-
-      ++ctx->num_hits;
-
-      /* Always use baked shadow transparency for curves. */
-      if (current_isect.type & PRIMITIVE_CURVE) {
-        ctx->throughput *= intersection_curve_shadow_transparency(
-            kg, current_isect.object, current_isect.prim, current_isect.u);
-
-        if (ctx->throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
-          ctx->opaque_hit = true;
-          return;
-        }
-        else {
-          *args->valid = 0;
-          return;
-        }
-      }
-
-      /* Test if we need to record this transparent intersection. */
-      const uint max_record_hits = min(ctx->max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
-      if (ctx->num_recorded_hits < max_record_hits || ray->tfar < ctx->max_t) {
-        /* If maximum number of hits was reached, replace the intersection with the
-         * highest distance. We want to find the N closest intersections. */
-        const uint num_recorded_hits = min(ctx->num_recorded_hits, max_record_hits);
-        uint isect_index = num_recorded_hits;
-        if (num_recorded_hits + 1 >= max_record_hits) {
-          float max_t = ctx->isect_s[0].t;
-          uint max_recorded_hit = 0;
-
-          for (uint i = 1; i < num_recorded_hits; ++i) {
-            if (ctx->isect_s[i].t > max_t) {
-              max_recorded_hit = i;
-              max_t = ctx->isect_s[i].t;
-            }
-          }
-
-          if (num_recorded_hits >= max_record_hits) {
-            isect_index = max_recorded_hit;
-          }
-
-          /* Limit the ray distance and stop counting hits beyond this.
-           * TODO: is there some way we can tell Embree to stop intersecting beyond
-           * this distance when max number of hits is reached?. Or maybe it will
-           * become irrelevant if we make max_hits a very high number on the CPU. */
-          ctx->max_t = max(current_isect.t, max_t);
-        }
-
-        ctx->isect_s[isect_index] = current_isect;
-      }
-
-      /* Always increase the number of recorded hits, even beyond the maximum,
-       * so that we can detect this and trace another ray if needed. */
-      ++ctx->num_recorded_hits;
-
-      /* This tells Embree to continue tracing. */
-      *args->valid = 0;
-      break;
-    }
-    case CCLIntersectContext::RAY_LOCAL:
-    case CCLIntersectContext::RAY_SSS: {
-      /* Check if it's hitting the correct object. */
-      Intersection current_isect;
-      if (ctx->type == CCLIntersectContext::RAY_SSS) {
-        kernel_embree_convert_sss_hit(kg, ray, hit, &current_isect, ctx->local_object_id);
-      }
-      else {
-        kernel_embree_convert_hit(kg, ray, hit, &current_isect);
-        if (ctx->local_object_id != current_isect.object) {
-          /* This tells Embree to continue tracing. */
-          *args->valid = 0;
-          break;
-        }
-      }
-      if (intersection_skip_self_local(cray->self, current_isect.prim)) {
-        *args->valid = 0;
-        return;
-      }
-
-      /* No intersection information requested, just return a hit. */
-      if (ctx->max_hits == 0) {
-        break;
-      }
-
-      /* Ignore curves. */
-      if (IS_HAIR(hit->geomID)) {
-        /* This tells Embree to continue tracing. */
-        *args->valid = 0;
-        break;
-      }
-
-      LocalIntersection *local_isect = ctx->local_isect;
-      int hit_idx = 0;
-
-      if (ctx->lcg_state) {
-        /* See triangle_intersect_subsurface() for the native equivalent. */
-        for (int i = min((int)ctx->max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
-          if (local_isect->hits[i].t == ray->tfar) {
-            /* This tells Embree to continue tracing. */
-            *args->valid = 0;
-            return;
-          }
-        }
-
-        local_isect->num_hits++;
-
-        if (local_isect->num_hits <= ctx->max_hits) {
-          hit_idx = local_isect->num_hits - 1;
-        }
-        else {
-          /* reservoir sampling: if we are at the maximum number of
-           * hits, randomly replace element or skip it */
-          hit_idx = lcg_step_uint(ctx->lcg_state) % local_isect->num_hits;
-
-          if (hit_idx >= ctx->max_hits) {
-            /* This tells Embree to continue tracing. */
-            *args->valid = 0;
-            return;
-          }
-        }
-      }
-      else {
-        /* Record closest intersection only. */
-        if (local_isect->num_hits && current_isect.t > local_isect->hits[0].t) {
-          *args->valid = 0;
-          return;
-        }
-
-        local_isect->num_hits = 1;
-      }
-
-      /* record intersection */
-      local_isect->hits[hit_idx] = current_isect;
-      local_isect->Ng[hit_idx] = normalize(make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z));
-      /* This tells Embree to continue tracing. */
-      *args->valid = 0;
-      break;
-    }
-    case CCLIntersectContext::RAY_VOLUME_ALL: {
-      /* Append the intersection to the end of the array. */
-      if (ctx->num_hits < ctx->max_hits) {
-        Intersection current_isect;
-        kernel_embree_convert_hit(kg, ray, hit, &current_isect);
-        if (intersection_skip_self(cray->self, current_isect.object, current_isect.prim)) {
-          *args->valid = 0;
-          return;
-        }
-
-        Intersection *isect = &ctx->isect_s[ctx->num_hits];
-        ++ctx->num_hits;
-        *isect = current_isect;
-        /* Only primitives from volume object. */
-        uint tri_object = isect->object;
-        int object_flag = kernel_data_fetch(object_flag, tri_object);
-        if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
-          --ctx->num_hits;
-        }
-        /* This tells Embree to continue tracing. */
-        *args->valid = 0;
-      }
-      break;
-    }
-    case CCLIntersectContext::RAY_REGULAR:
-    default:
-      if (kernel_embree_is_self_intersection(kg, hit, cray)) {
-        *args->valid = 0;
-        return;
-      }
-      break;
-  }
-}
-
-static void rtc_filter_func_backface_cull(const RTCFilterFunctionNArguments *args)
-{
-  const RTCRay *ray = (RTCRay *)args->ray;
-  RTCHit *hit = (RTCHit *)args->hit;
-
-  /* Always ignore back-facing intersections. */
-  if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z),
-          make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
-    *args->valid = 0;
-    return;
-  }
-
-  CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
-  const KernelGlobalsCPU *kg = ctx->kg;
-  const Ray *cray = ctx->ray;
-
-  if (kernel_embree_is_self_intersection(kg, hit, cray)) {
-    *args->valid = 0;
-  }
-}
-
-static void rtc_filter_occluded_func_backface_cull(const RTCFilterFunctionNArguments *args)
-{
-  const RTCRay *ray = (RTCRay *)args->ray;
-  RTCHit *hit = (RTCHit *)args->hit;
-
-  /* Always ignore back-facing intersections. */
-  if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z),
-          make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
-    *args->valid = 0;
-    return;
-  }
-
-  rtc_filter_occluded_func(args);
-}
-
 static size_t unaccounted_mem = 0;

 static bool rtc_memory_monitor_func(void *userPtr, const ssize_t bytes, const bool)
@@ -535,8 +272,8 @@ void BVHEmbree::add_triangles(const Object *ob, const Mesh *mesh, int i)
  set_tri_vertex_buffer(geom_id, mesh, false);

  rtcSetGeometryUserData(geom_id, (void *)prim_offset);
-  rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func);
-  rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_intersection_func);
+  rtcSetGeometryOccludedFilterFunction(geom_id, kernel_embree_filter_occluded_func);
+  rtcSetGeometryIntersectFilterFunction(geom_id, kernel_embree_filter_intersection_func);
  rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());

  rtcCommitGeometry(geom_id);
@@ -739,8 +476,8 @@ void BVHEmbree::add_points(const Object *ob, const PointCloud *pointcloud, int i
  set_point_vertex_buffer(geom_id, pointcloud, false);

  rtcSetGeometryUserData(geom_id, (void *)prim_offset);
-  rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func_backface_cull);
-  rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func_backface_cull);
+  rtcSetGeometryIntersectFilterFunction(geom_id, kernel_embree_filter_func_backface_cull);
+  rtcSetGeometryOccludedFilterFunction(geom_id, kernel_embree_filter_occluded_func_backface_cull);
  rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());

  rtcCommitGeometry(geom_id);
@@ -799,12 +536,13 @@ void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i)

  rtcSetGeometryUserData(geom_id, (void *)prim_offset);
  if (hair->curve_shape == CURVE_RIBBON) {
-    rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_intersection_func);
-    rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func);
+    rtcSetGeometryIntersectFilterFunction(geom_id, kernel_embree_filter_intersection_func);
+    rtcSetGeometryOccludedFilterFunction(geom_id, kernel_embree_filter_occluded_func);
  }
  else {
-    rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func_backface_cull);
-    rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func_backface_cull);
+    rtcSetGeometryIntersectFilterFunction(geom_id, kernel_embree_filter_func_backface_cull);
+    rtcSetGeometryOccludedFilterFunction(geom_id,
+                                         kernel_embree_filter_occluded_func_backface_cull);
  }
  rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());

--- a/intern/cycles/bvh/params.h
+++ b/intern/cycles/bvh/params.h
@@ -129,7 +129,7 @@ class BVHParams {

    top_level = false;
    bvh_layout = BVH_LAYOUT_BVH2;
-    use_compact_structure = true;
+    use_compact_structure = false;
    use_unaligned_nodes = false;

    num_motion_curve_steps = 0;
--- a/intern/cycles/cmake/external_libs.cmake
+++ b/intern/cycles/cmake/external_libs.cmake
@@ -91,6 +91,8 @@ if(CYCLES_STANDALONE_REPOSITORY)
    _set_default(USD_ROOT_DIR "${_cycles_lib_dir}/usd")
    _set_default(WEBP_ROOT_DIR "${_cycles_lib_dir}/webp")
    _set_default(ZLIB_ROOT "${_cycles_lib_dir}/zlib")
+    _set_default(LEVEL_ZERO_ROOT_DIR "${_cycles_lib_dir}/level-zero")
+    _set_default(SYCL_ROOT_DIR "${_cycles_lib_dir}/dpcpp")

    # Ignore system libraries
    set(CMAKE_IGNORE_PATH "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES};${CMAKE_SYSTEM_INCLUDE_PATH};${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES};${CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES}")
@@ -647,3 +649,22 @@ if(WITH_CYCLES_DEVICE_METAL)
    message(STATUS "Found Metal: ${METAL_LIBRARY}")
  endif()
 endif()
+
+###########################################################################
+# oneAPI
+###########################################################################
+
+if(WITH_CYCLES_DEVICE_ONEAPI)
+  find_package(SYCL)
+  find_package(LevelZero)
+
+  if(SYCL_FOUND AND LEVEL_ZERO_FOUND)
+    message(STATUS "Found oneAPI: ${SYCL_LIBRARY}")
+    message(STATUS "Found Level Zero: ${LEVEL_ZERO_LIBRARY}")
+  else()
+    message(STATUS "oneAPI or Level Zero not found, disabling oneAPI device from Cycles")
+    set(WITH_CYCLES_DEVICE_ONEAPI OFF)
+  endif()
+endif()
+
+unset(_cycles_lib_dir)
--- a/intern/cycles/device/CMakeLists.txt
+++ b/intern/cycles/device/CMakeLists.txt
@@ -82,6 +82,15 @@ set(SRC_HIP
  hip/util.h
 )

+set(SRC_ONEAPI
+  oneapi/device_impl.cpp
+  oneapi/device_impl.h
+  oneapi/device.cpp
+  oneapi/device.h
+  oneapi/queue.cpp
+  oneapi/queue.h
+)
+
 set(SRC_DUMMY
  dummy/device.cpp
  dummy/device.h
@@ -134,6 +143,7 @@ set(SRC
  ${SRC_DUMMY}
  ${SRC_MULTI}
  ${SRC_OPTIX}
+  ${SRC_ONEAPI}
  ${SRC_HEADERS}
 )

@@ -181,6 +191,9 @@ if(WITH_CYCLES_DEVICE_METAL)
    ${SRC_METAL}
  )
 endif()
+if(WITH_CYCLES_DEVICE_ONEAPI)
+  add_definitions(-DWITH_ONEAPI)
+endif()

 if(WITH_OPENIMAGEDENOISE)
  list(APPEND LIB
@@ -193,6 +206,11 @@ include_directories(SYSTEM ${INC_SYS})

 cycles_add_library(cycles_device "${LIB}" ${SRC})

+if(WITH_CYCLES_DEVICE_ONEAPI)
+  # Need to have proper rebuilding in case of changes in cycles_kernel_oneapi due to external project behavior.
+  add_dependencies(cycles_device cycles_kernel_oneapi)
+endif()
+
 source_group("cpu" FILES ${SRC_CPU})
 source_group("cuda" FILES ${SRC_CUDA})
 source_group("dummy" FILES ${SRC_DUMMY})
@@ -200,4 +218,5 @@ source_group("hip" FILES ${SRC_HIP})
 source_group("multi" FILES ${SRC_MULTI})
 source_group("metal" FILES ${SRC_METAL})
 source_group("optix" FILES ${SRC_OPTIX})
+source_group("oneapi" FILES ${SRC_ONEAPI})
 source_group("common" FILES ${SRC_BASE} ${SRC_HEADERS})
--- a/intern/cycles/device/cpu/device_impl.cpp
+++ b/intern/cycles/device/cpu/device_impl.cpp
@@ -197,7 +197,7 @@ void CPUDevice::const_copy_to(const char *name, void *host, size_t size)

    // Update scene handle (since it is different for each device on multi devices)
    KernelData *const data = (KernelData *)host;
-    data->bvh.scene = embree_scene;
+    data->device_bvh = embree_scene;
  }
 #endif
  kernel_const_copy(&kernel_globals, name, host, size);
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -16,6 +16,7 @@
 #include "device/hip/device.h"
 #include "device/metal/device.h"
 #include "device/multi/device.h"
+#include "device/oneapi/device.h"
 #include "device/optix/device.h"

 #include "util/foreach.h"
@@ -39,6 +40,7 @@ vector<DeviceInfo> Device::optix_devices;
 vector<DeviceInfo> Device::cpu_devices;
 vector<DeviceInfo> Device::hip_devices;
 vector<DeviceInfo> Device::metal_devices;
+vector<DeviceInfo> Device::oneapi_devices;
 uint Device::devices_initialized_mask = 0;

 /* Device */
@@ -101,6 +103,13 @@ Device *Device::create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
        device = device_metal_create(info, stats, profiler);
      break;
 #endif
+
+#ifdef WITH_ONEAPI
+    case DEVICE_ONEAPI:
+      device = device_oneapi_create(info, stats, profiler);
+      break;
+#endif
+
    default:
      break;
  }
@@ -126,6 +135,8 @@ DeviceType Device::type_from_string(const char *name)
    return DEVICE_HIP;
  else if (strcmp(name, "METAL") == 0)
    return DEVICE_METAL;
+  else if (strcmp(name, "ONEAPI") == 0)
+    return DEVICE_ONEAPI;

  return DEVICE_NONE;
 }
@@ -144,6 +155,8 @@ string Device::string_from_type(DeviceType type)
    return "HIP";
  else if (type == DEVICE_METAL)
    return "METAL";
+  else if (type == DEVICE_ONEAPI)
+    return "ONEAPI";

  return "";
 }
@@ -163,6 +176,9 @@ vector<DeviceType> Device::available_types()
 #endif
 #ifdef WITH_METAL
  types.push_back(DEVICE_METAL);
+#endif
+#ifdef WITH_ONEAPI
+  types.push_back(DEVICE_ONEAPI);
 #endif
  return types;
 }
@@ -219,6 +235,20 @@ vector<DeviceInfo> Device::available_devices(uint mask)
  }
 #endif

+#ifdef WITH_ONEAPI
+  if (mask & DEVICE_MASK_ONEAPI) {
+    if (!(devices_initialized_mask & DEVICE_MASK_ONEAPI)) {
+      if (device_oneapi_init()) {
+        device_oneapi_info(oneapi_devices);
+      }
+      devices_initialized_mask |= DEVICE_MASK_ONEAPI;
+    }
+    foreach (DeviceInfo &info, oneapi_devices) {
+      devices.push_back(info);
+    }
+  }
+#endif
+
  if (mask & DEVICE_MASK_CPU) {
    if (!(devices_initialized_mask & DEVICE_MASK_CPU)) {
      device_cpu_info(cpu_devices);
@@ -282,6 +312,15 @@ string Device::device_capabilities(uint mask)
  }
 #endif

+#ifdef WITH_ONEAPI
+  if (mask & DEVICE_MASK_ONEAPI) {
+    if (device_oneapi_init()) {
+      capabilities += "\noneAPI device capabilities:\n";
+      capabilities += device_oneapi_capabilities();
+    }
+  }
+#endif
+
 #ifdef WITH_METAL
  if (mask & DEVICE_MASK_METAL) {
    if (device_metal_init()) {
@@ -380,6 +419,7 @@ void Device::free_memory()
  cuda_devices.free_memory();
  optix_devices.free_memory();
  hip_devices.free_memory();
+  oneapi_devices.free_memory();
  cpu_devices.free_memory();
  metal_devices.free_memory();
 }
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -29,6 +29,7 @@ class DeviceQueue;
 class Progress;
 class CPUKernels;
 class CPUKernelThreadGlobals;
+class Scene;

 /* Device Types */

@@ -40,6 +41,7 @@ enum DeviceType {
  DEVICE_OPTIX,
  DEVICE_HIP,
  DEVICE_METAL,
+  DEVICE_ONEAPI,
  DEVICE_DUMMY,
 };

@@ -49,6 +51,7 @@ enum DeviceTypeMask {
  DEVICE_MASK_OPTIX = (1 << DEVICE_OPTIX),
  DEVICE_MASK_HIP = (1 << DEVICE_HIP),
  DEVICE_MASK_METAL = (1 << DEVICE_METAL),
+  DEVICE_MASK_ONEAPI = (1 << DEVICE_ONEAPI),
  DEVICE_MASK_ALL = ~0
 };

@@ -184,6 +187,11 @@ class Device {
    return 0;
  }

+  /* Called after kernel texture setup, and prior to integrator state setup. */
+  virtual void optimize_for_scene(Scene * /*scene*/)
+  {
+  }
+
  virtual bool is_resident(device_ptr /*key*/, Device *sub_device)
  {
    /* Memory is always resident if this is not a multi device, regardless of whether the pointer
@@ -273,6 +281,7 @@ class Device {
  static vector<DeviceInfo> cpu_devices;
  static vector<DeviceInfo> hip_devices;
  static vector<DeviceInfo> metal_devices;
+  static vector<DeviceInfo> oneapi_devices;
  static uint devices_initialized_mask;
 };

--- a/intern/cycles/device/hip/util.h
+++ b/intern/cycles/device/hip/util.h
@@ -51,7 +51,7 @@ static inline bool hipSupportsDevice(const int hipDevId)
  hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
  hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);

-  return (major > 10) || (major == 10 && minor >= 1);
+  return (major >= 9);
 }

 CCL_NAMESPACE_END
--- a/intern/cycles/device/metal/device.mm
+++ b/intern/cycles/device/metal/device.mm
@@ -34,7 +34,8 @@ void device_metal_info(vector<DeviceInfo> &devices)
  int device_index = 0;
  for (id<MTLDevice> &device : usable_devices) {
    /* Compute unique ID for persistent user preferences. */
-    string device_name = [device.name UTF8String];
+    string device_name = MetalInfo::get_device_name(device);
+
    string id = string("METAL_") + device_name;

    /* Hardware ID might not be unique, add device number in that case. */
@@ -48,12 +49,6 @@ void device_metal_info(vector<DeviceInfo> &devices)
    info.type = DEVICE_METAL;
    info.description = string_remove_trademark(string(device_name));

-    /* Ensure unique naming on Apple Silicon / SoC devices which return the same string for CPU and
-     * GPU */
-    if (info.description == system_cpu_brand_string()) {
-      info.description += " (GPU)";
-    }
-
    info.num = device_index;
    /* We don't know if it's used for display, but assume it is. */
    info.display_device = true;
@@ -69,14 +64,15 @@ string device_metal_capabilities()
 {
  string result = "";
  auto allDevices = MTLCopyAllDevices();
-  uint32_t num_devices = allDevices.count;
+  uint32_t num_devices = (uint32_t)allDevices.count;
  if (num_devices == 0) {
    return "No Metal devices found\n";
  }
  result += string_printf("Number of devices: %u\n", num_devices);

  for (id<MTLDevice> device in allDevices) {
-    result += string_printf("\t\tDevice: %s\n", [device.name UTF8String]);
+    string device_name = MetalInfo::get_device_name(device);
+    result += string_printf("\t\tDevice: %s\n", device_name.c_str());
  }

  return result;
--- a/intern/cycles/device/metal/device_impl.h
+++ b/intern/cycles/device/metal/device_impl.h
@@ -42,7 +42,6 @@ class MetalDevice : public Device {
      nil; /* encoder used for fetching device pointers from MTLAccelerationStructure */
  /*---------------------------------------------------*/

-  string device_name;
  MetalGPUVendor device_vendor;

  uint kernel_features;
@@ -76,7 +75,8 @@ class MetalDevice : public Device {
  std::vector<id<MTLTexture>> texture_slot_map;

  bool use_metalrt = false;
-  bool use_function_specialisation = false;
+  MetalPipelineType kernel_specialization_level = PSO_GENERIC;
+  std::atomic_bool async_compile_and_load = false;

  virtual BVHLayoutMask get_bvh_layout_mask() const override;

@@ -92,9 +92,7 @@ class MetalDevice : public Device {

  bool use_adaptive_compilation();

-  string get_source(const uint kernel_features);
-
-  string compile_kernel(const uint kernel_features, const char *name);
+  void make_source(MetalPipelineType pso_type, const uint kernel_features);

  virtual bool load_kernels(const uint kernel_features) override;

@@ -112,7 +110,9 @@ class MetalDevice : public Device {

  virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override;

-  id<MTLLibrary> compile(string const &source);
+  virtual void optimize_for_scene(Scene *scene) override;
+
+  bool compile_and_load(MetalPipelineType pso_type);

  /* ------------------------------------------------------------------ */
  /* low-level memory management */
--- a/intern/cycles/device/metal/device_impl.mm
+++ b/intern/cycles/device/metal/device_impl.mm
@@ -6,9 +6,12 @@
 #  include "device/metal/device_impl.h"
 #  include "device/metal/device.h"

+#  include "scene/scene.h"
+
 #  include "util/debug.h"
 #  include "util/md5.h"
 #  include "util/path.h"
+#  include "util/time.h"

 CCL_NAMESPACE_BEGIN

@@ -43,10 +46,9 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
  auto usable_devices = MetalInfo::get_usable_devices();
  assert(mtlDevId < usable_devices.size());
  mtlDevice = usable_devices[mtlDevId];
-  device_name = [mtlDevice.name UTF8String];
-  device_vendor = MetalInfo::get_vendor_from_device_name(device_name);
+  device_vendor = MetalInfo::get_device_vendor(mtlDevice);
  assert(device_vendor != METAL_GPU_UNKNOWN);
-  metal_printf("Creating new Cycles device for Metal: %s\n", device_name.c_str());
+  metal_printf("Creating new Cycles device for Metal: %s\n", info.description.c_str());

  /* determine default storage mode based on whether UMA is supported */

@@ -78,6 +80,10 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
    case METAL_GPU_APPLE: {
      max_threads_per_threadgroup = 512;
      use_metalrt = info.use_metalrt;
+
+      /* Specialize the intersection kernels on Apple GPUs by default as these can be built very
+       * quickly. */
+      kernel_specialization_level = PSO_SPECIALIZED_INTERSECT;
      break;
    }
  }
@@ -90,6 +96,13 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
    capture_enabled = true;
  }

+  if (auto envstr = getenv("CYCLES_METAL_SPECIALIZATION_LEVEL")) {
+    kernel_specialization_level = (MetalPipelineType)atoi(envstr);
+  }
+  metal_printf("kernel_specialization_level = %s\n",
+               kernel_type_as_string(
+                   (MetalPipelineType)min((int)kernel_specialization_level, (int)PSO_NUM - 1)));
+
  MTLArgumentDescriptor *arg_desc_params = [[MTLArgumentDescriptor alloc] init];
  arg_desc_params.dataType = MTLDataTypePointer;
  arg_desc_params.access = MTLArgumentAccessReadOnly;
@@ -209,61 +222,86 @@ bool MetalDevice::use_adaptive_compilation()
  return DebugFlags().metal.adaptive_compile;
 }

-string MetalDevice::get_source(const uint kernel_features)
+void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_features)
 {
-  string build_options;
-
+  string global_defines;
  if (use_adaptive_compilation()) {
-    build_options += " -D__KERNEL_FEATURES__=" + to_string(kernel_features);
+    global_defines += "#define __KERNEL_FEATURES__ " + to_string(kernel_features) + "\n";
  }

  if (use_metalrt) {
-    build_options += "-D__METALRT__ ";
+    global_defines += "#define __METALRT__\n";
    if (motion_blur) {
-      build_options += "-D__METALRT_MOTION__ ";
+      global_defines += "#define __METALRT_MOTION__\n";
    }
  }

 #  ifdef WITH_CYCLES_DEBUG
-  build_options += "-D__KERNEL_DEBUG__ ";
+  global_defines += "#define __KERNEL_DEBUG__\n";
 #  endif

  switch (device_vendor) {
    default:
      break;
    case METAL_GPU_INTEL:
-      build_options += "-D__KERNEL_METAL_INTEL__ ";
+      global_defines += "#define __KERNEL_METAL_INTEL__\n";
      break;
    case METAL_GPU_AMD:
-      build_options += "-D__KERNEL_METAL_AMD__ ";
+      global_defines += "#define __KERNEL_METAL_AMD__\n";
      break;
    case METAL_GPU_APPLE:
-      build_options += "-D__KERNEL_METAL_APPLE__ ";
+      global_defines += "#define __KERNEL_METAL_APPLE__\n";
      break;
  }

-  /* reformat -D defines list into compilable form */
-  vector<string> components;
-  string_replace(build_options, "-D", "");
-  string_split(components, build_options, " ");
-
-  string globalDefines;
-  for (const string &component : components) {
-    vector<string> assignments;
-    string_split(assignments, component, "=");
-    if (assignments.size() == 2)
-      globalDefines += string_printf(
-          "#define %s %s\n", assignments[0].c_str(), assignments[1].c_str());
-    else
-      globalDefines += string_printf("#define %s\n", assignments[0].c_str());
-  }
-
-  string source = globalDefines + "\n#include \"kernel/device/metal/kernel.metal\"\n";
+  string &source = this->source[pso_type];
+  source = "\n#include \"kernel/device/metal/kernel.metal\"\n";
  source = path_source_replace_includes(source, path_get("source"));

-  metal_printf("Global defines:\n%s\n", globalDefines.c_str());
+  /* Perform any required specialization on the source.
+   * With Metal function constants we can generate a single variant of the kernel source which can
+   * be repeatedly respecialized.
+   */
+  string baked_constants;

-  return source;
+  /* Replace specific KernelData "dot" dereferences with a Metal function_constant identifier of
+   * the same character length. Build a string of all active constant values which is then hashed
+   * in order to identify the PSO.
+   */
+  if (pso_type != PSO_GENERIC) {
+    const double starttime = time_dt();
+
+#  define KERNEL_STRUCT_BEGIN(name, parent) \
+    string_replace_same_length(source, "kernel_data." #parent ".", "kernel_data_" #parent "_");
+
+    /* Add constants to md5 so that 'get_best_pipeline' is able to return a suitable match. */
+#  define KERNEL_STRUCT_MEMBER(parent, _type, name) \
+    baked_constants += string(#parent "." #name "=") + \
+                       to_string(_type(launch_params.data.parent.name)) + "\n";
+
+#  include "kernel/data_template.h"
+
+    /* Opt in to all available specializations. This can be made more granular for the
+     * PSO_SPECIALIZED_INTERSECT case in order to minimize the number of specialization requests,
+     * but the overhead should be negligible as these are very quick to (re)build and aren't
+     * serialized to disk via MTLBinaryArchives.
+     */
+    global_defines += "#define __KERNEL_USE_DATA_CONSTANTS__\n";
+
+    metal_printf("KernelData patching took %.1f ms\n", (time_dt() - starttime) * 1000.0);
+  }
+
+  source = global_defines + source;
+  metal_printf("================\n%s================\n%s================\n",
+               global_defines.c_str(),
+               baked_constants.c_str());
+
+  /* Generate an MD5 from the source and include any baked constants. This is used when caching
+   * PSOs. */
+  MD5Hash md5;
+  md5.append(baked_constants);
+  md5.append(source);
+  source_md5[pso_type] = md5.get_hex();
 }

 bool MetalDevice::load_kernels(const uint _kernel_features)
@@ -279,24 +317,22 @@ bool MetalDevice::load_kernels(const uint _kernel_features)
   * active, but may still need to be rendered without motion blur if that isn't active as well. */
  motion_blur = kernel_features & KERNEL_FEATURE_OBJECT_MOTION;

-  source[PSO_GENERIC] = get_source(kernel_features);
-  mtlLibrary[PSO_GENERIC] = compile(source[PSO_GENERIC]);
-
-  MD5Hash md5;
-  md5.append(source[PSO_GENERIC]);
-  source_md5[PSO_GENERIC] = md5.get_hex();
-
-  metal_printf("Front-end compilation finished (generic)\n");
-
-  bool result = MetalDeviceKernels::load(this, false);
+  bool result = compile_and_load(PSO_GENERIC);

  reserve_local_memory(kernel_features);
-
  return result;
 }

-id<MTLLibrary> MetalDevice::compile(string const &source)
+bool MetalDevice::compile_and_load(MetalPipelineType pso_type)
 {
+  make_source(pso_type, kernel_features);
+
+  if (!MetalDeviceKernels::should_load_kernels(this, pso_type)) {
+    /* We already have a full set of matching pipelines which are cached or queued. */
+    metal_printf("%s kernels already requested\n", kernel_type_as_string(pso_type));
+    return true;
+  }
+
  MTLCompileOptions *options = [[MTLCompileOptions alloc] init];

  options.fastMathEnabled = YES;
@@ -304,19 +340,30 @@ id<MTLLibrary> MetalDevice::compile(string const &source)
    options.languageVersion = MTLLanguageVersion2_4;
  }

-  NSError *error = NULL;
-  id<MTLLibrary> mtlLibrary = [mtlDevice newLibraryWithSource:@(source.c_str())
-                                                      options:options
-                                                        error:&error];
+  if (getenv("CYCLES_METAL_PROFILING") || getenv("CYCLES_METAL_DEBUG")) {
+    path_write_text(path_cache_get(string_printf("%s.metal", kernel_type_as_string(pso_type))),
+                    source[pso_type]);
+  }

-  if (!mtlLibrary) {
+  const double starttime = time_dt();
+
+  NSError *error = NULL;
+  mtlLibrary[pso_type] = [mtlDevice newLibraryWithSource:@(source[pso_type].c_str())
+                                                 options:options
+                                                   error:&error];
+
+  if (!mtlLibrary[pso_type]) {
    NSString *err = [error localizedDescription];
    set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
  }

+  metal_printf("Front-end compilation finished in %.1f seconds (%s)\n",
+               time_dt() - starttime,
+               kernel_type_as_string(pso_type));
+
  [options release];

-  return mtlLibrary;
+  return MetalDeviceKernels::load(this, pso_type);
 }

 void MetalDevice::reserve_local_memory(const uint kernel_features)
@@ -623,11 +670,63 @@ device_ptr MetalDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, siz
  return 0;
 }

+void MetalDevice::optimize_for_scene(Scene *scene)
+{
+  MetalPipelineType specialization_level = kernel_specialization_level;
+
+  if (specialization_level < PSO_SPECIALIZED_INTERSECT) {
+    return;
+  }
+
+  /* PSO_SPECIALIZED_INTERSECT kernels are fast to specialize, so we always load them
+   * synchronously. */
+  compile_and_load(PSO_SPECIALIZED_INTERSECT);
+
+  if (specialization_level < PSO_SPECIALIZED_SHADE) {
+    return;
+  }
+  if (!scene->params.background) {
+    /* Don't load PSO_SPECIALIZED_SHADE kernels during viewport rendering as they are slower to
+     * build. */
+    return;
+  }
+
+  /* PSO_SPECIALIZED_SHADE kernels are slower to specialize, so we load them asynchronously, and
+   * only if there isn't an existing load in flight.
+   */
+  auto specialize_shade_fn = ^() {
+    compile_and_load(PSO_SPECIALIZED_SHADE);
+    async_compile_and_load = false;
+  };
+
+  bool async_specialize_shade = true;
+
+  /* Block if per-kernel profiling is enabled (ensure steady rendering rate). */
+  if (getenv("CYCLES_METAL_PROFILING") != nullptr) {
+    async_specialize_shade = false;
+  }
+
+  if (async_specialize_shade) {
+    if (!async_compile_and_load) {
+      async_compile_and_load = true;
+      dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0),
+                     specialize_shade_fn);
+    }
+    else {
+      metal_printf(
+          "Async PSO_SPECIALIZED_SHADE load request already in progress - dropping request\n");
+    }
+  }
+  else {
+    specialize_shade_fn();
+  }
+}
+
 void MetalDevice::const_copy_to(const char *name, void *host, size_t size)
 {
  if (strcmp(name, "data") == 0) {
    assert(size == sizeof(KernelData));
-    memcpy((uint8_t *)&launch_params + offsetof(KernelParamsMetal, data), host, size);
+    memcpy((uint8_t *)&launch_params.data, host, sizeof(KernelData));
    return;
  }

@@ -648,7 +747,7 @@ void MetalDevice::const_copy_to(const char *name, void *host, size_t size)
  /* Update data storage pointers in launch parameters. */
  if (strcmp(name, "integrator_state") == 0) {
    /* IntegratorStateGPU is contiguous pointers */
-    const size_t pointer_block_size = sizeof(IntegratorStateGPU);
+    const size_t pointer_block_size = offsetof(IntegratorStateGPU, sort_partition_divisor);
    update_launch_pointers(
        offsetof(KernelParamsMetal, integrator_state), host, size, pointer_block_size);
  }
--- a/intern/cycles/device/metal/kernel.h
+++ b/intern/cycles/device/metal/kernel.h
@@ -31,7 +31,7 @@ enum {
 enum { METALRT_TABLE_DEFAULT, METALRT_TABLE_SHADOW, METALRT_TABLE_LOCAL, METALRT_TABLE_NUM };

 /* Pipeline State Object types */
-enum {
+enum MetalPipelineType {
  /* A kernel that can be used with all scenes, supporting all features.
   * It is slow to compile, but only needs to be compiled once and is then
   * cached for future render sessions. This allows a render to get underway
@@ -39,28 +39,33 @@ enum {
   */
  PSO_GENERIC,

-  /* A kernel that is relatively quick to compile, but is specialized for the
-   * scene being rendered. It only contains the functionality and even baked in
-   * constants for values that means it needs to be recompiled whenever a
-   * dependent setting is changed. The render performance of this kernel is
-   * significantly faster though, and justifies the extra compile time.
+  /* An intersection kernel that is very quick to specialize and results in faster intersection
+   * kernel performance. It uses Metal function constants to replace several KernelData variables
+   * with fixed constants.
   */
-  /* METAL_WIP: This isn't used and will require more changes to enable. */
-  PSO_SPECIALISED,
+  PSO_SPECIALIZED_INTERSECT,
+
+  /* A shading kernel that is slow to specialize, but results in faster shading kernel
+   * performance. It uses Metal function constants to replace several KernelData variables with
+   * fixed constants and short-circuit all unused SVM node case handlers.
+   */
+  PSO_SPECIALIZED_SHADE,

  PSO_NUM
 };

-const char *kernel_type_as_string(int kernel_type);
+const char *kernel_type_as_string(MetalPipelineType pso_type);

 struct MetalKernelPipeline {

  void compile();

  id<MTLLibrary> mtlLibrary = nil;
-  bool scene_specialized;
+  MetalPipelineType pso_type;
  string source_md5;
+  size_t usage_count = 0;

+  KernelData kernel_data_;
  bool use_metalrt;
  bool metalrt_hair;
  bool metalrt_hair_thick;
@@ -75,6 +80,8 @@ struct MetalKernelPipeline {
  id<MTLComputePipelineState> pipeline = nil;
  int num_threads_per_block = 0;

+  bool should_use_binary_archive() const;
+
  string error_str;

  API_AVAILABLE(macos(11.0))
@@ -85,7 +92,8 @@ struct MetalKernelPipeline {
 /* Cache of Metal kernels for each DeviceKernel. */
 namespace MetalDeviceKernels {

-bool load(MetalDevice *device, bool scene_specialized);
+bool should_load_kernels(MetalDevice *device, MetalPipelineType pso_type);
+bool load(MetalDevice *device, MetalPipelineType pso_type);
 const MetalKernelPipeline *get_best_pipeline(const MetalDevice *device, DeviceKernel kernel);

 } /* namespace MetalDeviceKernels */
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -5,6 +5,7 @@

 #  include "device/metal/kernel.h"
 #  include "device/metal/device_impl.h"
+#  include "kernel/device/metal/function_constants.h"
 #  include "util/md5.h"
 #  include "util/path.h"
 #  include "util/tbb.h"
@@ -16,13 +17,15 @@ CCL_NAMESPACE_BEGIN
 /* limit to 2 MTLCompiler instances */
 int max_mtlcompiler_threads = 2;

-const char *kernel_type_as_string(int kernel_type)
+const char *kernel_type_as_string(MetalPipelineType pso_type)
 {
-  switch (kernel_type) {
+  switch (pso_type) {
    case PSO_GENERIC:
      return "PSO_GENERIC";
-    case PSO_SPECIALISED:
-      return "PSO_SPECIALISED";
+    case PSO_SPECIALIZED_INTERSECT:
+      return "PSO_SPECIALIZED_INTERSECT";
+    case PSO_SPECIALIZED_SHADE:
+      return "PSO_SPECIALIZED_SHADE";
    default:
      assert(0);
  }
@@ -50,7 +53,11 @@ struct ShaderCache {

  /* Non-blocking request for a kernel, optionally specialized to the scene being rendered by
   * device. */
-  void load_kernel(DeviceKernel kernel, MetalDevice *device, bool scene_specialized);
+  void load_kernel(DeviceKernel kernel, MetalDevice *device, MetalPipelineType pso_type);
+
+  bool should_load_kernel(DeviceKernel device_kernel,
+                          MetalDevice *device,
+                          MetalPipelineType pso_type);

  void wait_for_all();

@@ -139,9 +146,53 @@ void ShaderCache::compile_thread_func(int thread_index)
  }
 }

+bool ShaderCache::should_load_kernel(DeviceKernel device_kernel,
+                                     MetalDevice *device,
+                                     MetalPipelineType pso_type)
+{
+  if (device_kernel == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
+    /* Skip megakernel. */
+    return false;
+  }
+
+  if (device_kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
+    if ((device->kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0) {
+      /* Skip shade_surface_raytrace kernel if the scene doesn't require it. */
+      return false;
+    }
+  }
+
+  if (pso_type != PSO_GENERIC) {
+    /* Only specialize kernels where it can make an impact. */
+    if (device_kernel < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
+        device_kernel > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
+      return false;
+    }
+
+    /* Only specialize shading / intersection kernels as requested. */
+    bool is_shade_kernel = (device_kernel >= DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+    bool is_shade_pso = (pso_type == PSO_SPECIALIZED_SHADE);
+    if (is_shade_pso != is_shade_kernel) {
+      return false;
+    }
+  }
+
+  {
+    /* check whether the kernel has already been requested / cached */
+    thread_scoped_lock lock(cache_mutex);
+    for (auto &pipeline : pipelines[device_kernel]) {
+      if (pipeline->source_md5 == device->source_md5[pso_type]) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
 void ShaderCache::load_kernel(DeviceKernel device_kernel,
                              MetalDevice *device,
-                              bool scene_specialized)
+                              MetalPipelineType pso_type)
 {
  {
    /* create compiler threads on first run */
@@ -154,52 +205,21 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
    }
  }

-  if (device_kernel == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
-    /* skip megakernel */
+  if (!should_load_kernel(device_kernel, device, pso_type)) {
    return;
  }

-  if (scene_specialized) {
-    /* Only specialize kernels where it can make an impact. */
-    if (device_kernel < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
-        device_kernel > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
-      return;
-    }
-  }
-
-  {
-    /* check whether the kernel has already been requested / cached */
-    thread_scoped_lock lock(cache_mutex);
-    for (auto &pipeline : pipelines[device_kernel]) {
-      if (scene_specialized) {
-        if (pipeline->source_md5 == device->source_md5[PSO_SPECIALISED]) {
-          /* we already requested a pipeline that is specialized for this kernel data */
-          metal_printf("Specialized kernel already requested (%s)\n",
-                       device_kernel_as_string(device_kernel));
-          return;
-        }
-      }
-      else {
-        if (pipeline->source_md5 == device->source_md5[PSO_GENERIC]) {
-          /* we already requested a generic pipeline for this kernel */
-          metal_printf("Generic kernel already requested (%s)\n",
-                       device_kernel_as_string(device_kernel));
-          return;
-        }
-      }
-    }
-  }
-
  incomplete_requests++;

  PipelineRequest request;
  request.pipeline = new MetalKernelPipeline;
-  request.pipeline->scene_specialized = scene_specialized;
+  memcpy(&request.pipeline->kernel_data_,
+         &device->launch_params.data,
+         sizeof(request.pipeline->kernel_data_));
+  request.pipeline->pso_type = pso_type;
  request.pipeline->mtlDevice = mtlDevice;
-  request.pipeline->source_md5 =
-      device->source_md5[scene_specialized ? PSO_SPECIALISED : PSO_GENERIC];
-  request.pipeline->mtlLibrary =
-      device->mtlLibrary[scene_specialized ? PSO_SPECIALISED : PSO_GENERIC];
+  request.pipeline->source_md5 = device->source_md5[pso_type];
+  request.pipeline->mtlLibrary = device->mtlLibrary[pso_type];
  request.pipeline->device_kernel = device_kernel;
  request.pipeline->threads_per_threadgroup = device->max_threads_per_threadgroup;

@@ -214,7 +234,24 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,

  {
    thread_scoped_lock lock(cache_mutex);
-    pipelines[device_kernel].push_back(unique_ptr<MetalKernelPipeline>(request.pipeline));
+    auto &collection = pipelines[device_kernel];
+
+    /* Cache up to 3 kernel variants with the same pso_type, purging oldest first. */
+    int max_entries_of_same_pso_type = 3;
+    for (int i = (int)collection.size() - 1; i >= 0; i--) {
+      if (collection[i]->pso_type == pso_type) {
+        max_entries_of_same_pso_type -= 1;
+        if (max_entries_of_same_pso_type == 0) {
+          metal_printf("Purging oldest %s:%s kernel from ShaderCache\n",
+                       kernel_type_as_string(pso_type),
+                       device_kernel_as_string(device_kernel));
+          collection.erase(collection.begin() + i);
+          break;
+        }
+      }
+    }
+
+    collection.push_back(unique_ptr<MetalKernelPipeline>(request.pipeline));
    request_queue.push_back(request);
  }
  cond_var.notify_one();
@@ -248,8 +285,9 @@ MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const M
      continue;
    }

-    if (pipeline->scene_specialized) {
-      if (pipeline->source_md5 == device->source_md5[PSO_SPECIALISED]) {
+    if (pipeline->pso_type != PSO_GENERIC) {
+      if (pipeline->source_md5 == device->source_md5[PSO_SPECIALIZED_INTERSECT] ||
+          pipeline->source_md5 == device->source_md5[PSO_SPECIALIZED_SHADE]) {
        best_pipeline = pipeline.get();
      }
    }
@@ -258,13 +296,65 @@ MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const M
    }
  }

+  if (best_pipeline->usage_count == 0 && best_pipeline->pso_type != PSO_GENERIC) {
+    metal_printf("Swapping in %s version of %s\n",
+                 kernel_type_as_string(best_pipeline->pso_type),
+                 device_kernel_as_string(kernel));
+  }
+  best_pipeline->usage_count += 1;
+
  return best_pipeline;
 }

+/* Decide whether compile() should use a binary archive for this pipeline.
+ * Depends on an opt-out environment variable, the pipeline type and the kernel. */
+bool MetalKernelPipeline::should_use_binary_archive() const
+{
+  /* Don't archive if we have opted out by env var (any non-zero integer value). */
+  const char *opt_out = getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES");
+  if (opt_out != nullptr && atoi(opt_out) != 0) {
+    return false;
+  }
+
+  /* Archive the generic kernels. */
+  if (pso_type == PSO_GENERIC) {
+    return true;
+  }
+
+  /* Archive all shade kernels - they take a long time to compile.
+   *
+   * The remaining kernels are all fast to compile. They may get cached by the system shader
+   * cache, but will be quick to regenerate if not. */
+  const bool is_shade_kernel = (device_kernel >= DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND &&
+                                device_kernel <= DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
+
+  return is_shade_kernel;
+}
+
+static MTLFunctionConstantValues *GetConstantValues(KernelData const *data = nullptr)
+{
+  MTLFunctionConstantValues *constant_values = [MTLFunctionConstantValues new];
+  /* Build Metal function constants from `data`, or from zeroed KernelData when it is null. */
+  MTLDataType MTLDataType_int = MTLDataTypeInt; /* Pasted via MTLDataType_##_type below. */
+  MTLDataType MTLDataType_float = MTLDataTypeFloat;
+  MTLDataType MTLDataType_float4 = MTLDataTypeFloat4;
+  KernelData zero_data = {0};
+  if (!data) {
+    data = &zero_data; /* No kernel data supplied: fall back to all-zero values. */
+  }
+  /* One setConstantValue call per KERNEL_STRUCT_MEMBER; presumably expanded by the include. */
+#  define KERNEL_STRUCT_MEMBER(parent, _type, name) \
+    [constant_values setConstantValue:&data->parent.name \
+                                 type:MTLDataType_##_type \
+                              atIndex:KernelData_##parent##_##name];
+
+#  include "kernel/data_template.h"
+
+  return constant_values;
+}
+
 void MetalKernelPipeline::compile()
 {
-  int pso_type = scene_specialized ? PSO_SPECIALISED : PSO_GENERIC;
-
  const std::string function_name = std::string("cycles_metal_") +
                                    device_kernel_as_string(device_kernel);

@@ -281,6 +371,17 @@ void MetalKernelPipeline::compile()
  if (@available(macOS 11.0, *)) {
    MTLFunctionDescriptor *func_desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
    func_desc.name = entryPoint;
+
+    if (pso_type == PSO_SPECIALIZED_SHADE) {
+      func_desc.constantValues = GetConstantValues(&kernel_data_);
+    }
+    else if (pso_type == PSO_SPECIALIZED_INTERSECT) {
+      func_desc.constantValues = GetConstantValues(&kernel_data_);
+    }
+    else {
+      func_desc.constantValues = GetConstantValues();
+    }
+
    function = [mtlLibrary newFunctionWithDescriptor:func_desc error:&error];
  }

@@ -427,10 +528,7 @@ void MetalKernelPipeline::compile()

  MTLPipelineOption pipelineOptions = MTLPipelineOptionNone;

-  bool use_binary_archive = true;
-  if (auto str = getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) {
-    use_binary_archive = (atoi(str) == 0);
-  }
+  bool use_binary_archive = should_use_binary_archive();

  id<MTLBinaryArchive> archive = nil;
  string metalbin_path;
@@ -608,17 +706,30 @@ void MetalKernelPipeline::compile()
  }
 }

-bool MetalDeviceKernels::load(MetalDevice *device, bool scene_specialized)
+/* Call load_kernel() for every DeviceKernel with `pso_type`, then wait_for_all() on the cache. */
+bool MetalDeviceKernels::load(MetalDevice *device, MetalPipelineType pso_type)
+{
+  const double time_begin = time_dt();
+  auto cache = get_shader_cache(device->mtlDevice);
+  for (int kernel_index = 0; kernel_index < DEVICE_KERNEL_NUM; ++kernel_index) {
+    cache->load_kernel(static_cast<DeviceKernel>(kernel_index), device, pso_type);
+  }
+  cache->wait_for_all();
+  metal_printf("Back-end compilation finished in %.1f seconds (%s)\n",
+               time_dt() - time_begin,
+               kernel_type_as_string(pso_type));
+  return true; /* Unconditional: this function has no failure path. */
+}
+
+bool MetalDeviceKernels::should_load_kernels(MetalDevice *device, MetalPipelineType pso_type)
 {
  auto shader_cache = get_shader_cache(device->mtlDevice);
  for (int i = 0; i < DEVICE_KERNEL_NUM; i++) {
-    shader_cache->load_kernel((DeviceKernel)i, device, scene_specialized);
+    if (shader_cache->should_load_kernel((DeviceKernel)i, device, pso_type)) {
+      return true;
+    }
  }
-
-  if (!scene_specialized || getenv("CYCLES_METAL_PROFILING")) {
-    shader_cache->wait_for_all();
-  }
-  return true;
+  return false;
 }

 const MetalKernelPipeline *MetalDeviceKernels::get_best_pipeline(const MetalDevice *device,
--- a/intern/cycles/device/metal/queue.h
+++ b/intern/cycles/device/metal/queue.h
@@ -24,6 +24,7 @@ class MetalDeviceQueue : public DeviceQueue {

  virtual int num_concurrent_states(const size_t) const override;
  virtual int num_concurrent_busy_states() const override;
+  virtual int num_sort_partition_elements() const override;

  virtual void init_execution() override;

--- a/intern/cycles/device/metal/queue.mm
+++ b/intern/cycles/device/metal/queue.mm
@@ -293,6 +293,11 @@ int MetalDeviceQueue::num_concurrent_busy_states() const
  return result;
 }

+int MetalDeviceQueue::num_sort_partition_elements() const
+{
+  return MetalInfo::optimal_sort_partition_elements(metal_device_->mtlDevice);
+}
+
 void MetalDeviceQueue::init_execution()
 {
  /* Synchronize all textures and memory copies before executing task. */
@@ -359,7 +364,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
  /* Prepare any non-pointer (i.e. plain-old-data) KernelParamsMetal data */
  /* The plain-old-data is contiguous, continuing to the end of KernelParamsMetal */
  size_t plain_old_launch_data_offset = offsetof(KernelParamsMetal, integrator_state) +
-                                        sizeof(IntegratorStateGPU);
+                                        offsetof(IntegratorStateGPU, sort_partition_divisor);
  size_t plain_old_launch_data_size = sizeof(KernelParamsMetal) - plain_old_launch_data_offset;
  memcpy(init_arg_buffer + globals_offsets + plain_old_launch_data_offset,
         (uint8_t *)&metal_device_->launch_params + plain_old_launch_data_offset,
@@ -416,7 +421,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,

  /* this relies on IntegratorStateGPU layout being contiguous device_ptrs  */
  const size_t pointer_block_end = offsetof(KernelParamsMetal, integrator_state) +
-                                   sizeof(IntegratorStateGPU);
+                                   offsetof(IntegratorStateGPU, sort_partition_divisor);
  for (size_t offset = 0; offset < pointer_block_end; offset += sizeof(device_ptr)) {
    int pointer_index = int(offset / sizeof(device_ptr));
    MetalDevice::MetalMem *mmem = *(
@@ -550,7 +555,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
    /* Enhanced command buffer errors are only available in 11.0+ */
    if (@available(macos 11.0, *)) {
      if (command_buffer.status == MTLCommandBufferStatusError && command_buffer.error != nil) {
-        printf("CommandBuffer Failed: %s\n", [kernel_name UTF8String]);
+        metal_device_->set_error(string("CommandBuffer Failed: ") + [kernel_name UTF8String]);
        NSArray<id<MTLCommandBufferEncoderInfo>> *encoderInfos = [command_buffer.error.userInfo
            valueForKey:MTLCommandBufferEncoderInfoErrorKey];
        if (encoderInfos != nil) {
@@ -564,7 +569,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
        }
      }
      else if (command_buffer.error) {
-        printf("CommandBuffer Failed: %s\n", [kernel_name UTF8String]);
+        metal_device_->set_error(string("CommandBuffer Failed: ") + [kernel_name UTF8String]);
      }
    }
  }];
--- a/intern/cycles/device/metal/util.h
+++ b/intern/cycles/device/metal/util.h
@@ -25,10 +25,20 @@ enum MetalGPUVendor {
  METAL_GPU_INTEL = 3,
 };

+enum AppleGPUArchitecture {
+  APPLE_UNKNOWN, /* Device name contains neither "M1" nor "M2". */
+  APPLE_M1,      /* Device name contains "M1". */
+  APPLE_M2,      /* Device name contains "M2" (and not "M1"). */
+};
+
 /* Contains static Metal helper functions. */
 struct MetalInfo {
  static vector<id<MTLDevice>> const &get_usable_devices();
-  static MetalGPUVendor get_vendor_from_device_name(string const &device_name);
+  static int get_apple_gpu_core_count(id<MTLDevice> device);
+  static MetalGPUVendor get_device_vendor(id<MTLDevice> device);
+  static AppleGPUArchitecture get_apple_gpu_architecture(id<MTLDevice> device);
+  static int optimal_sort_partition_elements(id<MTLDevice> device);
+  static string get_device_name(id<MTLDevice> device);
 };

 /* Pool of MTLBuffers whose lifetime is linked to a single MTLCommandBuffer */
--- a/intern/cycles/device/metal/util.mm
+++ b/intern/cycles/device/metal/util.mm
@@ -10,26 +10,83 @@
 #  include "util/string.h"
 #  include "util/time.h"

+#  include <IOKit/IOKitLib.h>
 #  include <pwd.h>
 #  include <sys/shm.h>
 #  include <time.h>

 CCL_NAMESPACE_BEGIN

-MetalGPUVendor MetalInfo::get_vendor_from_device_name(string const &device_name)
+string MetalInfo::get_device_name(id<MTLDevice> device)
 {
-  if (device_name.find("Intel") != string::npos) {
+  string device_name = [device.name UTF8String];
+  if (get_device_vendor(device) == METAL_GPU_APPLE) {
+    /* Append the GPU core count so we can distinguish between GPU variants in benchmarks. */
+    int gpu_core_count = get_apple_gpu_core_count(device);
+    device_name += string_printf(gpu_core_count ? " (GPU - %d cores)" : " (GPU)", gpu_core_count);
+  }
+  return device_name;
+}
+
+int MetalInfo::get_apple_gpu_core_count(id<MTLDevice> device)
+{
+  int core_count = 0; /* Stays 0 when the "gpu-core-count" property cannot be read. */
+  if (@available(macos 12.0, *)) {
+    io_service_t service = IOServiceGetMatchingService(
+        kIOMainPortDefault, IORegistryEntryIDMatching(device.registryID));
+    if (CFTypeRef ref = IORegistryEntryCreateCFProperty(service, CFSTR("gpu-core-count"), 0, 0)) {
+      if (CFGetTypeID(ref) == CFNumberGetTypeID()) {
+        CFNumberGetValue((CFNumberRef)ref, kCFNumberSInt32Type, &core_count);
+      }
+      CFRelease(ref);
+    }
+    IOObjectRelease(service); /* We own the matched service; release it to avoid a leak. */
+  }
+  return core_count;
+}
+
+AppleGPUArchitecture MetalInfo::get_apple_gpu_architecture(id<MTLDevice> device)
+{
+  /* Classify by substring match on the Metal device name; "M1" is tested before "M2". */
+  const char *name = [device.name UTF8String];
+
+  if (strstr(name, "M1") != nullptr) {
+    return APPLE_M1;
+  }
+
+  return (strstr(name, "M2") != nullptr) ? APPLE_M2 : APPLE_UNKNOWN;
+}
+
+MetalGPUVendor MetalInfo::get_device_vendor(id<MTLDevice> device)
+{
+  const char *device_name = [device.name UTF8String];
+  if (strstr(device_name, "Intel")) {
    return METAL_GPU_INTEL;
  }
-  else if (device_name.find("AMD") != string::npos) {
+  else if (strstr(device_name, "AMD")) {
    return METAL_GPU_AMD;
  }
-  else if (device_name.find("Apple") != string::npos) {
+  else if (strstr(device_name, "Apple")) {
    return METAL_GPU_APPLE;
  }
  return METAL_GPU_UNKNOWN;
 }

+int MetalInfo::optimal_sort_partition_elements(id<MTLDevice> device)
+{
+  /* An explicit override from the environment always wins. */
+  const char *override_str = getenv("CYCLES_METAL_SORT_PARTITION_ELEMENTS");
+  if (override_str != nullptr) {
+    return atoi(override_str);
+  }
+
+  /* On M1 and M2 GPUs, we see better cache utilization if we partition the active indices before
+   * sorting each partition by material. Partitioning into chunks of 65536 elements results in an
+   * overall render time speedup of up to 15%. */
+  const bool is_apple_gpu = (get_device_vendor(device) == METAL_GPU_APPLE);
+  return is_apple_gpu ? 65536 : 0;
+}
+
 vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
 {
  static vector<id<MTLDevice>> usable_devices;
@@ -41,9 +98,8 @@ vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()

  metal_printf("Usable Metal devices:\n");
  for (id<MTLDevice> device in MTLCopyAllDevices()) {
-    const char *device_name = [device.name UTF8String];
-
-    MetalGPUVendor vendor = get_vendor_from_device_name(device_name);
+    string device_name = get_device_name(device);
+    MetalGPUVendor vendor = get_device_vendor(device);
    bool usable = false;

    if (@available(macos 12.2, *)) {
@@ -55,12 +111,12 @@ vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
    }

    if (usable) {
-      metal_printf("- %s\n", device_name);
+      metal_printf("- %s\n", device_name.c_str());
      [device retain];
      usable_devices.push_back(device);
    }
    else {
-      metal_printf("  (skipping \"%s\")\n", device_name);
+      metal_printf("  (skipping \"%s\")\n", device_name.c_str());
    }
  }
  if (usable_devices.empty()) {
--- a/intern/cycles/device/oneapi/device.cpp
+++ b/intern/cycles/device/oneapi/device.cpp
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2021-2022 Intel Corporation */
+
+#include "device/oneapi/device.h"
+
+#include "util/log.h"
+
+#ifdef WITH_ONEAPI
+#  include "device/device.h"
+#  include "device/oneapi/device_impl.h"
+
+#  include "util/path.h"
+#  include "util/string.h"
+
+#  ifdef __linux__
+#    include <dlfcn.h>
+#  endif
+#endif /* WITH_ONEAPI */
+
+CCL_NAMESPACE_BEGIN
+
+#ifdef WITH_ONEAPI
+static OneAPIDLLInterface oneapi_dll;
+#endif
+
+#ifdef _WIN32
+#  define LOAD_ONEAPI_SHARED_LIBRARY(path) (void *)(LoadLibrary(path))
+#  define FREE_SHARED_LIBRARY(handle) FreeLibrary((HMODULE)handle)
+#  define GET_SHARED_LIBRARY_SYMBOL(handle, name) GetProcAddress((HMODULE)handle, name)
+#elif __linux__
+#  define LOAD_ONEAPI_SHARED_LIBRARY(path) dlopen(path, RTLD_NOW)
+#  define FREE_SHARED_LIBRARY(handle) dlclose(handle)
+#  define GET_SHARED_LIBRARY_SYMBOL(handle, name) dlsym(handle, name)
+#endif
+
+bool device_oneapi_init()
+{
+#if !defined(WITH_ONEAPI)
+  return false;
+#else
+  /* Load the oneAPI kernel library and resolve the functions in dll_interface_template.h. */
+  string lib_path = path_get("lib");
+#  ifdef _WIN32
+  lib_path = path_join(lib_path, "cycles_kernel_oneapi.dll");
+#  else
+  lib_path = path_join(lib_path, "cycles_kernel_oneapi.so");
+#  endif
+  void *lib_handle = LOAD_ONEAPI_SHARED_LIBRARY(lib_path.c_str());
+
+  /* This shouldn't happen, but it still makes sense to have a branch for this. */
+  if (lib_handle == NULL) {
+    LOG(ERROR) << "oneAPI kernel shared library cannot be loaded for some reason. This should not "
+                  "happen, however, it occurs hence oneAPI rendering will be disabled";
+    return false;
+  }
+
+#  define DLL_INTERFACE_CALL(function, return_type, ...) \
+    (oneapi_dll.function) = reinterpret_cast<decltype(oneapi_dll.function)>( \
+        GET_SHARED_LIBRARY_SYMBOL(lib_handle, #function)); \
+    if (oneapi_dll.function == NULL) { \
+      LOG(ERROR) << "oneAPI shared library function \"" << #function \
+                 << "\" has not been loaded from kernel shared library - " \
+                    "disable oneAPI implementation due to this"; \
+      FREE_SHARED_LIBRARY(lib_handle); \
+      return false; \
+    }
+#  include "kernel/device/oneapi/dll_interface_template.h"
+#  undef DLL_INTERFACE_CALL
+
+  VLOG_INFO << "oneAPI kernel shared library has been loaded successfully";
+
+  /* We need to have this oneapi kernel shared library during all life-span of the Blender.
+   * So it is not unloaded because of this.
+   * FREE_SHARED_LIBRARY(lib_handle); */
+
+  /* NOTE(@nsirgien): we need to enable JIT cache from here and
+   * right now this cache policy is controlled by env. variables. */
+  /* NOTE(hallade) we also disable use of copy engine as it
+   * improves stability as of intel/LLVM SYCL-nightly/20220529.
+   * All these env variable can be set beforehand by end-users and
+   * will in that case -not- be overwritten. */
+#  ifdef _WIN32
+  if (getenv("SYCL_CACHE_PERSISTENT") == nullptr) {
+    _putenv_s("SYCL_CACHE_PERSISTENT", "1");
+  }
+  if (getenv("SYCL_CACHE_THRESHOLD") == nullptr) {
+    _putenv_s("SYCL_CACHE_THRESHOLD", "0");
+  }
+  if (getenv("SYCL_DEVICE_FILTER") == nullptr) {
+    _putenv_s("SYCL_DEVICE_FILTER", "host,level_zero");
+  }
+  if (getenv("SYCL_ENABLE_PCI") == nullptr) {
+    _putenv_s("SYCL_ENABLE_PCI", "1");
+  }
+  if (getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE") == nullptr) {
+    _putenv_s("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE", "0");
+  }
+#  elif __linux__
+  setenv("SYCL_CACHE_PERSISTENT", "1", false);
+  setenv("SYCL_CACHE_THRESHOLD", "0", false);
+  setenv("SYCL_DEVICE_FILTER", "host,level_zero", false);
+  setenv("SYCL_ENABLE_PCI", "1", false);
+  setenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE", "0", false);
+#  endif
+
+  return true;
+#endif
+}
+
+#if defined(_WIN32) || defined(__linux__)
+#  undef LOAD_SYCL_SHARED_LIBRARY
+#  undef LOAD_ONEAPI_SHARED_LIBRARY
+#  undef FREE_SHARED_LIBRARY
+#  undef GET_SHARED_LIBRARY_SYMBOL
+#endif
+
+Device *device_oneapi_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
+{
+#ifdef WITH_ONEAPI
+  return new OneapiDevice(info, oneapi_dll, stats, profiler); /* Raw pointer from `new`. */
+#else
+  (void)info;
+  (void)stats;
+  (void)profiler;
+  /* Reaching this in a build without oneAPI is a caller error; it is logged as fatal. */
+  LOG(FATAL) << "Requested to create oneAPI device while not enabled for this build.";
+
+  return nullptr;
+#endif
+}
+
+#ifdef WITH_ONEAPI
+static void device_iterator_cb(const char *id, const char *name, int num, void *user_ptr)
+{
+  /* Device enumeration callback: `user_ptr` is the vector<DeviceInfo> that collects results. */
+  vector<DeviceInfo> *found_devices = static_cast<vector<DeviceInfo> *>(user_ptr);
+
+  DeviceInfo entry;
+  entry.type = DEVICE_ONEAPI;
+  entry.num = num;
+  entry.description = name;
+
+  /* NOTE(@nsirgien): Should be unique at least on proper oneapi installation. */
+  entry.id = id;
+
+  /* Fixed capability values reported for every oneAPI device. */
+  entry.has_nanovdb = true;
+  entry.has_gpu_queue = true;
+  entry.denoisers = 0;
+
+  /* NOTE(@nsirgien): oneAPI right now is focused on one device usage. In future it maybe will
+   * change, but right now peer access from one device to another device is not supported. */
+  entry.has_peer_memory = false;
+
+  /* NOTE(@nsirgien): Seems not possible to know from SYCL/oneAPI or Level0. */
+  entry.display_device = false;
+
+  found_devices->push_back(entry);
+  VLOG_INFO << "Added device \"" << name << "\" with id \"" << entry.id << "\".";
+}
+#endif
+
+void device_oneapi_info(vector<DeviceInfo> &devices)
+{
+#ifdef WITH_ONEAPI
+  (oneapi_dll.oneapi_iterate_devices)(device_iterator_cb, &devices);
+#else  /* WITH_ONEAPI */
+  (void)devices; /* Nothing is enumerated in builds without oneAPI. */
+#endif /* WITH_ONEAPI */
+}
+
+string device_oneapi_capabilities()
+{
+  string capabilities; /* Stays empty without WITH_ONEAPI or when the DLL returns null. */
+#ifdef WITH_ONEAPI
+  char *c_capabilities = (oneapi_dll.oneapi_device_capabilities)();
+  if (c_capabilities) {
+    capabilities = c_capabilities; /* Copy the text before handing the buffer back. */
+    (oneapi_dll.oneapi_free)(c_capabilities);
+  }
+#endif
+  return capabilities;
+}
+
+CCL_NAMESPACE_END
--- a/intern/cycles/device/oneapi/device.h
+++ b/intern/cycles/device/oneapi/device.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#pragma once
+
+#include "util/string.h"
+#include "util/vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+class Device;
+class DeviceInfo;
+class Profiler;
+class Stats;
+
+bool device_oneapi_init();
+
+Device *device_oneapi_create(const DeviceInfo &info, Stats &stats, Profiler &profiler);
+
+void device_oneapi_info(vector<DeviceInfo> &devices);
+
+string device_oneapi_capabilities();
+
+CCL_NAMESPACE_END
--- a/intern/cycles/device/oneapi/device_impl.cpp
+++ b/intern/cycles/device/oneapi/device_impl.cpp
@@ -0,0 +1,438 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2021-2022 Intel Corporation */
+
+#ifdef WITH_ONEAPI
+
+#  include "device/oneapi/device_impl.h"
+
+#  include "util/debug.h"
+#  include "util/log.h"
+
+#  include "kernel/device/oneapi/kernel.h"
+
+CCL_NAMESPACE_BEGIN
+
+static void queue_error_cb(const char *message, void *user_ptr)
+{
+  if (std::string *sink = static_cast<std::string *>(user_ptr)) {
+    *sink = message; /* Keep the latest runtime error text; a null `user_ptr` is ignored. */
+  }
+}
+
+OneapiDevice::OneapiDevice(const DeviceInfo &info,
+                           OneAPIDLLInterface &oneapi_dll_object,
+                           Stats &stats,
+                           Profiler &profiler)
+    : Device(info, stats, profiler),
+      device_queue_(nullptr),
+      texture_info_(this, "texture_info", MEM_GLOBAL),
+      kg_memory_(nullptr),
+      kg_memory_device_(nullptr),
+      kg_memory_size_(0),
+      oneapi_dll_(oneapi_dll_object)
+{
+  need_texture_info_ = false;
+  /* Route runtime error messages from the DLL into oneapi_error_string_. */
+  oneapi_dll_.oneapi_set_error_cb(queue_error_cb, &oneapi_error_string_);
+
+  /* OneAPI calls should be initialized on this moment. */
+  assert(oneapi_dll_.oneapi_create_queue != nullptr);
+
+  bool is_finished_ok = oneapi_dll_.oneapi_create_queue(device_queue_, info.num);
+  if (is_finished_ok == false) {
+    set_error("oneAPI queue initialization error: got runtime exception \"" +
+              oneapi_error_string_ + "\"");
+  }
+  else {
+    VLOG_DEBUG << "oneAPI queue has been successfully created for the device \""
+               << info.description << "\"";
+    assert(device_queue_);
+  }
+  /* Start from zero so a failed size query cannot leave an indeterminate value behind. */
+  size_t globals_segment_size = 0;
+  is_finished_ok = oneapi_dll_.oneapi_kernel_globals_size(device_queue_, globals_segment_size);
+  if (is_finished_ok == false) {
+    set_error("oneAPI constant memory initialization got runtime exception \"" +
+              oneapi_error_string_ + "\"");
+  }
+  else {
+    VLOG_DEBUG << "Successfully created global/constant memory segment (kernel globals object)";
+  }
+  /* NOTE(review): setup continues even after the errors above - confirm this is intended. */
+  kg_memory_ = oneapi_dll_.oneapi_usm_aligned_alloc_host(device_queue_, globals_segment_size, 16);
+  oneapi_dll_.oneapi_usm_memset(device_queue_, kg_memory_, 0, globals_segment_size);
+
+  kg_memory_device_ = oneapi_dll_.oneapi_usm_alloc_device(device_queue_, globals_segment_size);
+
+  kg_memory_size_ = globals_segment_size;
+}
+
+OneapiDevice::~OneapiDevice()
+{
+  texture_info_.free();
+  oneapi_dll_.oneapi_usm_free(device_queue_, kg_memory_);
+  oneapi_dll_.oneapi_usm_free(device_queue_, kg_memory_device_);
+  for (auto &name_and_data : const_mem_map_) {
+    delete name_and_data.second; /* Allocated with `new` in const_copy_to(). */
+  }
+  if (device_queue_ != nullptr) {
+    oneapi_dll_.oneapi_free_queue(device_queue_);
+  }
+}
+
+bool OneapiDevice::check_peer_access(Device * /*peer_device*/)
+{
+  return false; /* Always false; the peer device argument is not inspected. */
+}
+
+BVHLayoutMask OneapiDevice::get_bvh_layout_mask() const
+{
+  return BVH_LAYOUT_BVH2; /* The only layout this device reports. */
+}
+
+bool OneapiDevice::load_kernels(const uint requested_features)
+{
+  /* NOTE(@nsirgien): oneAPI can support compilation of kernel code with certain feature set
+   * with specialization constants, but it hasn't been implemented yet. */
+  (void)requested_features;
+  assert(device_queue_);
+  /* The result of running the test kernel is what this function reports to the caller. */
+  const bool kernels_ok = oneapi_dll_.oneapi_run_test_kernel(device_queue_);
+  if (kernels_ok) {
+    VLOG_INFO << "Runtime compilation done for \"" << info.description << "\"";
+    assert(device_queue_);
+  }
+  else {
+    set_error("oneAPI kernel load: got runtime exception \"" + oneapi_error_string_ + "\"");
+  }
+  return kernels_ok;
+}
+
+void OneapiDevice::load_texture_info()
+{
+  if (need_texture_info_) {
+    need_texture_info_ = false; /* Clear the pending flag, then upload once. */
+    texture_info_.copy_to_device();
+  }
+}
+
+void OneapiDevice::generic_alloc(device_memory &mem)
+{
+  const size_t bytes_requested = mem.memory_size();
+  /* Allocate USM device memory for `mem`; on failure set the device error and return early. */
+  /* TODO(@nsirgien): In future, if scene doesn't fit into device memory, then
+   * we can use USM host memory.
+   * Because of the expected performance impact, implementation of this has had a low priority
+   * and is not implemented yet. */
+
+  assert(device_queue_);
+  /* NOTE(@nsirgien): There are three types of Unified Shared Memory (USM) in oneAPI: host, device
+   * and shared. For new project it maybe more beneficial to use USM shared memory, because it
+   * provides automatic migration mechanism in order to allow to use the same pointer on host and
+   * on device, without need to worry about explicit memory transfer operations. But for
+   * Blender/Cycles this type of memory is not very suitable in current application architecture,
+   * because Cycles already uses two different pointer for host activity and device activity, and
+   * also has to perform all needed memory transfer operations. So, USM device memory
+   * type has been used for oneAPI device in order to better fit in Cycles architecture. */
+  void *usm_ptr = oneapi_dll_.oneapi_usm_alloc_device(device_queue_, bytes_requested);
+  if (!usm_ptr) {
+    const size_t device_capacity = oneapi_dll_.oneapi_get_memcapacity(device_queue_);
+    set_error("oneAPI kernel - device memory allocation error for " +
+              string_human_readable_size(mem.memory_size()) +
+              ", possibly caused by lack of available memory space on the device: " +
+              string_human_readable_size(stats.mem_used) + " of " +
+              string_human_readable_size(device_capacity) + " is already allocated");
+    return;
+  }
+  assert(usm_ptr);
+
+  stats.mem_alloc(bytes_requested);
+
+  mem.device_size = bytes_requested;
+  mem.device_pointer = reinterpret_cast<ccl::device_ptr>(usm_ptr);
+}
+
+void OneapiDevice::generic_copy_to(device_memory &mem)
+{
+  if (!mem.device_pointer) {
+    return; /* No device allocation (e.g. generic_alloc() failed): nothing to copy into. */
+  }
+  /* Copy operation from host shouldn't be requested if there is no memory allocated on host. */
+  assert(mem.host_pointer && device_queue_);
+  oneapi_dll_.oneapi_usm_memcpy(
+      device_queue_, (void *)mem.device_pointer, (void *)mem.host_pointer, mem.memory_size());
+}
+
+/* TODO: Make sycl::queue part of OneapiQueue and avoid using pointers to sycl::queue. */
+SyclQueue *OneapiDevice::sycl_queue()
+{
+  return device_queue_; /* Null unless the constructor's oneapi_create_queue() call set it. */
+}
+
+string OneapiDevice::oneapi_error_message()
+{
+  return string(oneapi_error_string_); /* Copy of the text last stored via queue_error_cb. */
+}
+
+OneAPIDLLInterface OneapiDevice::oneapi_dll_object()
+{
+  return oneapi_dll_; /* Returned by value, i.e. a copy of the interface object. */
+}
+
+void *OneapiDevice::kernel_globals_device_pointer()
+{
+  return kg_memory_device_; /* Allocated with oneapi_usm_alloc_device() in the constructor. */
+}
+
+void OneapiDevice::generic_free(device_memory &mem)
+{
+  assert(mem.device_pointer);
+  stats.mem_free(mem.device_size);
+  mem.device_size = 0;
+  /* Accounting is updated first; then the USM allocation is released and the handle cleared. */
+  assert(device_queue_);
+  oneapi_dll_.oneapi_usm_free(device_queue_, (void *)mem.device_pointer);
+  mem.device_pointer = 0;
+}
+
+void OneapiDevice::mem_alloc(device_memory &mem)
+{
+  if (mem.type == MEM_TEXTURE) {
+    assert(!"mem_alloc not supported for textures.");
+    return; /* mem_copy_to() routes textures through tex_alloc() instead. */
+  }
+  if (mem.type == MEM_GLOBAL) {
+    assert(!"mem_alloc not supported for global memory.");
+    return; /* mem_copy_to() routes global memory through global_alloc() instead. */
+  }
+  if (mem.name) {
+    VLOG_DEBUG << "OneapiDevice::mem_alloc: \"" << mem.name << "\", "
+               << string_human_readable_number(mem.memory_size()) << " bytes. ("
+               << string_human_readable_size(mem.memory_size()) << ")";
+  }
+  generic_alloc(mem);
+}
+
+void OneapiDevice::mem_copy_to(device_memory &mem)
+{
+  if (mem.name) {
+    VLOG_DEBUG << "OneapiDevice::mem_copy_to: \"" << mem.name << "\", "
+               << string_human_readable_number(mem.memory_size()) << " bytes. ("
+               << string_human_readable_size(mem.memory_size()) << ")";
+  }
+  /* Texture and global memory are freed and allocated anew; other types are copied over. */
+  if (mem.type == MEM_TEXTURE) {
+    tex_free((device_texture &)mem);
+    tex_alloc((device_texture &)mem);
+    return;
+  }
+  if (mem.type == MEM_GLOBAL) {
+    global_free(mem);
+    global_alloc(mem);
+    return;
+  }
+  if (!mem.device_pointer) {
+    mem_alloc(mem);
+  }
+  generic_copy_to(mem);
+}
+
+void OneapiDevice::mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem)
+{
+  if (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) {
+    assert(!"mem_copy_from not supported for textures and global memory.");
+  }
+  else if (mem.host_pointer) {
+    const size_t size = (w > 0 || h > 0 || elem > 0) ? (elem * w * h) : mem.memory_size();
+    const size_t offset = elem * y * w;
+    /* NOTE(review): with `||`, a zero in w/h/elem gives size 0, caught only by the assert. */
+    if (mem.name) {
+      VLOG_DEBUG << "OneapiDevice::mem_copy_from: \"" << mem.name << "\" object of "
+                 << string_human_readable_number(mem.memory_size()) << " bytes. ("
+                 << string_human_readable_size(mem.memory_size()) << ") from offset " << offset
+                 << " data " << size << " bytes";
+    }
+
+    assert(device_queue_);
+    /* Copy `size` bytes, starting `offset` bytes into both buffers, from device to host. */
+    assert(size != 0);
+    assert(mem.device_pointer);
+    char *shifted_host = reinterpret_cast<char *>(mem.host_pointer) + offset;
+    char *shifted_device = reinterpret_cast<char *>(mem.device_pointer) + offset;
+    bool is_finished_ok = oneapi_dll_.oneapi_usm_memcpy(
+        device_queue_, shifted_host, shifted_device, size);
+    if (is_finished_ok == false) {
+      set_error("oneAPI memory operation error: got runtime exception \"" + oneapi_error_string_ +
+                "\"");
+    }
+  }
+}
+
+void OneapiDevice::mem_zero(device_memory &mem)
+{
+  if (mem.name) {
+    VLOG_DEBUG << "OneapiDevice::mem_zero: \"" << mem.name << "\", "
+               << string_human_readable_number(mem.memory_size()) << " bytes. ("
+               << string_human_readable_size(mem.memory_size()) << ")\n";
+  }
+
+  /* Allocate on demand; return early if there is still no device pointer afterwards. */
+  if (!mem.device_pointer) {
+    mem_alloc(mem);
+    if (!mem.device_pointer) {
+      return;
+    }
+  }
+  assert(device_queue_);
+  const bool memset_ok = oneapi_dll_.oneapi_usm_memset(
+      device_queue_, (void *)mem.device_pointer, 0, mem.memory_size());
+  if (!memset_ok) {
+    set_error("oneAPI memory operation error: got runtime exception \"" + oneapi_error_string_ +
+              "\"");
+  }
+}
+
+/* Release the device side of `mem`, dispatching on its memory type. */
+void OneapiDevice::mem_free(device_memory &mem)
+{
+  if (mem.name) {
+    VLOG_DEBUG << "OneapiDevice::mem_free: \"" << mem.name << "\", "
+               << string_human_readable_number(mem.device_size) << " bytes. ("
+               << string_human_readable_size(mem.device_size) << ")\n";
+  }
+
+  if (mem.type == MEM_GLOBAL) {
+    global_free(mem);
+    return;
+  }
+
+  if (mem.type == MEM_TEXTURE) {
+    tex_free((device_texture &)mem);
+    return;
+  }
+
+  generic_free(mem);
+}
+
+/* Return the device pointer of `mem` advanced by `mem.memory_elements_size(offset)` bytes. The
+ * size argument is ignored. */
+device_ptr OneapiDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/)
+{
+  char *const base_address = reinterpret_cast<char *>(mem.device_pointer);
+  const auto byte_offset = mem.memory_elements_size(offset);
+  return reinterpret_cast<device_ptr>(base_address + byte_offset);
+}
+
+/* Copy `size` bytes from `host` into the constant buffer registered under `name`. The buffer is
+ * created with `size` bytes the first time a name is seen and reused afterwards. Its device
+ * pointer is then recorded in `kg_memory_` under the same name, and `kg_memory_` is copied to
+ * `kg_memory_device_`. */
+void OneapiDevice::const_copy_to(const char *name, void *host, size_t size)
+{
+  assert(name);
+
+  VLOG_DEBUG << "OneapiDevice::const_copy_to \"" << name << "\" object "
+             << string_human_readable_number(size) << " bytes. ("
+             << string_human_readable_size(size) << ")";
+
+  ConstMemMap::iterator i = const_mem_map_.find(name);
+  device_vector<uchar> *data;
+
+  if (i == const_mem_map_.end()) {
+    data = new device_vector<uchar>(this, name, MEM_READ_ONLY);
+    data->alloc(size);
+    const_mem_map_.insert(ConstMemMap::value_type(name, data));
+  }
+  else {
+    data = i->second;
+  }
+
+  /* The memcpy below writes `size` bytes, so the (possibly reused) buffer must be at least that
+   * large. The comparison was previously inverted and accepted an overflowing copy. */
+  assert(size <= data->memory_size());
+  memcpy(data->data(), host, size);
+  data->copy_to_device();
+
+  oneapi_dll_.oneapi_set_global_memory(
+      device_queue_, kg_memory_, name, (void *)data->device_pointer);
+
+  oneapi_dll_.oneapi_usm_memcpy(device_queue_, kg_memory_device_, kg_memory_, kg_memory_size_);
+}
+
+/* Allocate and upload a named global buffer, record its device pointer in `kg_memory_` under
+ * `mem.name`, then copy `kg_memory_` to `kg_memory_device_`. */
+void OneapiDevice::global_alloc(device_memory &mem)
+{
+  assert(mem.name);
+
+  const size_t size = mem.memory_size();
+  VLOG_DEBUG << "OneapiDevice::global_alloc \"" << mem.name << "\" object "
+             << string_human_readable_number(size) << " bytes. ("
+             << string_human_readable_size(size) << ")";
+
+  generic_alloc(mem);
+  generic_copy_to(mem);
+
+  void *device_address = (void *)mem.device_pointer;
+  oneapi_dll_.oneapi_set_global_memory(device_queue_, kg_memory_, mem.name, device_address);
+
+  oneapi_dll_.oneapi_usm_memcpy(device_queue_, kg_memory_device_, kg_memory_, kg_memory_size_);
+}
+
+/* Free a global buffer; does nothing when it has no device pointer. */
+void OneapiDevice::global_free(device_memory &mem)
+{
+  if (!mem.device_pointer) {
+    return;
+  }
+
+  generic_free(mem);
+}
+
+/* Allocate and upload texture memory, then store its description in `texture_info_` at the
+ * texture's slot and flag the table as changed. */
+void OneapiDevice::tex_alloc(device_texture &mem)
+{
+  generic_alloc(mem);
+  generic_copy_to(mem);
+
+  /* Grow the table when the slot does not fit, with head room of 128 entries for later
+   * allocations. */
+  const uint slot = mem.slot;
+  if (texture_info_.size() <= slot) {
+    texture_info_.resize(slot + 128);
+  }
+
+  /* Take over the texture description, but point its data at the device allocation. */
+  texture_info_[slot] = mem.info;
+  texture_info_[slot].data = (uint64_t)mem.device_pointer;
+
+  need_texture_info_ = true;
+}
+
+/* Free texture memory; does nothing when it has no device pointer. */
+void OneapiDevice::tex_free(device_texture &mem)
+{
+  if (!mem.device_pointer) {
+    return;
+  }
+
+  /* There is no texture memory in SYCL. */
+  generic_free(mem);
+}
+
+/* Create a new OneapiDeviceQueue bound to this device. */
+unique_ptr<DeviceQueue> OneapiDevice::gpu_queue_create()
+{
+  return make_unique<OneapiDeviceQueue>(this);
+}
+
+/* Ask the oneAPI kernel library for the multiprocessor count of the device queue. */
+int OneapiDevice::get_num_multiprocessors()
+{
+  assert(device_queue_);
+  const int num_multiprocessors = oneapi_dll_.oneapi_get_num_multiprocessors(device_queue_);
+  return num_multiprocessors;
+}
+
+/* Ask the oneAPI kernel library for the maximum thread count per multiprocessor of the device
+ * queue. */
+int OneapiDevice::get_max_num_threads_per_multiprocessor()
+{
+  assert(device_queue_);
+  const int max_num_threads = oneapi_dll_.oneapi_get_max_num_threads_per_multiprocessor(
+      device_queue_);
+  return max_num_threads;
+}
+
+/* Graphics interoperability is never used by this device; always returns false. */
+bool OneapiDevice::should_use_graphics_interop()
+{
+  /* NOTE(@nsirgien): oneAPI doesn't yet support direct writing into graphics API objects, so
+   * return false. */
+  return false;
+}
+
+/* Forward an aligned host allocation request to the oneAPI kernel library. */
+void *OneapiDevice::usm_aligned_alloc_host(size_t memory_size, size_t alignment)
+{
+  assert(device_queue_);
+  void *usm_ptr = oneapi_dll_.oneapi_usm_aligned_alloc_host(
+      device_queue_, memory_size, alignment);
+  return usm_ptr;
+}
+
+/* Forward the release of a USM pointer to the oneAPI kernel library. */
+void OneapiDevice::usm_free(void *usm_ptr)
+{
+  assert(device_queue_);
+  oneapi_dll_.oneapi_usm_free(device_queue_, usm_ptr);
+}
+
+CCL_NAMESPACE_END
+
+#endif
--- a/intern/cycles/device/oneapi/device_impl.h
+++ b/intern/cycles/device/oneapi/device_impl.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2021-2022 Intel Corporation */
+
+/* Guard against multiple inclusion: this header defines the OneapiDevice class, and the sibling
+ * headers queue.h and dll_interface.h are guarded the same way. */
+#pragma once
+
+#ifdef WITH_ONEAPI
+
+#  include "device/device.h"
+#  include "device/oneapi/device.h"
+#  include "device/oneapi/queue.h"
+
+#  include "util/map.h"
+
+CCL_NAMESPACE_BEGIN
+
+class DeviceQueue;
+
+/* Cycles device implementation that talks to the oneAPI kernel library through the function
+ * table in OneAPIDLLInterface. */
+class OneapiDevice : public Device {
+ private:
+  /* Queue handle passed to every call into the oneAPI kernel library. */
+  SyclQueue *device_queue_;
+
+  /* Constant buffers created by const_copy_to(), looked up by name. */
+  using ConstMemMap = map<string, device_vector<uchar> *>;
+  ConstMemMap const_mem_map_;
+  /* Texture descriptions indexed by texture slot; filled in by tex_alloc(). */
+  device_vector<TextureInfo> texture_info_;
+  /* Set by tex_alloc() whenever texture_info_ changed. */
+  bool need_texture_info_;
+  /* Kernel globals storage: const_copy_to() and global_alloc() update kg_memory_ and then copy
+   * kg_memory_size_ bytes of it to kg_memory_device_. */
+  void *kg_memory_;
+  void *kg_memory_device_;
+  size_t kg_memory_size_ = (size_t)0;
+  /* Function pointers into the oneAPI kernel library. */
+  OneAPIDLLInterface oneapi_dll_;
+  /* Text appended to the error messages reported through set_error(). */
+  std::string oneapi_error_string_;
+
+ public:
+  virtual BVHLayoutMask get_bvh_layout_mask() const override;
+
+  OneapiDevice(const DeviceInfo &info,
+               OneAPIDLLInterface &oneapi_dll_object,
+               Stats &stats,
+               Profiler &profiler);
+
+  virtual ~OneapiDevice();
+
+  bool check_peer_access(Device *peer_device) override;
+
+  bool load_kernels(const uint requested_features) override;
+
+  void load_texture_info();
+
+  /* Memory type independent helpers used by the typed alloc/copy/free functions below. */
+  void generic_alloc(device_memory &mem);
+
+  void generic_copy_to(device_memory &mem);
+
+  void generic_free(device_memory &mem);
+
+  /* Accessors used by OneapiDeviceQueue. */
+  SyclQueue *sycl_queue();
+
+  string oneapi_error_message();
+
+  OneAPIDLLInterface oneapi_dll_object();
+
+  void *kernel_globals_device_pointer();
+
+  void mem_alloc(device_memory &mem) override;
+
+  void mem_copy_to(device_memory &mem) override;
+
+  void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
+
+  /* Convenience overload: copy the whole buffer (all range arguments zero). */
+  void mem_copy_from(device_memory &mem)
+  {
+    mem_copy_from(mem, 0, 0, 0, 0);
+  }
+
+  void mem_zero(device_memory &mem) override;
+
+  void mem_free(device_memory &mem) override;
+
+  device_ptr mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/) override;
+
+  virtual void const_copy_to(const char *name, void *host, size_t size) override;
+
+  void global_alloc(device_memory &mem);
+
+  void global_free(device_memory &mem);
+
+  void tex_alloc(device_texture &mem);
+
+  void tex_free(device_texture &mem);
+
+  /* Graphics resources interoperability. */
+  virtual bool should_use_graphics_interop() override;
+
+  virtual unique_ptr<DeviceQueue> gpu_queue_create() override;
+
+  int get_num_multiprocessors();
+  int get_max_num_threads_per_multiprocessor();
+
+  /* NOTE(@nsirgien): These methods were created to avoid some compilation problems on Windows
+   * with host side compilation (MSVC). */
+  void *usm_aligned_alloc_host(size_t memory_size, size_t alignment);
+  void usm_free(void *usm_ptr);
+};
+
+CCL_NAMESPACE_END
+
+#endif
--- a/intern/cycles/device/oneapi/dll_interface.h
+++ b/intern/cycles/device/oneapi/dll_interface.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#pragma once
+
+/* Include kernel header to get access to SYCL-specific types, like SyclQueue and
+ * OneAPIDeviceIteratorCallback. */
+#include "kernel/device/oneapi/kernel.h"
+
+#ifdef WITH_ONEAPI
+/* Table of function pointers into the oneAPI kernel library. The member list is generated by
+ * including dll_interface_template.h with DLL_INTERFACE_CALL defined so that every use of the
+ * macro in that header declares one pointer member, initialized to nullptr. */
+struct OneAPIDLLInterface {
+#  define DLL_INTERFACE_CALL(function, return_type, ...) \
+    return_type (*function)(__VA_ARGS__) = nullptr;
+#  include "kernel/device/oneapi/dll_interface_template.h"
+#  undef DLL_INTERFACE_CALL
+};
+#endif
--- a/intern/cycles/device/oneapi/queue.cpp
+++ b/intern/cycles/device/oneapi/queue.cpp
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2021-2022 Intel Corporation */
+
+#ifdef WITH_ONEAPI
+
+#  include "device/oneapi/queue.h"
+#  include "device/oneapi/device_impl.h"
+#  include "util/log.h"
+#  include "util/time.h"
+#  include <iomanip>
+#  include <vector>
+
+#  include "kernel/device/oneapi/kernel.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Accumulated elapsed time and enqueue count, presumably kept per kernel.
+ * NOTE(review): not referenced by any code visible in this file - confirm it is still needed. */
+struct KernelExecutionInfo {
+  double elapsed_summary = 0.0;
+  int enqueue_count = 0;
+};
+
+/* OneapiDeviceQueue */
+
+/* Bind the queue to `device` and take a copy of its oneAPI function table. The kernel context is
+ * created later by init_execution(). */
+OneapiDeviceQueue::OneapiDeviceQueue(OneapiDevice *device)
+    : DeviceQueue(device),
+      oneapi_device_(device),
+      oneapi_dll_(device->oneapi_dll_object()),
+      kernel_context_(nullptr),
+      /* Give the flag a defined value; it was previously left uninitialized. */
+      with_kernel_statistics_(false)
+{
+}
+
+/* Release the kernel context created by init_execution(), if any. */
+OneapiDeviceQueue::~OneapiDeviceQueue()
+{
+  delete kernel_context_;
+}
+
+/* Number of integrator states to allocate: 16 times the larger of 65536 and eight times the
+ * device's maximum thread count. `state_size` is only used for the log message. */
+int OneapiDeviceQueue::num_concurrent_states(const size_t state_size) const
+{
+  const int num_multiprocessors = oneapi_device_->get_num_multiprocessors();
+  const int threads_per_multiprocessor = oneapi_device_->get_max_num_threads_per_multiprocessor();
+  const int max_num_threads = num_multiprocessors * threads_per_multiprocessor;
+
+  const int num_states = max(8 * max_num_threads, 65536) * 16;
+
+  VLOG_DEVICE_STATS << "GPU queue concurrent states: " << num_states << ", using up to "
+                    << string_human_readable_size(num_states * state_size);
+
+  return num_states;
+}
+
+/* Four times the larger of 65536 and eight times the device's maximum thread count. */
+int OneapiDeviceQueue::num_concurrent_busy_states() const
+{
+  const int num_multiprocessors = oneapi_device_->get_num_multiprocessors();
+  const int threads_per_multiprocessor = oneapi_device_->get_max_num_threads_per_multiprocessor();
+  const int max_num_threads = num_multiprocessors * threads_per_multiprocessor;
+
+  const int base_num_states = max(8 * max_num_threads, 65536);
+  return 4 * base_num_states;
+}
+
+/* Prepare the queue for launching kernels: load the device's texture info and (re)create the
+ * kernel context from the device's SYCL queue and kernel globals pointer. */
+void OneapiDeviceQueue::init_execution()
+{
+  oneapi_device_->load_texture_info();
+
+  SyclQueue *device_queue = oneapi_device_->sycl_queue();
+  void *kg_dptr = (void *)oneapi_device_->kernel_globals_device_pointer();
+  assert(device_queue);
+  assert(kg_dptr);
+
+  /* Release the context of a previous call, so repeated initialization does not leak it.
+   * Deleting a null pointer is a no-op, which covers the first call. */
+  delete kernel_context_;
+  kernel_context_ = new KernelContext{device_queue, kg_dptr};
+
+  debug_init_execution();
+}
+
+/* Launch `kernel` through the oneAPI kernel library. The work size is rounded up to a multiple
+ * of the local size the library prefers for this kernel. Returns false without launching when
+ * the device already has an error, and false (after setting the device error) when the launch
+ * itself fails. */
+bool OneapiDeviceQueue::enqueue(DeviceKernel kernel,
+                                const int signed_kernel_work_size,
+                                DeviceKernelArguments const &_args)
+{
+  if (oneapi_device_->have_error()) {
+    return false;
+  }
+
+  void **args = const_cast<void **>(_args.values);
+
+  debug_enqueue(kernel, signed_kernel_work_size);
+  assert(signed_kernel_work_size >= 0);
+  size_t kernel_work_size = (size_t)signed_kernel_work_size;
+
+  /* The context is dereferenced right below, so it has to be validated before its first use. */
+  assert(kernel_context_);
+
+  size_t kernel_local_size = oneapi_dll_.oneapi_kernel_preferred_local_size(
+      kernel_context_->queue, (::DeviceKernel)kernel, kernel_work_size);
+  size_t uniformed_kernel_work_size = round_up(kernel_work_size, kernel_local_size);
+
+  /* Call the oneAPI kernel DLL to launch the requested kernel. */
+  bool is_finished_ok = oneapi_dll_.oneapi_enqueue_kernel(
+      kernel_context_, kernel, uniformed_kernel_work_size, args);
+
+  if (is_finished_ok == false) {
+    oneapi_device_->set_error("oneAPI kernel \"" + std::string(device_kernel_as_string(kernel)) +
+                              "\" execution error: got runtime exception \"" +
+                              oneapi_device_->oneapi_error_message() + "\"");
+  }
+
+  return is_finished_ok;
+}
+
+/* Synchronize the device's SYCL queue through the oneAPI kernel library. Returns false when the
+ * device has an error, either before the call or as a result of it. */
+bool OneapiDeviceQueue::synchronize()
+{
+  if (oneapi_device_->have_error()) {
+    return false;
+  }
+
+  SyclQueue *device_queue = oneapi_device_->sycl_queue();
+  const bool is_finished_ok = oneapi_dll_.oneapi_queue_synchronize(device_queue);
+  if (!is_finished_ok) {
+    oneapi_device_->set_error("oneAPI unknown kernel execution error: got runtime exception \"" +
+                              oneapi_device_->oneapi_error_message() + "\"");
+  }
+
+  debug_synchronize();
+
+  const bool has_error = oneapi_device_->have_error();
+  return !has_error;
+}
+
+/* The memory operations of the queue are forwarded to the owning device. */
+
+/* Zero `mem` on the device via OneapiDevice::mem_zero(). */
+void OneapiDeviceQueue::zero_to_device(device_memory &mem)
+{
+  oneapi_device_->mem_zero(mem);
+}
+
+/* Upload `mem` via OneapiDevice::mem_copy_to(). */
+void OneapiDeviceQueue::copy_to_device(device_memory &mem)
+{
+  oneapi_device_->mem_copy_to(mem);
+}
+
+/* Read back `mem` via the single argument overload of OneapiDevice::mem_copy_from(). */
+void OneapiDeviceQueue::copy_from_device(device_memory &mem)
+{
+  oneapi_device_->mem_copy_from(mem);
+}
+
+CCL_NAMESPACE_END
+
+#endif /* WITH_ONEAPI */
--- a/intern/cycles/device/oneapi/queue.h
+++ b/intern/cycles/device/oneapi/queue.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2021-2022 Intel Corporation */
+
+#pragma once
+
+#ifdef WITH_ONEAPI
+
+#  include "device/kernel.h"
+#  include "device/memory.h"
+#  include "device/queue.h"
+
+#  include "device/oneapi/device.h"
+#  include "device/oneapi/dll_interface.h"
+
+CCL_NAMESPACE_BEGIN
+
+class OneapiDevice;
+class device_memory;
+
+/* Base class for OneAPI queues. */
+class OneapiDeviceQueue : public DeviceQueue {
+ public:
+  explicit OneapiDeviceQueue(OneapiDevice *device);
+  ~OneapiDeviceQueue();
+
+  /* Sizing of the integrator state pool, derived from the device's thread count. */
+  virtual int num_concurrent_states(const size_t state_size) const override;
+
+  virtual int num_concurrent_busy_states() const override;
+
+  /* Loads texture info and creates the kernel context used by enqueue(). */
+  virtual void init_execution() override;
+
+  virtual bool enqueue(DeviceKernel kernel,
+                       const int kernel_work_size,
+                       DeviceKernelArguments const &args) override;
+
+  virtual bool synchronize() override;
+
+  /* Memory operations, forwarded to the owning OneapiDevice. */
+  virtual void zero_to_device(device_memory &mem) override;
+  virtual void copy_to_device(device_memory &mem) override;
+  virtual void copy_from_device(device_memory &mem) override;
+
+ protected:
+  /* Device this queue was created for. */
+  OneapiDevice *oneapi_device_;
+  /* Copy of the device's oneAPI function table. */
+  OneAPIDLLInterface oneapi_dll_;
+  /* Created by init_execution(), deleted by the destructor. */
+  KernelContext *kernel_context_;
+  /* NOTE(review): not read by any code visible in queue.cpp - confirm it is still needed. */
+  bool with_kernel_statistics_;
+};
+
+CCL_NAMESPACE_END
+
+#endif /* WITH_ONEAPI */
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -26,7 +26,6 @@
 #  include "util/task.h"
 #  include "util/time.h"

-#  undef __KERNEL_CPU__
 #  define __KERNEL_OPTIX__
 #  include "kernel/device/optix/globals.h"

@@ -2047,7 +2046,7 @@ void OptiXDevice::const_copy_to(const char *name, void *host, size_t size)

    /* Update traversable handle (since it is different for each device on multi devices). */
    KernelData *const data = (KernelData *)host;
-    *(OptixTraversableHandle *)&data->bvh.scene = tlas_handle;
+    *(OptixTraversableHandle *)&data->device_bvh = tlas_handle;

    update_launch_params(offsetof(KernelParamsOptiX, data), host, size);
    return;
--- a/intern/cycles/device/optix/queue.cpp
+++ b/intern/cycles/device/optix/queue.cpp
@@ -8,7 +8,6 @@

 #  include "util/time.h"

-#  undef __KERNEL_CPU__
 #  define __KERNEL_OPTIX__
 #  include "kernel/device/optix/globals.h"

--- a/intern/cycles/device/queue.h
+++ b/intern/cycles/device/queue.h
@@ -105,6 +105,13 @@ class DeviceQueue {
   * value. */
  virtual int num_concurrent_busy_states() const = 0;

+  /* Number of elements in a partition of sorted shaders, that improves memory locality of
+   * integrator state fetch at the cost of decreased coherence for shader kernel execution. */
+  virtual int num_sort_partition_elements() const
+  {
+    return 65536;
+  }
+
  /* Initialize execution of kernels on this queue.
   *
   * Will, for example, load all data required by the kernels from Device to global or path state.
--- a/intern/cycles/integrator/path_trace.cpp
+++ b/intern/cycles/integrator/path_trace.cpp
@@ -373,7 +373,7 @@ void PathTrace::path_trace(RenderWork &render_work)
    work_balance_infos_[i].time_spent += work_time;
    work_balance_infos_[i].occupancy = statistics.occupancy;

-    VLOG_WORK << "Rendered " << num_samples << " samples in " << work_time << " seconds ("
+    VLOG_INFO << "Rendered " << num_samples << " samples in " << work_time << " seconds ("
              << work_time / num_samples
              << " seconds per sample), occupancy: " << statistics.occupancy;
  });
@@ -1103,6 +1103,8 @@ static const char *device_type_for_description(const DeviceType type)
      return "OptiX";
    case DEVICE_HIP:
      return "HIP";
+    case DEVICE_ONEAPI:
+      return "oneAPI";
    case DEVICE_DUMMY:
      return "Dummy";
    case DEVICE_MULTI:
--- a/intern/cycles/integrator/path_trace_work_gpu.cpp
+++ b/intern/cycles/integrator/path_trace_work_gpu.cpp
@@ -181,27 +181,45 @@ void PathTraceWorkGPU::alloc_integrator_queue()

 void PathTraceWorkGPU::alloc_integrator_sorting()
 {
+  /* Compute sort partitions, to balance between memory locality and coherence.
+   * Sort partitioning becomes less effective when more shaders are in the wavefront. In lieu of a
+   * more sophisticated heuristic we simply disable sort partitioning if the shader count is high.
+   */
+  num_sort_partitions_ = 1;
+  if (device_scene_->data.max_shaders < 300) {
+    const int num_elements = queue_->num_sort_partition_elements();
+    if (num_elements) {
+      num_sort_partitions_ = max(max_num_paths_ / num_elements, 1);
+    }
+  }
+
+  integrator_state_gpu_.sort_partition_divisor = (int)divide_up(max_num_paths_,
+                                                                num_sort_partitions_);
+
  /* Allocate arrays for shader sorting. */
-  const int max_shaders = device_scene_->data.max_shaders;
-  if (integrator_shader_sort_counter_.size() < max_shaders) {
-    integrator_shader_sort_counter_.alloc(max_shaders);
+  const int sort_buckets = device_scene_->data.max_shaders * num_sort_partitions_;
+  if (integrator_shader_sort_counter_.size() < sort_buckets) {
+    integrator_shader_sort_counter_.alloc(sort_buckets);
    integrator_shader_sort_counter_.zero_to_device();
-
-    integrator_shader_raytrace_sort_counter_.alloc(max_shaders);
-    integrator_shader_raytrace_sort_counter_.zero_to_device();
-
-    integrator_shader_mnee_sort_counter_.alloc(max_shaders);
-    integrator_shader_mnee_sort_counter_.zero_to_device();
-
-    integrator_shader_sort_prefix_sum_.alloc(max_shaders);
-    integrator_shader_sort_prefix_sum_.zero_to_device();
-
    integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE] =
        (int *)integrator_shader_sort_counter_.device_pointer;
-    integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE] =
-        (int *)integrator_shader_raytrace_sort_counter_.device_pointer;
-    integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE] =
-        (int *)integrator_shader_mnee_sort_counter_.device_pointer;
+
+    if (device_scene_->data.kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
+      integrator_shader_raytrace_sort_counter_.alloc(sort_buckets);
+      integrator_shader_raytrace_sort_counter_.zero_to_device();
+      integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE] =
+          (int *)integrator_shader_raytrace_sort_counter_.device_pointer;
+    }
+
+    if (device_scene_->data.kernel_features & KERNEL_FEATURE_MNEE) {
+      integrator_shader_mnee_sort_counter_.alloc(sort_buckets);
+      integrator_shader_mnee_sort_counter_.zero_to_device();
+      integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE] =
+          (int *)integrator_shader_mnee_sort_counter_.device_pointer;
+    }
+
+    integrator_shader_sort_prefix_sum_.alloc(sort_buckets);
+    integrator_shader_sort_prefix_sum_.zero_to_device();
  }
 }

@@ -333,8 +351,12 @@ void PathTraceWorkGPU::enqueue_reset()
  queue_->enqueue(DEVICE_KERNEL_INTEGRATOR_RESET, max_num_paths_, args);
  queue_->zero_to_device(integrator_queue_counter_);
  queue_->zero_to_device(integrator_shader_sort_counter_);
-  queue_->zero_to_device(integrator_shader_raytrace_sort_counter_);
-  queue_->zero_to_device(integrator_shader_mnee_sort_counter_);
+  if (device_scene_->data.kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
+    queue_->zero_to_device(integrator_shader_raytrace_sort_counter_);
+  }
+  if (device_scene_->data.kernel_features & KERNEL_FEATURE_MNEE) {
+    queue_->zero_to_device(integrator_shader_mnee_sort_counter_);
+  }

  /* Tiles enqueue need to know number of active paths, which is based on this counter. Zero the
   * counter on the host side because `zero_to_device()` is not doing it. */
@@ -486,9 +508,9 @@ void PathTraceWorkGPU::compute_sorted_queued_paths(DeviceKernel kernel,
  /* Compute prefix sum of number of active paths with each shader. */
  {
    const int work_size = 1;
-    int max_shaders = device_scene_->data.max_shaders;
+    int sort_buckets = device_scene_->data.max_shaders * num_sort_partitions_;

-    DeviceKernelArguments args(&d_counter, &d_prefix_sum, &max_shaders);
+    DeviceKernelArguments args(&d_counter, &d_prefix_sum, &sort_buckets);

    queue_->enqueue(DEVICE_KERNEL_PREFIX_SUM, work_size, args);
  }
--- a/intern/cycles/integrator/path_trace_work_gpu.h
+++ b/intern/cycles/integrator/path_trace_work_gpu.h
@@ -156,6 +156,9 @@ class PathTraceWorkGPU : public PathTraceWork {
  bool interop_use_checked_ = false;
  bool interop_use_ = false;

+  /* Number of partitions to sort state indices into prior to material sort. */
+  int num_sort_partitions_;
+
  /* Maximum number of concurrent integrator states. */
  int max_num_paths_;

--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -37,7 +37,12 @@ set(SRC_KERNEL_DEVICE_OPTIX
  device/optix/kernel_shader_raytrace.cu
 )

+set(SRC_KERNEL_DEVICE_ONEAPI
+  device/oneapi/kernel.cpp
+)
+
 set(SRC_KERNEL_DEVICE_CPU_HEADERS
+  device/cpu/bvh.h
  device/cpu/compat.h
  device/cpu/image.h
  device/cpu/globals.h
@@ -67,17 +72,30 @@ set(SRC_KERNEL_DEVICE_HIP_HEADERS
 )

 set(SRC_KERNEL_DEVICE_OPTIX_HEADERS
+  device/optix/bvh.h
  device/optix/compat.h
  device/optix/globals.h
 )

 set(SRC_KERNEL_DEVICE_METAL_HEADERS
+  device/metal/bvh.h
  device/metal/compat.h
  device/metal/context_begin.h
  device/metal/context_end.h
+  device/metal/function_constants.h
  device/metal/globals.h
 )

+set(SRC_KERNEL_DEVICE_ONEAPI_HEADERS
+  device/oneapi/compat.h
+  device/oneapi/context_begin.h
+  device/oneapi/context_end.h
+  device/oneapi/globals.h
+  device/oneapi/image.h
+  device/oneapi/kernel.h
+  device/oneapi/kernel_templates.h
+)
+
 set(SRC_KERNEL_CLOSURE_HEADERS
  closure/alloc.h
  closure/bsdf.h
@@ -140,6 +158,7 @@ set(SRC_KERNEL_SVM_HEADERS
  svm/math_util.h
  svm/mix.h
  svm/musgrave.h
+  svm/node_types_template.h
  svm/noise.h
  svm/noisetex.h
  svm/normal.h
@@ -198,8 +217,6 @@ set(SRC_KERNEL_BVH_HEADERS
  bvh/util.h
  bvh/volume.h
  bvh/volume_all.h
-  bvh/embree.h
-  bvh/metal.h
 )

 set(SRC_KERNEL_CAMERA_HEADERS
@@ -268,6 +285,7 @@ set(SRC_KERNEL_UTIL_HEADERS

 set(SRC_KERNEL_TYPES_HEADERS
  data_arrays.h
+  data_template.h
  tables.h
  types.h
 )
@@ -299,6 +317,7 @@ set(SRC_UTIL_HEADERS
  ../util/math_float2.h
  ../util/math_float3.h
  ../util/math_float4.h
+  ../util/math_float8.h
  ../util/math_int2.h
  ../util/math_int3.h
  ../util/math_int4.h
@@ -336,8 +355,6 @@ set(SRC_UTIL_HEADERS
  ../util/types_uint4.h
  ../util/types_uint4_impl.h
  ../util/types_ushort4.h
-  ../util/types_vector3.h
-  ../util/types_vector3_impl.h
 )

 set(LIB
@@ -687,6 +704,209 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
  cycles_set_solution_folder(cycles_kernel_optix)
 endif()

+if(WITH_CYCLES_DEVICE_ONEAPI)
+  if(WIN32)
+    set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/cycles_kernel_oneapi.dll)
+  else()
+    set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/cycles_kernel_oneapi.so)
+  endif()
+
+  set(cycles_oneapi_kernel_sources
+    ${SRC_KERNEL_DEVICE_ONEAPI}
+    ${SRC_KERNEL_HEADERS}
+    ${SRC_KERNEL_DEVICE_GPU_HEADERS}
+    ${SRC_KERNEL_DEVICE_ONEAPI_HEADERS}
+    ${SRC_UTIL_HEADERS}
+  )
+
+  # SYCL_CPP_FLAGS is a variable that the user can set to pass extra compiler options
+  set(sycl_compiler_flags
+      ${CMAKE_CURRENT_SOURCE_DIR}/${SRC_KERNEL_DEVICE_ONEAPI}
+      -fsycl
+      -fsycl-unnamed-lambda
+      -fdelayed-template-parsing
+      -mllvm -inlinedefault-threshold=300
+      -mllvm -inlinehint-threshold=400
+      -shared
+      -DWITH_ONEAPI
+      -ffast-math
+      -DNDEBUG
+      -O2
+      -o ${cycles_kernel_oneapi_lib}
+      -I${CMAKE_CURRENT_SOURCE_DIR}/..
+      ${SYCL_CPP_FLAGS}
+      )
+
+
+  if (WITH_CYCLES_ONEAPI_SYCL_HOST_ENABLED)
+    list(APPEND sycl_compiler_flags -DWITH_ONEAPI_SYCL_HOST_ENABLED)
+  endif()
+
+  # Default backend options for the spir64 and spir64_gen targets, used unless already defined.
+  if(NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64)
+    set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-large-register-file -ze-opt-regular-grf-kernel integrator_intersect'")
+  endif()
+  if(NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen)
+    set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "${CYCLES_ONEAPI_SYCL_OPTIONS_spir64}" CACHE STRING "Extra build options for spir64_gen target")
+  endif()
+  # Enable zebin (graphics binary format with improved compatibility) on Windows only, while
+  # support on Linux isn't available yet.
+  if(WIN32)
+    string(PREPEND CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "--format zebin ")
+  endif()
+  # The device list always comes first in the spir64_gen options.
+  string(PREPEND CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "-device ${CYCLES_ONEAPI_SPIR64_GEN_DEVICES} ")
+
+  if(WITH_CYCLES_ONEAPI_BINARIES)
+    # Build for every requested target, passing each target the backend options defined for it.
+    list(JOIN CYCLES_ONEAPI_SYCL_TARGETS "," targets_string)
+    list(APPEND sycl_compiler_flags -fsycl-targets=${targets_string})
+    foreach(target ${CYCLES_ONEAPI_SYCL_TARGETS})
+      if(DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_${target})
+        list(APPEND sycl_compiler_flags -Xsycl-target-backend=${target} "${CYCLES_ONEAPI_SYCL_OPTIONS_${target}}")
+      endif()
+    endforeach()
+  else()
+    # If AOT is disabled, build for spir64
+    list(APPEND sycl_compiler_flags
+      -fsycl-targets=spir64
+      -Xsycl-target-backend=spir64 "${CYCLES_ONEAPI_SYCL_OPTIONS_spir64}")
+  endif()
+
+  if(WITH_NANOVDB)
+    list(APPEND sycl_compiler_flags
+      -DWITH_NANOVDB
+      -I"${NANOVDB_INCLUDE_DIR}")
+  endif()
+
+  if(WITH_CYCLES_DEBUG)
+    list(APPEND sycl_compiler_flags -DWITH_CYCLES_DEBUG)
+  endif()
+
+  # Directory and base name of the SYCL compiler. The paths are quoted so that installations in
+  # directories containing spaces are handled as a single argument.
+  get_filename_component(sycl_compiler_root "${SYCL_COMPILER}" DIRECTORY)
+  get_filename_component(sycl_compiler_compiler_name "${SYCL_COMPILER}" NAME_WE)
+
+  # Unless set by the user, ocloc is expected in lib/ocloc next to the compiler's directory.
+  if(NOT OCLOC_INSTALL_DIR)
+    get_filename_component(OCLOC_INSTALL_DIR "${sycl_compiler_root}/../lib/ocloc" ABSOLUTE)
+  endif()
+  if(WITH_CYCLES_ONEAPI_BINARIES AND NOT EXISTS "${OCLOC_INSTALL_DIR}")
+    message(FATAL_ERROR "WITH_CYCLES_ONEAPI_BINARIES requires ocloc but ${OCLOC_INSTALL_DIR} directory doesn't exist."
+                        " A different ocloc directory can be set using OCLOC_INSTALL_DIR cmake variable.")
+  endif()
+
+  if(UNIX AND NOT APPLE)
+    if(NOT WITH_CXX11_ABI)
+      check_library_exists(sycl
+        _ZN2cl4sycl7handler22verifyUsedKernelBundleERKSs ${sycl_compiler_root}/../lib SYCL_NO_CXX11_ABI)
+      if(SYCL_NO_CXX11_ABI)
+        list(APPEND sycl_compiler_flags -D_GLIBCXX_USE_CXX11_ABI=0)
+      endif()
+    endif()
+  endif()
+
+  if(WIN32)
+    list(APPEND sycl_compiler_flags
+    -fms-extensions
+    -fms-compatibility
+    -D_WINDLL
+    -D_MBCS
+    -DWIN32
+    -D_WINDOWS
+    -D_CRT_NONSTDC_NO_DEPRECATE
+    -D_CRT_SECURE_NO_DEPRECATE
+    -DONEAPI_EXPORT)
+
+    if(sycl_compiler_compiler_name MATCHES "dpcpp")
+      # The oneAPI distribution calls the compiler "dpcpp" and comes with a script that sets environment variables.
+      add_custom_command(
+        OUTPUT ${cycles_kernel_oneapi_lib}
+        COMMAND "${sycl_compiler_root}/../../env/vars.bat"
+        COMMAND ${SYCL_COMPILER} $<$<CONFIG:Debug>:-g>$<$<CONFIG:RelWithDebInfo>:-g> ${sycl_compiler_flags}
+        DEPENDS ${cycles_oneapi_kernel_sources})
+    else()
+      # The open source SYCL compiler just goes by clang++ and does not have such a script.
+      # Set the variables manually.
+      string(REPLACE /Redist/ /Tools/ MSVC_TOOLS_DIR ${MSVC_REDIST_DIR})
+      if(NOT CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION) # case for Ninja on Windows
+        get_filename_component(cmake_mt_dir ${CMAKE_MT} DIRECTORY)
+        string(REPLACE /bin/ /Lib/ WINDOWS_KIT_DIR ${cmake_mt_dir})
+        get_filename_component(WINDOWS_KIT_DIR "${WINDOWS_KIT_DIR}/../" ABSOLUTE)
+      else()
+        set(WINDOWS_KIT_DIR ${WINDOWS_KITS_DIR}/Lib/${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION})
+      endif()
+      list(APPEND sycl_compiler_flags
+                  -L "${MSVC_TOOLS_DIR}/lib/x64"
+                  -L "${WINDOWS_KIT_DIR}/um/x64"
+                  -L "${WINDOWS_KIT_DIR}/ucrt/x64")
+      add_custom_command(
+        OUTPUT ${cycles_kernel_oneapi_lib}
+        COMMAND ${CMAKE_COMMAND} -E env
+                "LIB=${sycl_compiler_root}/../lib" # for compiler to find sycl.lib
+                "PATH=${OCLOC_INSTALL_DIR};${sycl_compiler_root}"
+                ${SYCL_COMPILER} $<$<CONFIG:Debug>:-g>$<$<CONFIG:RelWithDebInfo>:-g> ${sycl_compiler_flags}
+        DEPENDS ${cycles_oneapi_kernel_sources})
+    endif()
+  else()
+    list(APPEND sycl_compiler_flags -fPIC)
+
+    # We avoid getting __FAST_MATH__ to be defined when building on CentOS 7 until the compilation crash
+    # it triggers at either AoT or JIT stages gets fixed.
+    list(APPEND sycl_compiler_flags -fhonor-nans)
+
+    # add $ORIGIN to cycles_kernel_oneapi.so rpath so libsycl.so and
+    # libpi_level_zero.so can be placed next to it and get found.
+    list(APPEND sycl_compiler_flags -Wl,-rpath,'$$ORIGIN')
+
+    # The oneAPI distribution calls the compiler "dpcpp" and comes with a script that sets environment variables.
+    if(sycl_compiler_compiler_name MATCHES "dpcpp")
+      add_custom_command(
+        OUTPUT ${cycles_kernel_oneapi_lib}
+        COMMAND bash -c \"source ${sycl_compiler_root}/../../env/vars.sh&&${SYCL_COMPILER} $<$<CONFIG:Debug>:-g>$<$<CONFIG:RelWithDebInfo>:-g> ${sycl_compiler_flags}\"
+        DEPENDS ${cycles_oneapi_kernel_sources})
+    else()
+      # The open source SYCL compiler just goes by clang++ and does not have such a script.
+      # Set the variables manually.
+      if(NOT IGC_INSTALL_DIR)
+        get_filename_component(IGC_INSTALL_DIR "${sycl_compiler_root}/../lib/igc" ABSOLUTE)
+      endif()
+      add_custom_command(
+        OUTPUT ${cycles_kernel_oneapi_lib}
+        COMMAND ${CMAKE_COMMAND} -E env
+                "LD_LIBRARY_PATH=${sycl_compiler_root}/../lib:${OCLOC_INSTALL_DIR}/lib:${IGC_INSTALL_DIR}/lib"
+                "PATH=${OCLOC_INSTALL_DIR}/bin:${sycl_compiler_root}:$ENV{PATH}" # env PATH is for compiler to find ld
+                ${SYCL_COMPILER} $<$<CONFIG:Debug>:-g>$<$<CONFIG:RelWithDebInfo>:-g> ${sycl_compiler_flags}
+        DEPENDS ${cycles_oneapi_kernel_sources})
+    endif()
+  endif()
+
+  # install dynamic libraries required at runtime
+  if(WIN32)
+    set(SYCL_RUNTIME_DEPENDENCIES
+        sycl.dll
+        pi_level_zero.dll
+    )
+    if(NOT WITH_BLENDER)
+      # For the Cycles standalone put libraries next to the Cycles application.
+      delayed_install("${sycl_compiler_root}" "${SYCL_RUNTIME_DEPENDENCIES}" ${CYCLES_INSTALL_PATH})
+    else()
+      # For Blender put the libraries next to the Blender executable.
+      #
+      # Note that the installation path in the delayed_install is relative to the versioned folder,
+      # which means we need to go one level up.
+      delayed_install("${sycl_compiler_root}" "${SYCL_RUNTIME_DEPENDENCIES}" "../")
+    endif()
+  elseif(UNIX AND NOT APPLE)
+    file(GLOB SYCL_RUNTIME_DEPENDENCIES
+              ${sycl_compiler_root}/../lib/libsycl.so
+              ${sycl_compiler_root}/../lib/libsycl.so.[0-9]
+              ${sycl_compiler_root}/../lib/libsycl.so.[0-9].[0-9].[0-9]-[0-9]
+    )
+    list(APPEND SYCL_RUNTIME_DEPENDENCIES ${sycl_compiler_root}/../lib/libpi_level_zero.so)
+    delayed_install("" "${SYCL_RUNTIME_DEPENDENCIES}" ${CYCLES_INSTALL_PATH}/lib)
+  endif()
+
+  delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cycles_kernel_oneapi_lib}" ${CYCLES_INSTALL_PATH}/lib)
+  add_custom_target(cycles_kernel_oneapi ALL DEPENDS ${cycles_kernel_oneapi_lib})
+endif()
+
 # OSL module

 if(WITH_CYCLES_OSL)
@@ -752,6 +972,7 @@ cycles_add_library(cycles_kernel "${LIB}"
  ${SRC_KERNEL_DEVICE_HIP_HEADERS}
  ${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
  ${SRC_KERNEL_DEVICE_METAL_HEADERS}
+  ${SRC_KERNEL_DEVICE_ONEAPI_HEADERS}
 )

 source_group("bake" FILES ${SRC_KERNEL_BAKE_HEADERS})
@@ -764,6 +985,7 @@ source_group("device\\gpu" FILES ${SRC_KERNEL_DEVICE_GPU_HEADERS})
 source_group("device\\hip" FILES ${SRC_KERNEL_DEVICE_HIP} ${SRC_KERNEL_DEVICE_HIP_HEADERS})
 source_group("device\\optix" FILES ${SRC_KERNEL_DEVICE_OPTIX} ${SRC_KERNEL_DEVICE_OPTIX_HEADERS})
 source_group("device\\metal" FILES ${SRC_KERNEL_DEVICE_METAL} ${SRC_KERNEL_DEVICE_METAL_HEADERS})
+source_group("device\\oneapi" FILES ${SRC_KERNEL_DEVICE_ONEAPI} ${SRC_KERNEL_DEVICE_ONEAPI_HEADERS})
 source_group("film" FILES ${SRC_KERNEL_FILM_HEADERS})
 source_group("geom" FILES ${SRC_KERNEL_GEOM_HEADERS})
 source_group("integrator" FILES ${SRC_KERNEL_INTEGRATOR_HEADERS})
@@ -782,6 +1004,9 @@ endif()
 if(WITH_CYCLES_HIP)
  add_dependencies(cycles_kernel cycles_kernel_hip)
 endif()
+if(WITH_CYCLES_DEVICE_ONEAPI)
+  add_dependencies(cycles_kernel cycles_kernel_oneapi)
+endif()

 # Install kernel source for runtime compilation

--- a/intern/cycles/kernel/bake/bake.h
+++ b/intern/cycles/kernel/bake/bake.h
@@ -29,14 +29,14 @@ ccl_device void kernel_displace_evaluate(KernelGlobals kg,
  object_inverse_dir_transform(kg, &sd, &D);

 #ifdef __KERNEL_DEBUG_NAN__
-  if (!isfinite3_safe(D)) {
+  if (!isfinite_safe(D)) {
    kernel_assert(!"Cycles displacement with non-finite value detected");
  }
 #endif

  /* Ensure finite displacement, preventing BVH from becoming degenerate and avoiding possible
   * traversal issues caused by non-finite math. */
-  D = ensure_finite3(D);
+  D = ensure_finite(D);

  /* Write output. */
  output[offset * 3 + 0] += D.x;
@@ -68,13 +68,13 @@ ccl_device void kernel_background_evaluate(KernelGlobals kg,
  float3 color = shader_background_eval(&sd);

 #ifdef __KERNEL_DEBUG_NAN__
-  if (!isfinite3_safe(color)) {
+  if (!isfinite_safe(color)) {
    kernel_assert(!"Cycles background with non-finite value detected");
  }
 #endif

  /* Ensure finite color, avoiding possible numerical instabilities in the path tracing kernels. */
-  color = ensure_finite3(color);
+  color = ensure_finite(color);

  /* Write output. */
  output[offset * 3 + 0] += color.x;
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -1,40 +1,47 @@
 /* SPDX-License-Identifier: Apache-2.0
 * Copyright 2011-2022 Blender Foundation */

-/* BVH
- *
- * Bounding volume hierarchy for ray tracing. We compile different variations
- * of the same BVH traversal function for faster rendering when some types of
- * primitives are not needed, using #includes to work around the lack of
- * C++ templates in OpenCL.
- *
- * Originally based on "Understanding the Efficiency of Ray Traversal on GPUs",
- * the code has been extended and modified to support more primitives and work
- * with CPU/CUDA/OpenCL. */
-
 #pragma once

-#ifdef __EMBREE__
-#  include "kernel/bvh/embree.h"
-#endif
-
-#ifdef __METALRT__
-#  include "kernel/bvh/metal.h"
-#endif
-
 #include "kernel/bvh/types.h"
 #include "kernel/bvh/util.h"

 #include "kernel/integrator/state_util.h"

+/* Device specific acceleration structures for ray tracing. */
+
+#if defined(__EMBREE__)
+#  include "kernel/device/cpu/bvh.h"
+#  define __BVH2__
+#elif defined(__METALRT__)
+#  include "kernel/device/metal/bvh.h"
+#elif defined(__KERNEL_OPTIX__)
+#  include "kernel/device/optix/bvh.h"
+#else
+#  define __BVH2__
+#endif
+
 CCL_NAMESPACE_BEGIN

-#if !defined(__KERNEL_GPU_RAYTRACING__)
+#ifdef __BVH2__

-/* Regular BVH traversal */
+/* BVH2
+ *
+ * Bounding volume hierarchy for ray tracing, when no native acceleration
+ * structure is available for the device.
+ *
+ * We compile different variations of the same BVH traversal function for
+ * faster rendering when some types of primitives are not needed, using #includes
+ * to work around the lack of C++ templates in OpenCL.
+ *
+ * Originally based on "Understanding the Efficiency of Ray Traversal on GPUs",
+ * the code has been extended and modified to support more primitives and work
+ * with CPU and various GPU kernel languages. */

 #  include "kernel/bvh/nodes.h"

+/* Regular BVH traversal */
+
 #  define BVH_FUNCTION_NAME bvh_intersect
 #  define BVH_FUNCTION_FEATURES BVH_POINTCLOUD
 #  include "kernel/bvh/traversal.h"
@@ -57,9 +64,46 @@ CCL_NAMESPACE_BEGIN
 #    include "kernel/bvh/traversal.h"
 #  endif

-/* Subsurface scattering BVH traversal */
+ccl_device_intersect bool scene_intersect(KernelGlobals kg,
+                                          ccl_private const Ray *ray,
+                                          const uint visibility,
+                                          ccl_private Intersection *isect)
+{
+  if (!intersection_ray_valid(ray)) {
+    return false;
+  }
+
+#  ifdef __EMBREE__
+  if (kernel_data.device_bvh) {
+    return kernel_embree_intersect(kg, ray, visibility, isect);
+  }
+#  endif
+
+#  ifdef __OBJECT_MOTION__
+  if (kernel_data.bvh.have_motion) {
+#    ifdef __HAIR__
+    if (kernel_data.bvh.have_curves) {
+      return bvh_intersect_hair_motion(kg, ray, isect, visibility);
+    }
+#    endif /* __HAIR__ */
+
+    return bvh_intersect_motion(kg, ray, isect, visibility);
+  }
+#  endif /* __OBJECT_MOTION__ */
+
+#  ifdef __HAIR__
+  if (kernel_data.bvh.have_curves) {
+    return bvh_intersect_hair(kg, ray, isect, visibility);
+  }
+#  endif /* __HAIR__ */
+
+  return bvh_intersect(kg, ray, isect, visibility);
+}
+
+/* Single object BVH traversal, for SSS/AO/bevel. */
+
+#  ifdef __BVH_LOCAL__

-#  if defined(__BVH_LOCAL__)
 #    define BVH_FUNCTION_NAME bvh_intersect_local
 #    define BVH_FUNCTION_FEATURES BVH_HAIR
 #    include "kernel/bvh/local.h"
@@ -69,25 +113,40 @@ CCL_NAMESPACE_BEGIN
 #      define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
 #      include "kernel/bvh/local.h"
 #    endif
-#  endif /* __BVH_LOCAL__ */

-/* Volume BVH traversal */
+ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
+                                                ccl_private const Ray *ray,
+                                                ccl_private LocalIntersection *local_isect,
+                                                int local_object,
+                                                ccl_private uint *lcg_state,
+                                                int max_hits)
+{
+  if (!intersection_ray_valid(ray)) {
+    if (local_isect) {
+      local_isect->num_hits = 0;
+    }
+    return false;
+  }

-#  if defined(__VOLUME__)
-#    define BVH_FUNCTION_NAME bvh_intersect_volume
-#    define BVH_FUNCTION_FEATURES BVH_HAIR
-#    include "kernel/bvh/volume.h"
-
-#    if defined(__OBJECT_MOTION__)
-#      define BVH_FUNCTION_NAME bvh_intersect_volume_motion
-#      define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
-#      include "kernel/bvh/volume.h"
+#    ifdef __EMBREE__
+  if (kernel_data.device_bvh) {
+    return kernel_embree_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
+  }
 #    endif
-#  endif /* __VOLUME__ */

-/* Record all intersections - Shadow BVH traversal */
+#    ifdef __OBJECT_MOTION__
+  if (kernel_data.bvh.have_motion) {
+    return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
+  }
+#    endif /* __OBJECT_MOTION__ */
+  return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
+}
+#  endif /* __BVH_LOCAL__ */
+
+/* Transparent shadow BVH traversal, recording multiple intersections. */
+
+#  ifdef __SHADOW_RECORD_ALL__

-#  if defined(__SHADOW_RECORD_ALL__)
 #    define BVH_FUNCTION_NAME bvh_intersect_shadow_all
 #    define BVH_FUNCTION_FEATURES BVH_POINTCLOUD
 #    include "kernel/bvh/shadow_all.h"
@@ -110,412 +169,6 @@ CCL_NAMESPACE_BEGIN
 #      include "kernel/bvh/shadow_all.h"
 #    endif

-#  endif /* __SHADOW_RECORD_ALL__ */
-
-/* Record all intersections - Volume BVH traversal. */
-
-#  if defined(__VOLUME_RECORD_ALL__)
-#    define BVH_FUNCTION_NAME bvh_intersect_volume_all
-#    define BVH_FUNCTION_FEATURES BVH_HAIR
-#    include "kernel/bvh/volume_all.h"
-
-#    if defined(__OBJECT_MOTION__)
-#      define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
-#      define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
-#      include "kernel/bvh/volume_all.h"
-#    endif
-#  endif /* __VOLUME_RECORD_ALL__ */
-
-#  undef BVH_FEATURE
-#  undef BVH_NAME_JOIN
-#  undef BVH_NAME_EVAL
-#  undef BVH_FUNCTION_FULL_NAME
-
-#endif /* !defined(__KERNEL_GPU_RAYTRACING__) */
-
-ccl_device_inline bool scene_intersect_valid(ccl_private const Ray *ray)
-{
-  /* NOTE: Due to some vectorization code  non-finite origin point might
-   * cause lots of false-positive intersections which will overflow traversal
-   * stack.
-   * This code is a quick way to perform early output, to avoid crashes in
-   * such cases.
-   * From production scenes so far it seems it's enough to test first element
-   * only.
-   * Scene intersection may also called with empty rays for conditional trace
-   * calls that evaluate to false, so filter those out.
-   */
-  return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f;
-}
-
-ccl_device_intersect bool scene_intersect(KernelGlobals kg,
-                                          ccl_private const Ray *ray,
-                                          const uint visibility,
-                                          ccl_private Intersection *isect)
-{
-#ifdef __KERNEL_OPTIX__
-  uint p0 = 0;
-  uint p1 = 0;
-  uint p2 = 0;
-  uint p3 = 0;
-  uint p4 = visibility;
-  uint p5 = PRIMITIVE_NONE;
-  uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
-  uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
-
-  uint ray_mask = visibility & 0xFF;
-  uint ray_flags = OPTIX_RAY_FLAG_ENFORCE_ANYHIT;
-  if (0 == ray_mask && (visibility & ~0xFF) != 0) {
-    ray_mask = 0xFF;
-  }
-  else if (visibility & PATH_RAY_SHADOW_OPAQUE) {
-    ray_flags |= OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT;
-  }
-
-  optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
-             ray->P,
-             ray->D,
-             0.0f,
-             ray->t,
-             ray->time,
-             ray_mask,
-             ray_flags,
-             0, /* SBT offset for PG_HITD */
-             0,
-             0,
-             p0,
-             p1,
-             p2,
-             p3,
-             p4,
-             p5,
-             p6,
-             p7);
-
-  isect->t = __uint_as_float(p0);
-  isect->u = __uint_as_float(p1);
-  isect->v = __uint_as_float(p2);
-  isect->prim = p3;
-  isect->object = p4;
-  isect->type = p5;
-
-  return p5 != PRIMITIVE_NONE;
-#elif defined(__METALRT__)
-
-  if (!scene_intersect_valid(ray)) {
-    isect->t = ray->t;
-    isect->type = PRIMITIVE_NONE;
-    return false;
-  }
-
-#  if defined(__KERNEL_DEBUG__)
-  if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
-    isect->t = ray->t;
-    isect->type = PRIMITIVE_NONE;
-    kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
-    return false;
-  }
-
-  if (is_null_intersection_function_table(metal_ancillaries->ift_default)) {
-    isect->t = ray->t;
-    isect->type = PRIMITIVE_NONE;
-    kernel_assert(!"Invalid ift_default");
-    return false;
-  }
-#  endif
-
-  metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
-  metalrt_intersector_type metalrt_intersect;
-
-  if (!kernel_data.bvh.have_curves) {
-    metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
-  }
-
-  MetalRTIntersectionPayload payload;
-  payload.self = ray->self;
-  payload.u = 0.0f;
-  payload.v = 0.0f;
-  payload.visibility = visibility;
-
-  typename metalrt_intersector_type::result_type intersection;
-
-  uint ray_mask = visibility & 0xFF;
-  if (0 == ray_mask && (visibility & ~0xFF) != 0) {
-    ray_mask = 0xFF;
-    /* No further intersector setup required: Default MetalRT behavior is any-hit. */
-  }
-  else if (visibility & PATH_RAY_SHADOW_OPAQUE) {
-    /* No further intersector setup required: Shadow ray early termination is controlled by the
-     * intersection handler */
-  }
-
-#  if defined(__METALRT_MOTION__)
-  payload.time = ray->time;
-  intersection = metalrt_intersect.intersect(r,
-                                             metal_ancillaries->accel_struct,
-                                             ray_mask,
-                                             ray->time,
-                                             metal_ancillaries->ift_default,
-                                             payload);
-#  else
-  intersection = metalrt_intersect.intersect(
-      r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload);
-#  endif
-
-  if (intersection.type == intersection_type::none) {
-    isect->t = ray->t;
-    isect->type = PRIMITIVE_NONE;
-
-    return false;
-  }
-
-  isect->t = intersection.distance;
-
-  isect->prim = payload.prim;
-  isect->type = payload.type;
-  isect->object = intersection.user_instance_id;
-
-  isect->t = intersection.distance;
-  if (intersection.type == intersection_type::triangle) {
-    isect->u = 1.0f - intersection.triangle_barycentric_coord.y -
-               intersection.triangle_barycentric_coord.x;
-    isect->v = intersection.triangle_barycentric_coord.x;
-  }
-  else {
-    isect->u = payload.u;
-    isect->v = payload.v;
-  }
-
-  return isect->type != PRIMITIVE_NONE;
-
-#else
-
-  if (!scene_intersect_valid(ray)) {
-    return false;
-  }
-
-#  ifdef __EMBREE__
-  if (kernel_data.bvh.scene) {
-    isect->t = ray->t;
-    CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
-    IntersectContext rtc_ctx(&ctx);
-    RTCRayHit ray_hit;
-    ctx.ray = ray;
-    kernel_embree_setup_rayhit(*ray, ray_hit, visibility);
-    rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit);
-    if (ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID &&
-        ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) {
-      kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect);
-      return true;
-    }
-    return false;
-  }
-#  endif /* __EMBREE__ */
-
-#  ifdef __OBJECT_MOTION__
-  if (kernel_data.bvh.have_motion) {
-#    ifdef __HAIR__
-    if (kernel_data.bvh.have_curves) {
-      return bvh_intersect_hair_motion(kg, ray, isect, visibility);
-    }
-#    endif /* __HAIR__ */
-
-    return bvh_intersect_motion(kg, ray, isect, visibility);
-  }
-#  endif   /* __OBJECT_MOTION__ */
-
-#  ifdef __HAIR__
-  if (kernel_data.bvh.have_curves) {
-    return bvh_intersect_hair(kg, ray, isect, visibility);
-  }
-#  endif /* __HAIR__ */
-
-  return bvh_intersect(kg, ray, isect, visibility);
-#endif   /* __KERNEL_OPTIX__ */
-}
-
-#ifdef __BVH_LOCAL__
-ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
-                                                ccl_private const Ray *ray,
-                                                ccl_private LocalIntersection *local_isect,
-                                                int local_object,
-                                                ccl_private uint *lcg_state,
-                                                int max_hits)
-{
-#  ifdef __KERNEL_OPTIX__
-  uint p0 = pointer_pack_to_uint_0(lcg_state);
-  uint p1 = pointer_pack_to_uint_1(lcg_state);
-  uint p2 = pointer_pack_to_uint_0(local_isect);
-  uint p3 = pointer_pack_to_uint_1(local_isect);
-  uint p4 = local_object;
-  uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
-  uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
-
-  /* Is set to zero on miss or if ray is aborted, so can be used as return value. */
-  uint p5 = max_hits;
-
-  if (local_isect) {
-    local_isect->num_hits = 0; /* Initialize hit count to zero. */
-  }
-  optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
-             ray->P,
-             ray->D,
-             0.0f,
-             ray->t,
-             ray->time,
-             0xFF,
-             /* Need to always call into __anyhit__kernel_optix_local_hit. */
-             OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
-             2, /* SBT offset for PG_HITL */
-             0,
-             0,
-             p0,
-             p1,
-             p2,
-             p3,
-             p4,
-             p5,
-             p6,
-             p7);
-
-  return p5;
-#  elif defined(__METALRT__)
-  if (!scene_intersect_valid(ray)) {
-    if (local_isect) {
-      local_isect->num_hits = 0;
-    }
-    return false;
-  }
-
-#    if defined(__KERNEL_DEBUG__)
-  if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
-    if (local_isect) {
-      local_isect->num_hits = 0;
-    }
-    kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
-    return false;
-  }
-
-  if (is_null_intersection_function_table(metal_ancillaries->ift_local)) {
-    if (local_isect) {
-      local_isect->num_hits = 0;
-    }
-    kernel_assert(!"Invalid ift_local");
-    return false;
-  }
-#    endif
-
-  metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
-  metalrt_intersector_type metalrt_intersect;
-
-  metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
-  if (!kernel_data.bvh.have_curves) {
-    metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
-  }
-
-  MetalRTIntersectionLocalPayload payload;
-  payload.self = ray->self;
-  payload.local_object = local_object;
-  payload.max_hits = max_hits;
-  payload.local_isect.num_hits = 0;
-  if (lcg_state) {
-    payload.has_lcg_state = true;
-    payload.lcg_state = *lcg_state;
-  }
-  payload.result = false;
-
-  typename metalrt_intersector_type::result_type intersection;
-
-#    if defined(__METALRT_MOTION__)
-  intersection = metalrt_intersect.intersect(
-      r, metal_ancillaries->accel_struct, 0xFF, ray->time, metal_ancillaries->ift_local, payload);
-#    else
-  intersection = metalrt_intersect.intersect(
-      r, metal_ancillaries->accel_struct, 0xFF, metal_ancillaries->ift_local, payload);
-#    endif
-
-  if (lcg_state) {
-    *lcg_state = payload.lcg_state;
-  }
-  *local_isect = payload.local_isect;
-
-  return payload.result;
-
-#  else
-
-  if (!scene_intersect_valid(ray)) {
-    if (local_isect) {
-      local_isect->num_hits = 0;
-    }
-    return false;
-  }
-
-#    ifdef __EMBREE__
-  if (kernel_data.bvh.scene) {
-    const bool has_bvh = !(kernel_data_fetch(object_flag, local_object) &
-                           SD_OBJECT_TRANSFORM_APPLIED);
-    CCLIntersectContext ctx(
-        kg, has_bvh ? CCLIntersectContext::RAY_SSS : CCLIntersectContext::RAY_LOCAL);
-    ctx.lcg_state = lcg_state;
-    ctx.max_hits = max_hits;
-    ctx.ray = ray;
-    ctx.local_isect = local_isect;
-    if (local_isect) {
-      local_isect->num_hits = 0;
-    }
-    ctx.local_object_id = local_object;
-    IntersectContext rtc_ctx(&ctx);
-    RTCRay rtc_ray;
-    kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_ALL_VISIBILITY);
-
-    /* If this object has its own BVH, use it. */
-    if (has_bvh) {
-      RTCGeometry geom = rtcGetGeometry(kernel_data.bvh.scene, local_object * 2);
-      if (geom) {
-        float3 P = ray->P;
-        float3 dir = ray->D;
-        float3 idir = ray->D;
-        Transform ob_itfm;
-        rtc_ray.tfar = ray->t *
-                       bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, &ob_itfm);
-        /* bvh_instance_motion_push() returns the inverse transform but
-         * it's not needed here. */
-        (void)ob_itfm;
-
-        rtc_ray.org_x = P.x;
-        rtc_ray.org_y = P.y;
-        rtc_ray.org_z = P.z;
-        rtc_ray.dir_x = dir.x;
-        rtc_ray.dir_y = dir.y;
-        rtc_ray.dir_z = dir.z;
-        RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom);
-        kernel_assert(scene);
-        if (scene) {
-          rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray);
-        }
-      }
-    }
-    else {
-      rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
-    }
-
-    /* rtcOccluded1 sets tfar to -inf if a hit was found. */
-    return (local_isect && local_isect->num_hits > 0) || (rtc_ray.tfar < 0);
-    ;
-  }
-#    endif /* __EMBREE__ */
-
-#    ifdef __OBJECT_MOTION__
-  if (kernel_data.bvh.have_motion) {
-    return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
-  }
-#    endif /* __OBJECT_MOTION__ */
-  return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
-#  endif   /* __KERNEL_OPTIX__ */
-}
-#endif
-
-#ifdef __SHADOW_RECORD_ALL__
 ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
                                                     IntegratorShadowState state,
                                                     ccl_private const Ray *ray,
@@ -524,131 +177,18 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
                                                     ccl_private uint *num_recorded_hits,
                                                     ccl_private float *throughput)
 {
-#  ifdef __KERNEL_OPTIX__
-  uint p0 = state;
-  uint p1 = __float_as_uint(1.0f); /* Throughput. */
-  uint p2 = 0;                     /* Number of hits. */
-  uint p3 = max_hits;
-  uint p4 = visibility;
-  uint p5 = false;
-  uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
-  uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
-
-  uint ray_mask = visibility & 0xFF;
-  if (0 == ray_mask && (visibility & ~0xFF) != 0) {
-    ray_mask = 0xFF;
-  }
-
-  optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
-             ray->P,
-             ray->D,
-             0.0f,
-             ray->t,
-             ray->time,
-             ray_mask,
-             /* Need to always call into __anyhit__kernel_optix_shadow_all_hit. */
-             OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
-             1, /* SBT offset for PG_HITS */
-             0,
-             0,
-             p0,
-             p1,
-             p2,
-             p3,
-             p4,
-             p5,
-             p6,
-             p7);
-
-  *num_recorded_hits = uint16_unpack_from_uint_0(p2);
-  *throughput = __uint_as_float(p1);
-
-  return p5;
-#  elif defined(__METALRT__)
-
-  if (!scene_intersect_valid(ray)) {
-    return false;
-  }
-
-#    if defined(__KERNEL_DEBUG__)
-  if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
-    kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
-    return false;
-  }
-
-  if (is_null_intersection_function_table(metal_ancillaries->ift_shadow)) {
-    kernel_assert(!"Invalid ift_shadow");
-    return false;
-  }
-#    endif
-
-  metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
-  metalrt_intersector_type metalrt_intersect;
-
-  metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
-  if (!kernel_data.bvh.have_curves) {
-    metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
-  }
-
-  MetalRTIntersectionShadowPayload payload;
-  payload.self = ray->self;
-  payload.visibility = visibility;
-  payload.max_hits = max_hits;
-  payload.num_hits = 0;
-  payload.num_recorded_hits = 0;
-  payload.throughput = 1.0f;
-  payload.result = false;
-  payload.state = state;
-
-  uint ray_mask = visibility & 0xFF;
-  if (0 == ray_mask && (visibility & ~0xFF) != 0) {
-    ray_mask = 0xFF;
-  }
-
-  typename metalrt_intersector_type::result_type intersection;
-
-#    if defined(__METALRT_MOTION__)
-  payload.time = ray->time;
-  intersection = metalrt_intersect.intersect(r,
-                                             metal_ancillaries->accel_struct,
-                                             ray_mask,
-                                             ray->time,
-                                             metal_ancillaries->ift_shadow,
-                                             payload);
-#    else
-  intersection = metalrt_intersect.intersect(
-      r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_shadow, payload);
-#    endif
-
-  *num_recorded_hits = payload.num_recorded_hits;
-  *throughput = payload.throughput;
-
-  return payload.result;
-
-#  else
-  if (!scene_intersect_valid(ray)) {
+  if (!intersection_ray_valid(ray)) {
    *num_recorded_hits = 0;
    *throughput = 1.0f;
    return false;
  }

 #    ifdef __EMBREE__
-  if (kernel_data.bvh.scene) {
-    CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
-    Intersection *isect_array = (Intersection *)state->shadow_isect;
-    ctx.isect_s = isect_array;
-    ctx.max_hits = max_hits;
-    ctx.ray = ray;
-    IntersectContext rtc_ctx(&ctx);
-    RTCRay rtc_ray;
-    kernel_embree_setup_ray(*ray, rtc_ray, visibility);
-    rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
-
-    *num_recorded_hits = ctx.num_recorded_hits;
-    *throughput = ctx.throughput;
-    return ctx.opaque_hit;
+  if (kernel_data.device_bvh) {
+    return kernel_embree_intersect_shadow_all(
+        kg, state, ray, visibility, max_hits, num_recorded_hits, throughput);
  }
-#    endif /* __EMBREE__ */
+#    endif

 #    ifdef __OBJECT_MOTION__
  if (kernel_data.bvh.have_motion) {
@@ -662,7 +202,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
    return bvh_intersect_shadow_all_motion(
        kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
  }
-#    endif   /* __OBJECT_MOTION__ */
+#    endif /* __OBJECT_MOTION__ */

 #    ifdef __HAIR__
  if (kernel_data.bvh.have_curves) {
@@ -673,132 +213,29 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,

  return bvh_intersect_shadow_all(
      kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
-#  endif   /* __KERNEL_OPTIX__ */
 }
-#endif /* __SHADOW_RECORD_ALL__ */
+#  endif /* __SHADOW_RECORD_ALL__ */
+
+/* Volume BVH traversal, for initializing or updating the volume stack. */
+
+#  if defined(__VOLUME__) && !defined(__VOLUME_RECORD_ALL__)
+
+#    define BVH_FUNCTION_NAME bvh_intersect_volume
+#    define BVH_FUNCTION_FEATURES BVH_HAIR
+#    include "kernel/bvh/volume.h"
+
+#    if defined(__OBJECT_MOTION__)
+#      define BVH_FUNCTION_NAME bvh_intersect_volume_motion
+#      define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
+#      include "kernel/bvh/volume.h"
+#    endif

-#ifdef __VOLUME__
 ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
                                                 ccl_private const Ray *ray,
                                                 ccl_private Intersection *isect,
                                                 const uint visibility)
 {
-#  ifdef __KERNEL_OPTIX__
-  uint p0 = 0;
-  uint p1 = 0;
-  uint p2 = 0;
-  uint p3 = 0;
-  uint p4 = visibility;
-  uint p5 = PRIMITIVE_NONE;
-  uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
-  uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
-
-  uint ray_mask = visibility & 0xFF;
-  if (0 == ray_mask && (visibility & ~0xFF) != 0) {
-    ray_mask = 0xFF;
-  }
-
-  optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
-             ray->P,
-             ray->D,
-             0.0f,
-             ray->t,
-             ray->time,
-             ray_mask,
-             /* Need to always call into __anyhit__kernel_optix_volume_test. */
-             OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
-             3, /* SBT offset for PG_HITV */
-             0,
-             0,
-             p0,
-             p1,
-             p2,
-             p3,
-             p4,
-             p5,
-             p6,
-             p7);
-
-  isect->t = __uint_as_float(p0);
-  isect->u = __uint_as_float(p1);
-  isect->v = __uint_as_float(p2);
-  isect->prim = p3;
-  isect->object = p4;
-  isect->type = p5;
-
-  return p5 != PRIMITIVE_NONE;
-#  elif defined(__METALRT__)
-
-  if (!scene_intersect_valid(ray)) {
-    return false;
-  }
-#    if defined(__KERNEL_DEBUG__)
-  if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
-    kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
-    return false;
-  }
-
-  if (is_null_intersection_function_table(metal_ancillaries->ift_default)) {
-    kernel_assert(!"Invalid ift_default");
-    return false;
-  }
-#    endif
-
-  metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
-  metalrt_intersector_type metalrt_intersect;
-
-  metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
-  if (!kernel_data.bvh.have_curves) {
-    metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
-  }
-
-  MetalRTIntersectionPayload payload;
-  payload.self = ray->self;
-  payload.visibility = visibility;
-
-  typename metalrt_intersector_type::result_type intersection;
-
-  uint ray_mask = visibility & 0xFF;
-  if (0 == ray_mask && (visibility & ~0xFF) != 0) {
-    ray_mask = 0xFF;
-  }
-
-#    if defined(__METALRT_MOTION__)
-  payload.time = ray->time;
-  intersection = metalrt_intersect.intersect(r,
-                                             metal_ancillaries->accel_struct,
-                                             ray_mask,
-                                             ray->time,
-                                             metal_ancillaries->ift_default,
-                                             payload);
-#    else
-  intersection = metalrt_intersect.intersect(
-      r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload);
-#    endif
-
-  if (intersection.type == intersection_type::none) {
-    return false;
-  }
-
-  isect->prim = payload.prim;
-  isect->type = payload.type;
-  isect->object = intersection.user_instance_id;
-
-  isect->t = intersection.distance;
-  if (intersection.type == intersection_type::triangle) {
-    isect->u = 1.0f - intersection.triangle_barycentric_coord.y -
-               intersection.triangle_barycentric_coord.x;
-    isect->v = intersection.triangle_barycentric_coord.x;
-  }
-  else {
-    isect->u = payload.u;
-    isect->v = payload.v;
-  }
-
-  return isect->type != PRIMITIVE_NONE;
-
-#  else
-  if (!scene_intersect_valid(ray)) {
+  if (!intersection_ray_valid(ray)) {
    return false;
  }

@@ -809,44 +246,56 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
 #    endif /* __OBJECT_MOTION__ */

  return bvh_intersect_volume(kg, ray, isect, visibility);
-#  endif   /* __KERNEL_OPTIX__ */
 }
-#endif /* __VOLUME__ */
+#  endif /* defined(__VOLUME__) && !defined(__VOLUME_RECORD_ALL__) */

-#ifdef __VOLUME_RECORD_ALL__
-ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals kg,
-                                                     ccl_private const Ray *ray,
-                                                     ccl_private Intersection *isect,
-                                                     const uint max_hits,
-                                                     const uint visibility)
+/* Volume BVH traversal, for initializing or updating the volume stack.
+ * Variation that records multiple intersections at once. */
+
+#  if defined(__VOLUME__) && defined(__VOLUME_RECORD_ALL__)
+
+#    define BVH_FUNCTION_NAME bvh_intersect_volume_all
+#    define BVH_FUNCTION_FEATURES BVH_HAIR
+#    include "kernel/bvh/volume_all.h"
+
+#    if defined(__OBJECT_MOTION__)
+#      define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
+#      define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
+#      include "kernel/bvh/volume_all.h"
+#    endif
+
+ccl_device_intersect uint scene_intersect_volume(KernelGlobals kg,
+                                                 ccl_private const Ray *ray,
+                                                 ccl_private Intersection *isect,
+                                                 const uint max_hits,
+                                                 const uint visibility)
 {
-  if (!scene_intersect_valid(ray)) {
+  if (!intersection_ray_valid(ray)) {
    return false;
  }

-#  ifdef __EMBREE__
-  if (kernel_data.bvh.scene) {
-    CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
-    ctx.isect_s = isect;
-    ctx.max_hits = max_hits;
-    ctx.num_hits = 0;
-    ctx.ray = ray;
-    IntersectContext rtc_ctx(&ctx);
-    RTCRay rtc_ray;
-    kernel_embree_setup_ray(*ray, rtc_ray, visibility);
-    rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
-    return ctx.num_hits;
+#    ifdef __EMBREE__
+  if (kernel_data.device_bvh) {
+    return kernel_embree_intersect_volume(kg, ray, isect, max_hits, visibility);
  }
-#  endif /* __EMBREE__ */
+#    endif

-#  ifdef __OBJECT_MOTION__
+#    ifdef __OBJECT_MOTION__
  if (kernel_data.bvh.have_motion) {
    return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
  }
-#  endif /* __OBJECT_MOTION__ */
+#    endif /* __OBJECT_MOTION__ */

  return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
 }
-#endif /* __VOLUME_RECORD_ALL__ */
+
+#  endif /* defined(__VOLUME__) && defined(__VOLUME_RECORD_ALL__) */
+
+#  undef BVH_FEATURE
+#  undef BVH_NAME_JOIN
+#  undef BVH_NAME_EVAL
+#  undef BVH_FUNCTION_FULL_NAME
+
+#endif /* __BVH2__ */

 CCL_NAMESPACE_END
--- a/intern/cycles/kernel/bvh/embree.h
+++ b/intern/cycles/kernel/bvh/embree.h
@@ -1,176 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2018-2022 Blender Foundation. */
-
-#pragma once
-
-#include <embree3/rtcore_ray.h>
-#include <embree3/rtcore_scene.h>
-
-#include "kernel/device/cpu/compat.h"
-#include "kernel/device/cpu/globals.h"
-
-#include "kernel/bvh/util.h"
-
-#include "util/vector.h"
-
-CCL_NAMESPACE_BEGIN
-
-struct CCLIntersectContext {
-  typedef enum {
-    RAY_REGULAR = 0,
-    RAY_SHADOW_ALL = 1,
-    RAY_LOCAL = 2,
-    RAY_SSS = 3,
-    RAY_VOLUME_ALL = 4,
-  } RayType;
-
-  KernelGlobals kg;
-  RayType type;
-
-  /* For avoiding self intersections */
-  const Ray *ray;
-
-  /* for shadow rays */
-  Intersection *isect_s;
-  uint max_hits;
-  uint num_hits;
-  uint num_recorded_hits;
-  float throughput;
-  float max_t;
-  bool opaque_hit;
-
-  /* for SSS Rays: */
-  LocalIntersection *local_isect;
-  int local_object_id;
-  uint *lcg_state;
-
-  CCLIntersectContext(KernelGlobals kg_, RayType type_)
-  {
-    kg = kg_;
-    type = type_;
-    ray = NULL;
-    max_hits = 1;
-    num_hits = 0;
-    num_recorded_hits = 0;
-    throughput = 1.0f;
-    max_t = FLT_MAX;
-    opaque_hit = false;
-    isect_s = NULL;
-    local_isect = NULL;
-    local_object_id = -1;
-    lcg_state = NULL;
-  }
-};
-
-class IntersectContext {
- public:
-  IntersectContext(CCLIntersectContext *ctx)
-  {
-    rtcInitIntersectContext(&context);
-    userRayExt = ctx;
-  }
-  RTCIntersectContext context;
-  CCLIntersectContext *userRayExt;
-};
-
-ccl_device_inline void kernel_embree_setup_ray(const Ray &ray,
-                                               RTCRay &rtc_ray,
-                                               const uint visibility)
-{
-  rtc_ray.org_x = ray.P.x;
-  rtc_ray.org_y = ray.P.y;
-  rtc_ray.org_z = ray.P.z;
-  rtc_ray.dir_x = ray.D.x;
-  rtc_ray.dir_y = ray.D.y;
-  rtc_ray.dir_z = ray.D.z;
-  rtc_ray.tnear = 0.0f;
-  rtc_ray.tfar = ray.t;
-  rtc_ray.time = ray.time;
-  rtc_ray.mask = visibility;
-}
-
-ccl_device_inline void kernel_embree_setup_rayhit(const Ray &ray,
-                                                  RTCRayHit &rayhit,
-                                                  const uint visibility)
-{
-  kernel_embree_setup_ray(ray, rayhit.ray, visibility);
-  rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID;
-  rayhit.hit.instID[0] = RTC_INVALID_GEOMETRY_ID;
-}
-
-ccl_device_inline bool kernel_embree_is_self_intersection(const KernelGlobals kg,
-                                                          const RTCHit *hit,
-                                                          const Ray *ray)
-{
-  bool status = false;
-  if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
-    const int oID = hit->instID[0] / 2;
-    if ((ray->self.object == oID) || (ray->self.light_object == oID)) {
-      RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
-          rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
-      const int pID = hit->primID +
-                      (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
-      status = intersection_skip_self_shadow(ray->self, oID, pID);
-    }
-  }
-  else {
-    const int oID = hit->geomID / 2;
-    if ((ray->self.object == oID) || (ray->self.light_object == oID)) {
-      const int pID = hit->primID + (intptr_t)rtcGetGeometryUserData(
-                                        rtcGetGeometry(kernel_data.bvh.scene, hit->geomID));
-      status = intersection_skip_self_shadow(ray->self, oID, pID);
-    }
-  }
-
-  return status;
-}
-
-ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg,
-                                                 const RTCRay *ray,
-                                                 const RTCHit *hit,
-                                                 Intersection *isect)
-{
-  isect->t = ray->tfar;
-  if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
-    RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
-        rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
-    isect->prim = hit->primID +
-                  (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
-    isect->object = hit->instID[0] / 2;
-  }
-  else {
-    isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(
-                                    rtcGetGeometry(kernel_data.bvh.scene, hit->geomID));
-    isect->object = hit->geomID / 2;
-  }
-
-  const bool is_hair = hit->geomID & 1;
-  if (is_hair) {
-    const KernelCurveSegment segment = kernel_data_fetch(curve_segments, isect->prim);
-    isect->type = segment.type;
-    isect->prim = segment.prim;
-    isect->u = hit->u;
-    isect->v = hit->v;
-  }
-  else {
-    isect->type = kernel_data_fetch(objects, isect->object).primitive_type;
-    isect->u = 1.0f - hit->v - hit->u;
-    isect->v = hit->u;
-  }
-}
-
-ccl_device_inline void kernel_embree_convert_sss_hit(
-    KernelGlobals kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int object)
-{
-  isect->u = 1.0f - hit->v - hit->u;
-  isect->v = hit->u;
-  isect->t = ray->tfar;
-  RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
-      rtcGetGeometry(kernel_data.bvh.scene, object * 2));
-  isect->prim = hit->primID +
-                (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
-  isect->object = object;
-  isect->type = kernel_data_fetch(objects, object).primitive_type;
-}
-
-CCL_NAMESPACE_END
--- a/Show More
+++ b/Show More