Merge branch 'main' into asset-shelf

RNA: ignore some large arrays in override code
This speeds up saving `070_0100.anim.blend` from the Heist project from ~3s to ~300ms by adding PROPOVERRIDE_IGNORE in a few places. It's not completely obvious to me when `PROPOVERRIDE_IGNORE` should be used and when it shouldn't. Given that the same is done for meshes already, it seems correct. Pull Request: blender/blender#107196
2023-04-21 11:02:31 +02:00 · 2023-04-21 10:15:51 +02:00 · 2023-04-21 10:06:55 +02:00 · 2023-04-21 09:55:37 +02:00 · 2023-04-21 08:28:03 +02:00 · 2023-04-21 07:52:17 +02:00
1243 changed files with 66242 additions and 26698 deletions
--- a/.gitea/issue_template/bug.yaml
+++ b/.gitea/issue_template/bug.yaml
@@ -15,6 +15,7 @@ body:
                * Test [daily builds](https://builder.blender.org/) to verify if the issue is already fixed.
                * Test [previous versions](https://download.blender.org/release/) to find an older working version.
                * For feature requests, feedback, questions or build issues, see [communication channels](https://wiki.blender.org/wiki/Communication/Contact#User_Feedback_and_Requests).
+                * Security vulnerabilities should be [reported privately](https://wiki.blender.org/wiki/Process/Vulnerability_Reports).
                * If there are multiple bugs, make multiple bug reports.

  - type: textarea
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -333,10 +333,7 @@ option(WITH_MOD_OCEANSIM        "Enable Ocean Modifier" ON)
 # Image format support
 option(WITH_IMAGE_OPENEXR       "Enable OpenEXR Support (http://www.openexr.com)" ON)
 option(WITH_IMAGE_OPENJPEG      "Enable OpenJpeg Support (http://www.openjpeg.org)" ON)
-option(WITH_IMAGE_TIFF          "Enable LibTIFF Support" ON)
-option(WITH_IMAGE_DDS           "Enable DDS Image Support" ON)
 option(WITH_IMAGE_CINEON        "Enable CINEON and DPX Image Support" ON)
-option(WITH_IMAGE_HDR           "Enable HDR Image Support" ON)
 option(WITH_IMAGE_WEBP          "Enable WebP Image Support" ON)

 # Audio/Video format support
@@ -524,7 +521,8 @@ endif()
 if(NOT APPLE)
  option(WITH_CYCLES_DEVICE_HIP        "Enable Cycles AMD HIP support" ON)
  option(WITH_CYCLES_HIP_BINARIES      "Build Cycles AMD HIP binaries" OFF)
-  set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx906 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
+  # Radeon VII (gfx906) not currently working with HIP SDK, so left out of the list.
+  set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
  mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
  mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
 endif()
@@ -648,15 +646,15 @@ if(WIN32)
 endif()

 # Compiler tool-chain.
-if(UNIX AND NOT APPLE)
+if(UNIX)
  if(CMAKE_COMPILER_IS_GNUCC)
    option(WITH_LINKER_GOLD "Use ld.gold linker which is usually faster than ld.bfd" ON)
    mark_as_advanced(WITH_LINKER_GOLD)
-    option(WITH_LINKER_LLD "Use ld.lld linker which is usually faster than ld.gold" OFF)
-    mark_as_advanced(WITH_LINKER_LLD)
  endif()
  if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
-    option(WITH_LINKER_MOLD "Use ld.mold linker which is usually faster than ld.gold & ld.lld." OFF)
+    option(WITH_LINKER_LLD "Use ld.lld linker which is usually faster than ld.gold" OFF)
+    mark_as_advanced(WITH_LINKER_LLD)
+    option(WITH_LINKER_MOLD "Use ld.mold linker which is usually faster than ld.gold & ld.lld. Needs \"sold\" subscription on macOS." OFF)
    mark_as_advanced(WITH_LINKER_MOLD)
  endif()
 endif()
@@ -693,8 +691,10 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
      else()
        string(APPEND _asan_defaults " -fsanitize=object-size")
      endif()
-    else()
+    elseif(CMAKE_COMPILER_IS_GNUCC)
      string(APPEND _asan_defaults " -fsanitize=leak -fsanitize=object-size")
+    else()
+      string(APPEND _asan_defaults " -fsanitize=leak")
    endif()

    set(COMPILER_ASAN_CFLAGS "${_asan_defaults}" CACHE STRING "C flags for address sanitizer")
@@ -711,6 +711,7 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
        [HKEY_LOCAL_MACHINE\\SOFTWARE\\Wow6432Node\\LLVM\\LLVM;]/lib/clang/7.0.0/lib/windows
        [HKEY_LOCAL_MACHINE\\SOFTWARE\\Wow6432Node\\LLVM\\LLVM;]/lib/clang/6.0.0/lib/windows
      )
+      mark_as_advanced(COMPILER_ASAN_LIBRARY)
    elseif(APPLE)
      execute_process(COMMAND ${CMAKE_CXX_COMPILER}
        -print-file-name=lib
@@ -725,13 +726,14 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
          "${CLANG_LIB_DIR}/darwin/"
      )
      unset(CLANG_LIB_DIR)
-    else()
+      mark_as_advanced(COMPILER_ASAN_LIBRARY)
+    elseif(CMAKE_COMPILER_IS_GNUCC)
      find_library(
        COMPILER_ASAN_LIBRARY asan ${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}
      )
+      mark_as_advanced(COMPILER_ASAN_LIBRARY)
    endif()

-    mark_as_advanced(COMPILER_ASAN_LIBRARY)
  endif()
 endif()

@@ -892,9 +894,6 @@ set_and_warn_dependency(WITH_IMAGE_OPENEXR WITH_ALEMBIC OFF)
 set_and_warn_dependency(WITH_IMAGE_OPENEXR WITH_VULKAN_BACKEND OFF)
 set_and_warn_dependency(WITH_IMAGE_OPENEXR WITH_CYCLES_OSL OFF)

-# Haru needs `TIFFFaxBlackCodes` & `TIFFFaxWhiteCodes` symbols from TIFF.
-set_and_warn_dependency(WITH_IMAGE_TIFF WITH_HARU       OFF)
-
 # auto enable openimageio for cycles
 if(WITH_CYCLES)
  # auto enable llvm for cycles_osl
@@ -1582,6 +1581,8 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
  add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_MISSING_NORETURN -Wno-missing-noreturn)
  add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_UNUSED_BUT_SET_VARIABLE -Wno-unused-but-set-variable)
  add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_DEPRECATED_DECLARATIONS -Wno-deprecated-declarations)
+  add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_STRICT_PROTOTYPES -Wno-strict-prototypes)
+  add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_BITWISE_INSTEAD_OF_LOGICAL -Wno-bitwise-instead-of-logical)

  add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_UNUSED_PARAMETER -Wno-unused-parameter)
  add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_UNUSED_PRIVATE_FIELD -Wno-unused-private-field)
@@ -1595,6 +1596,7 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
  add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_UNDEFINED_VAR_TEMPLATE -Wno-undefined-var-template)
  add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_INSTANTIATION_AFTER_SPECIALIZATION -Wno-instantiation-after-specialization)
  add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_MISLEADING_INDENTATION    -Wno-misleading-indentation)
+  add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_BITWISE_INSTEAD_OF_LOGICAL -Wno-bitwise-instead-of-logical)

 elseif(CMAKE_C_COMPILER_ID MATCHES "Intel")

@@ -1937,11 +1939,8 @@ if(FIRST_RUN)

  info_cfg_text("Image Formats:")
  info_cfg_option(WITH_IMAGE_CINEON)
-  info_cfg_option(WITH_IMAGE_DDS)
-  info_cfg_option(WITH_IMAGE_HDR)
  info_cfg_option(WITH_IMAGE_OPENEXR)
  info_cfg_option(WITH_IMAGE_OPENJPEG)
-  info_cfg_option(WITH_IMAGE_TIFF)

  info_cfg_text("Audio:")
  info_cfg_option(WITH_CODEC_AVI)
--- a/build_files/build_environment/CMakeLists.txt
+++ b/build_files/build_environment/CMakeLists.txt
@@ -90,28 +90,26 @@ include(cmake/haru.cmake)
 # Boost needs to be included after `python.cmake` due to the PYTHON_BINARY variable being needed.
 include(cmake/boost.cmake)
 include(cmake/pugixml.cmake)
-include(cmake/ispc.cmake)
-include(cmake/openimagedenoise.cmake)
-include(cmake/embree.cmake)
-include(cmake/openpgl.cmake)
-include(cmake/fmt.cmake)
-include(cmake/robinmap.cmake)
-include(cmake/xml2.cmake)
-
 include(cmake/fribidi.cmake)
 include(cmake/harfbuzz.cmake)
 if(NOT APPLE)
  include(cmake/xr_openxr.cmake)
-  if(NOT WIN32 OR BUILD_MODE STREQUAL Release)
-    include(cmake/dpcpp.cmake)
-    include(cmake/dpcpp_deps.cmake)
-  endif()
+  include(cmake/dpcpp.cmake)
+  include(cmake/dpcpp_deps.cmake)
  if(NOT WIN32)
    include(cmake/igc.cmake)
    include(cmake/gmmlib.cmake)
    include(cmake/ocloc.cmake)
  endif()
 endif()
+include(cmake/ispc.cmake)
+include(cmake/openimagedenoise.cmake)
+# Embree needs to be included after dpcpp as it uses it for compiling with GPU support
+include(cmake/embree.cmake)
+include(cmake/openpgl.cmake)
+include(cmake/fmt.cmake)
+include(cmake/robinmap.cmake)
+include(cmake/xml2.cmake)

 # OpenColorIO and dependencies.
 include(cmake/expat.cmake)
--- a/build_files/build_environment/cmake/download.cmake
+++ b/build_files/build_environment/cmake/download.cmake
@@ -156,6 +156,7 @@ download_source(OPENCLHEADERS)
 download_source(ICDLOADER)
 download_source(MP11)
 download_source(SPIRV_HEADERS)
+download_source(UNIFIED_RUNTIME)
 download_source(IGC)
 download_source(IGC_LLVM)
 download_source(IGC_OPENCL_CLANG)
--- a/build_files/build_environment/cmake/dpcpp.cmake
+++ b/build_files/build_environment/cmake/dpcpp.cmake
@@ -5,6 +5,9 @@
 # for now.
 string(REPLACE "-DCMAKE_CXX_STANDARD=17" " " DPCPP_CMAKE_FLAGS "${DEFAULT_CMAKE_FLAGS}")

+# DPCPP already generates debug libs, there isn't much point in compiling it in debug mode itself.
+string(REPLACE "-DCMAKE_BUILD_TYPE=Debug" "-DCMAKE_BUILD_TYPE=Release" DPCPP_CMAKE_FLAGS "${DPCPP_CMAKE_FLAGS}")
+
 if(WIN32)
  set(LLVM_GENERATOR "Ninja")
 else()
@@ -38,17 +41,18 @@ set(DPCPP_EXTRA_ARGS
  -DLEVEL_ZERO_LIBRARY=${LIBDIR}/level-zero/lib/${LIBPREFIX}ze_loader${SHAREDLIBEXT}
  -DLEVEL_ZERO_INCLUDE_DIR=${LIBDIR}/level-zero/include
  -DLLVM_EXTERNAL_SPIRV_HEADERS_SOURCE_DIR=${BUILD_DIR}/spirvheaders/src/external_spirvheaders/
+  -DUNIFIED_RUNTIME_SOURCE_DIR=${BUILD_DIR}/unifiedruntime/src/external_unifiedruntime/
  # Below here is copied from an invocation of buildbot/config.py
  -DLLVM_ENABLE_ASSERTIONS=ON
  -DLLVM_TARGETS_TO_BUILD=X86
-  -DLLVM_EXTERNAL_PROJECTS=sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw
+  -DLLVM_EXTERNAL_PROJECTS=sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw^^lld
  -DLLVM_EXTERNAL_SYCL_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/sycl
  -DLLVM_EXTERNAL_LLVM_SPIRV_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/llvm-spirv
  -DLLVM_EXTERNAL_XPTI_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xpti
  -DXPTI_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xpti
  -DLLVM_EXTERNAL_XPTIFW_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xptifw
  -DLLVM_EXTERNAL_LIBDEVICE_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/libdevice
-  -DLLVM_ENABLE_PROJECTS=clang^^sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw
+  -DLLVM_ENABLE_PROJECTS=clang^^sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw^^lld
  -DLIBCLC_TARGETS_TO_BUILD=
  -DLIBCLC_GENERATE_REMANGLED_VARIANTS=OFF
  -DSYCL_BUILD_PI_HIP_PLATFORM=AMD
@@ -104,13 +108,19 @@ add_dependencies(
  external_mp11
  external_level-zero
  external_spirvheaders
+  external_unifiedruntime
 )

 if(BUILD_MODE STREQUAL Release AND WIN32)
  ExternalProject_Add_Step(external_dpcpp after_install
-      COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang-cl.exe
-      COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang-cpp.exe
-      COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang.exe
      COMMAND ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/dpcpp ${HARVEST_TARGET}/dpcpp
+      COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/clang-cl.exe
+      COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/clang-cpp.exe
+      COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/clang.exe
+      COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/ld.lld.exe
+      COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/ld64.lld.exe
+      COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/lld.exe
+      COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/lld-link.exe
+      COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/wasm-ld.exe
  )
 endif()
--- a/build_files/build_environment/cmake/dpcpp_deps.cmake
+++ b/build_files/build_environment/cmake/dpcpp_deps.cmake
@@ -59,3 +59,13 @@ ExternalProject_Add(external_spirvheaders
  BUILD_COMMAND echo .
  INSTALL_COMMAND echo .
 )
+
+ExternalProject_Add(external_unifiedruntime
+  URL file://${PACKAGE_DIR}/${UNIFIED_RUNTIME_FILE}
+  URL_HASH ${UNIFIED_RUNTIME_HASH_TYPE}=${UNIFIED_RUNTIME_HASH}
+  DOWNLOAD_DIR ${DOWNLOAD_DIR}
+  PREFIX ${BUILD_DIR}/unifiedruntime
+  CONFIGURE_COMMAND echo .
+  BUILD_COMMAND echo .
+  INSTALL_COMMAND echo .
+)
--- a/build_files/build_environment/cmake/embree.cmake
+++ b/build_files/build_environment/cmake/embree.cmake
@@ -3,6 +3,8 @@
 # Note the utility apps may use png/tiff/gif system libraries, but the
 # library itself does not depend on them, so should give no problems.

+set(EMBREE_CMAKE_FLAGS ${DEFAULT_CMAKE_FLAGS})
+
 set(EMBREE_EXTRA_ARGS
  -DEMBREE_ISPC_SUPPORT=OFF
  -DEMBREE_TUTORIALS=OFF
@@ -31,6 +33,43 @@ if(NOT BLENDER_PLATFORM_ARM)
  )
 endif()

+if(NOT APPLE)
+  if(WIN32)
+    # Levels below -O2 don't work well for Embree+SYCL.
+    string(REGEX REPLACE "-O[A-Za-z0-9]" "" EMBREE_CLANG_CMAKE_CXX_FLAGS_DEBUG ${BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG})
+    string(APPEND EMBREE_CLANG_CMAKE_CXX_FLAGS_DEBUG " -O2")
+    string(REGEX REPLACE "-O[A-Za-z0-9]" "" EMBREE_CLANG_CMAKE_C_FLAGS_DEBUG ${BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG})
+    string(APPEND EMBREE_CLANG_CMAKE_C_FLAGS_DEBUG " -O2")
+    set(EMBREE_CMAKE_FLAGS
+      -DCMAKE_BUILD_TYPE=${BUILD_MODE}
+      -DCMAKE_CXX_FLAGS_RELEASE=${BLENDER_CLANG_CMAKE_CXX_FLAGS_RELEASE}
+      -DCMAKE_CXX_FLAGS_MINSIZEREL=${BLENDER_CLANG_CMAKE_CXX_FLAGS_MINSIZEREL}
+      -DCMAKE_CXX_FLAGS_RELWITHDEBINFO=${BLENDER_CLANG_CMAKE_CXX_FLAGS_RELWITHDEBINFO}
+      -DCMAKE_CXX_FLAGS_DEBUG=${EMBREE_CLANG_CMAKE_CXX_FLAGS_DEBUG}
+      -DCMAKE_C_FLAGS_RELEASE=${BLENDER_CLANG_CMAKE_C_FLAGS_RELEASE}
+      -DCMAKE_C_FLAGS_MINSIZEREL=${BLENDER_CLANG_CMAKE_C_FLAGS_MINSIZEREL}
+      -DCMAKE_C_FLAGS_RELWITHDEBINFO=${BLENDER_CLANG_CMAKE_C_FLAGS_RELWITHDEBINFO}
+      -DCMAKE_C_FLAGS_DEBUG=${EMBREE_CLANG_CMAKE_C_FLAGS_DEBUG}
+      -DCMAKE_CXX_STANDARD=17
+    )
+    set(EMBREE_EXTRA_ARGS
+      -DCMAKE_CXX_COMPILER=${LIBDIR}/dpcpp/bin/clang++.exe
+      -DCMAKE_C_COMPILER=${LIBDIR}/dpcpp/bin/clang.exe
+      -DCMAKE_SHARED_LINKER_FLAGS=-L"${LIBDIR}/dpcpp/lib"
+      -DEMBREE_SYCL_SUPPORT=ON
+      ${EMBREE_EXTRA_ARGS}
+    )
+  else()
+    set(EMBREE_EXTRA_ARGS
+      -DCMAKE_CXX_COMPILER=${LIBDIR}/dpcpp/bin/clang++
+      -DCMAKE_C_COMPILER=${LIBDIR}/dpcpp/bin/clang
+      -DCMAKE_SHARED_LINKER_FLAGS=-L"${LIBDIR}/dpcpp/lib"
+      -DEMBREE_SYCL_SUPPORT=ON
+      ${EMBREE_EXTRA_ARGS}
+    )
+  endif()
+endif()
+
 if(TBB_STATIC_LIBRARY)
  set(EMBREE_EXTRA_ARGS
    ${EMBREE_EXTRA_ARGS}
@@ -42,16 +81,25 @@ ExternalProject_Add(external_embree
  URL file://${PACKAGE_DIR}/${EMBREE_FILE}
  DOWNLOAD_DIR ${DOWNLOAD_DIR}
  URL_HASH ${EMBREE_HASH_TYPE}=${EMBREE_HASH}
+  CMAKE_GENERATOR ${PLATFORM_ALT_GENERATOR}
  PREFIX ${BUILD_DIR}/embree
  PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/embree/src/external_embree < ${PATCH_DIR}/embree.diff
-  CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/embree ${DEFAULT_CMAKE_FLAGS} ${EMBREE_EXTRA_ARGS}
+  CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/embree ${EMBREE_CMAKE_FLAGS} ${EMBREE_EXTRA_ARGS}
  INSTALL_DIR ${LIBDIR}/embree
 )

-add_dependencies(
-  external_embree
-  external_tbb
-)
+if(NOT APPLE)
+  add_dependencies(
+    external_embree
+    external_tbb
+    external_dpcpp
+  )
+else()
+  add_dependencies(
+    external_embree
+    external_tbb
+  )
+endif()

 if(WIN32)
  if(BUILD_MODE STREQUAL Release)
@@ -66,6 +114,7 @@ if(WIN32)
    ExternalProject_Add_Step(external_embree after_install
      COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/bin/embree4_d.dll ${HARVEST_TARGET}/embree/bin/embree4_d.dll
      COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/embree4_d.lib ${HARVEST_TARGET}/embree/lib/embree4_d.lib
+      COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/embree4_sycl_d.lib ${HARVEST_TARGET}/embree/lib/embree4_sycl_d.lib
      DEPENDEES install
    )
  endif()
--- a/build_files/build_environment/cmake/options.cmake
+++ b/build_files/build_environment/cmake/options.cmake
@@ -74,6 +74,27 @@ if(WIN32)
  set(BLENDER_CMAKE_CXX_FLAGS_RELEASE "/MD ${COMMON_MSVC_FLAGS} /D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS /O2 /Ob2 /D NDEBUG /D PLATFORM_WINDOWS /DPSAPI_VERSION=2 /DTINYFORMAT_ALLOW_WCHAR_STRINGS")
  set(BLENDER_CMAKE_CXX_FLAGS_RELWITHDEBINFO "/MD ${COMMON_MSVC_FLAGS} /D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS /Zi /O2 /Ob1 /D NDEBUG /D PLATFORM_WINDOWS /DPSAPI_VERSION=2 /DTINYFORMAT_ALLOW_WCHAR_STRINGS")

+  # Set similar flags for CLANG compilation.
+  set(COMMON_CLANG_FLAGS "-D_DLL -D_MT") # Equivalent to MSVC /MD
+
+  if(WITH_OPTIMIZED_DEBUG)
+    set(BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrtd -O2 -D_DEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
+  else()
+    set(BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrtd -g -D_DEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
+  endif()
+  set(BLENDER_CLANG_CMAKE_C_FLAGS_MINSIZEREL "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -Os -DNDEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
+  set(BLENDER_CLANG_CMAKE_C_FLAGS_RELEASE "${COMMON_CLANG_FLAGS}  -Xclang --dependent-lib=msvcrt -O2 -DNDEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
+  set(BLENDER_CLANG_CMAKE_C_FLAGS_RELWITHDEBINFO "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -g -O2 -DNDEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
+
+  if(WITH_OPTIMIZED_DEBUG) # C++ debug flags: the optimized variant uses -O2, the regular one -g.
+    set(BLENDER_CLANG_CMAKE_CXX_FLAGS_DEBUG "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrtd -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -O2 -D_DEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS -DBOOST_DEBUG_PYTHON -DBOOST_ALL_NO_LIB")
+  else()
+    set(BLENDER_CLANG_CMAKE_CXX_FLAGS_DEBUG "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrtd -D_DEBUG -DPLATFORM_WINDOWS -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -g -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS -DBOOST_DEBUG_PYTHON -DBOOST_ALL_NO_LIB")
+  endif()
+  set(BLENDER_CLANG_CMAKE_CXX_FLAGS_MINSIZEREL "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -Os -DNDEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS") # -Os to match the C MinSizeRel flags.
+  set(BLENDER_CLANG_CMAKE_CXX_FLAGS_RELEASE "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -O2 -DNDEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
+  set(BLENDER_CLANG_CMAKE_CXX_FLAGS_RELWITHDEBINFO "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -g -O2 -DNDEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
+
  set(PLATFORM_FLAGS)
  set(PLATFORM_CXX_FLAGS)
  set(PLATFORM_CMAKE_FLAGS)
--- a/build_files/build_environment/cmake/versions.cmake
+++ b/build_files/build_environment/cmake/versions.cmake
@@ -599,15 +599,15 @@ set(OPENPGL_HASH db63f5dac5cfa8c110ede241f0c413f00db0c4748697381c4fa23e0f9e82a75
 set(OPENPGL_HASH_TYPE SHA256)
 set(OPENPGL_FILE openpgl-${OPENPGL_VERSION}.tar.gz)

-set(LEVEL_ZERO_VERSION v1.8.5)
+set(LEVEL_ZERO_VERSION v1.8.8)
 set(LEVEL_ZERO_URI https://github.com/oneapi-src/level-zero/archive/refs/tags/${LEVEL_ZERO_VERSION}.tar.gz)
-set(LEVEL_ZERO_HASH b6e9663bbcc53c148d32376998298bec6f7c434ef2218c61fa708963e3a09394)
+set(LEVEL_ZERO_HASH 3553ae8fa0d2d69c4210a8f3428bd6612bd8bb8a627faf52c3658a01851e66d2)
 set(LEVEL_ZERO_HASH_TYPE SHA256)
 set(LEVEL_ZERO_FILE level-zero-${LEVEL_ZERO_VERSION}.tar.gz)

-set(DPCPP_VERSION 20221019)
-set(DPCPP_URI https://github.com/intel/llvm/archive/refs/tags/sycl-nightly/${DPCPP_VERSION}.tar.gz)
-set(DPCPP_HASH 2f533946e91ce3829431758ea17b0b834b960c1a796e9e4563c86e03eb9603a2)
+set(DPCPP_VERSION 2022-12)
+set(DPCPP_URI https://github.com/intel/llvm/archive/refs/tags/${DPCPP_VERSION}.tar.gz)
+set(DPCPP_HASH 13151d5ae79f7c9c4a9b072a0c486ae7b3c4993e301bb1268c92214451025790)
 set(DPCPP_HASH_TYPE SHA256)
 set(DPCPP_FILE DPCPP-${DPCPP_VERSION}.tar.gz)

@@ -620,9 +620,9 @@ set(DPCPP_FILE DPCPP-${DPCPP_VERSION}.tar.gz)
 # will take care of building them, unpack is being done in dpcpp_deps.cmake

 # Source llvm/lib/SYCLLowerIR/CMakeLists.txt
-set(VCINTRINSICS_VERSION abce9184b7a3a7fe1b02289b9285610d9dc45465)
+set(VCINTRINSICS_VERSION 782fbf7301dc73acaa049a4324c976ad94f587f7)
 set(VCINTRINSICS_URI https://github.com/intel/vc-intrinsics/archive/${VCINTRINSICS_VERSION}.tar.gz)
-set(VCINTRINSICS_HASH 3e9fd471246b87633b26f7e15e17ab7733d357458c53d5c5881c03929d6c551f)
+set(VCINTRINSICS_HASH f4c0ccad8c1f77760364c551c65e8e1cf194d058889fa46d3b1b2d19ec4dc33f)
 set(VCINTRINSICS_HASH_TYPE SHA256)
 set(VCINTRINSICS_FILE vc-intrinsics-${VCINTRINSICS_VERSION}.tar.gz)

@@ -657,6 +657,13 @@ set(SPIRV_HEADERS_HASH ec8ecb471a62672697846c436501638ab25447ae9d4a6761e0bfe8a9a
 set(SPIRV_HEADERS_HASH_TYPE SHA256)
 set(SPIRV_HEADERS_FILE SPIR-V-Headers-${SPIRV_HEADERS_VERSION}.tar.gz)

+# Source llvm/sycl/plugins/unified_runtime/CMakeLists.txt
+set(UNIFIED_RUNTIME_VERSION fd711c920acc4434cb52ff18b078c082d9d7f44d)
+set(UNIFIED_RUNTIME_URI https://github.com/oneapi-src/unified-runtime/archive/${UNIFIED_RUNTIME_VERSION}.tar.gz)
+set(UNIFIED_RUNTIME_HASH 535ca2ee78f68c5e7e62b10f1bbabd909179488885566e6d9b1fc50e8a1be65f)
+set(UNIFIED_RUNTIME_HASH_TYPE SHA256)
+set(UNIFIED_RUNTIME_FILE unified-runtime-${UNIFIED_RUNTIME_VERSION}.tar.gz)
+
 ######################
 ### DPCPP DEPS END ###
 ######################
@@ -730,9 +737,9 @@ set(GMMLIB_HASH c1f33e1519edfc527127baeb0436b783430dfd256c643130169a3a71dc86aff9
 set(GMMLIB_HASH_TYPE SHA256)
 set(GMMLIB_FILE ${GMMLIB_VERSION}.tar.gz)

-set(OCLOC_VERSION 22.49.25018.21)
+set(OCLOC_VERSION 23.05.25593.18)
 set(OCLOC_URI https://github.com/intel/compute-runtime/archive/refs/tags/${OCLOC_VERSION}.tar.gz)
-set(OCLOC_HASH 92362dae08b503a34e5d3820ed284198c452bcd5e7504d90eb69887b20492c06)
+set(OCLOC_HASH 122415028e631922ae999c996954dfd98ce9a32decd564d5484c31476ec9306e)
 set(OCLOC_HASH_TYPE SHA256)
 set(OCLOC_FILE ocloc-${OCLOC_VERSION}.tar.gz)

--- a/build_files/build_environment/dependencies.dot
+++ b/build_files/build_environment/dependencies.dot
@@ -14,6 +14,7 @@ graph[autosize = false, size = "25.7,8.3!", resolution = 300];
 	external_dpcpp -- external_mp11;
 	external_dpcpp -- external_level_zero;
 	external_dpcpp -- external_spirvheaders;
+	external_dpcpp -- external_unifiedruntime;
 	external_embree -- external_tbb;
 	external_ffmpeg -- external_zlib;
 	external_ffmpeg -- external_openjpeg;
--- a/build_files/build_environment/install_linux_packages.py
+++ b/build_files/build_environment/install_linux_packages.py
@@ -1628,7 +1628,7 @@ DISTRO_IDS_INSTALLERS = {
 def get_distro(settings):
    if settings.distro_id is not ...:
        settings.logger.info(f"Distribution identifier forced by user to {settings.distro_id}.")
-        return
+        return settings.distro_id
    import platform
    info = platform.freedesktop_os_release()
    ids = [info["ID"]]
--- a/build_files/build_environment/patches/dpcpp.diff
+++ b/build_files/build_environment/patches/dpcpp.diff
@@ -34,3 +34,156 @@ diff -Naur llvm-sycl-nightly-20220208.orig/libdevice/cmake/modules/SYCLLibdevice
   libsycldevice-obj
   libsycldevice-spv)
 
+diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp
+index 17eeaafae194..09e6d2217aaa 100644
+--- a/sycl/source/detail/program_manager/program_manager.cpp
+++ b/sycl/source/detail/program_manager/program_manager.cpp
+@@ -1647,46 +1647,120 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState(
+   }
+   assert(BinImages.size() > 0 && "Expected to find at least one device image");
+ 
++  // Ignore images with incompatible state. Image is considered compatible
++  // with a target state if an image is already in the target state or can
++  // be brought to target state by compiling/linking/building.
++  //
++  // Example: an image in "executable" state is not compatible with
++  // "input" target state - there is no operation to convert the image it
++  // to "input" state. An image in "input" state is compatible with
++  // "executable" target state because it can be built to get into
++  // "executable" state.
++  for (auto It = BinImages.begin(); It != BinImages.end();) {
++    if (getBinImageState(*It) > TargetState)
++      It = BinImages.erase(It);
++    else
++      ++It;
++  }
++
+   std::vector<device_image_plain> SYCLDeviceImages;
+-  for (RTDeviceBinaryImage *BinImage : BinImages) {
+-    const bundle_state ImgState = getBinImageState(BinImage);
+-
+-    // Ignore images with incompatible state. Image is considered compatible
+-    // with a target state if an image is already in the target state or can
+-    // be brought to target state by compiling/linking/building.
+-    //
+-    // Example: an image in "executable" state is not compatible with
+-    // "input" target state - there is no operation to convert the image it
+-    // to "input" state. An image in "input" state is compatible with
+-    // "executable" target state because it can be built to get into
+-    // "executable" state.
+-    if (ImgState > TargetState)
+-      continue;
+ 
+-    for (const sycl::device &Dev : Devs) {
++  // If a non-input state is requested, we can filter out some compatible
++  // images and return only those with the highest compatible state for each
++  // device-kernel pair. This map tracks how many kernel-device pairs need each
++  // image, so that any unneeded ones are skipped.
++  // TODO this has no effect if the requested state is input, consider having
++  // a separate branch for that case to avoid unnecessary tracking work.
++  struct DeviceBinaryImageInfo {
++    std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
++    bundle_state State = bundle_state::input;
++    int RequirementCounter = 0;
++  };
++  std::unordered_map<RTDeviceBinaryImage *, DeviceBinaryImageInfo> ImageInfoMap;
++
++  for (const sycl::device &Dev : Devs) {
++    // Track the highest image state for each requested kernel.
++    using StateImagesPairT =
++        std::pair<bundle_state, std::vector<RTDeviceBinaryImage *>>;
++    using KernelImageMapT =
++        std::map<kernel_id, StateImagesPairT, LessByNameComp>;
++    KernelImageMapT KernelImageMap;
++    if (!KernelIDs.empty())
++      for (const kernel_id &KernelID : KernelIDs)
++        KernelImageMap.insert({KernelID, {}});
++
++    for (RTDeviceBinaryImage *BinImage : BinImages) {
+       if (!compatibleWithDevice(BinImage, Dev) ||
+           !doesDevSupportImgAspects(Dev, *BinImage))
+         continue;
+ 
+-      std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
+-      // Collect kernel names for the image
+-      {
+-        std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
+-        KernelIDs = m_BinImg2KernelIDs[BinImage];
+-        // If the image does not contain any non-service kernels we can skip it.
+-        if (!KernelIDs || KernelIDs->empty())
+-          continue;
++      auto InsertRes = ImageInfoMap.insert({BinImage, {}});
++      DeviceBinaryImageInfo &ImgInfo = InsertRes.first->second;
++      if (InsertRes.second) {
++        ImgInfo.State = getBinImageState(BinImage);
++        // Collect kernel names for the image
++        {
++          std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
++          ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage];
++        }
+       }
++      const bundle_state ImgState = ImgInfo.State;
++      const std::shared_ptr<std::vector<sycl::kernel_id>> &ImageKernelIDs =
++          ImgInfo.KernelIDs;
++      int &ImgRequirementCounter = ImgInfo.RequirementCounter;
+ 
+-      DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
+-          BinImage, Ctx, Devs, ImgState, KernelIDs, /*PIProgram=*/nullptr);
++      // If the image does not contain any non-service kernels we can skip it.
++      if (!ImageKernelIDs || ImageKernelIDs->empty())
++        continue;
+ 
+-      SYCLDeviceImages.push_back(
+-          createSyclObjFromImpl<device_image_plain>(Impl));
+-      break;
++      // Update tracked information.
++      for (kernel_id &KernelID : *ImageKernelIDs) {
++        StateImagesPairT *StateImagesPair;
++        // If only specific kernels are requested, ignore the rest.
++        if (!KernelIDs.empty()) {
++          auto It = KernelImageMap.find(KernelID);
++          if (It == KernelImageMap.end())
++            continue;
++          StateImagesPair = &It->second;
++        } else
++          StateImagesPair = &KernelImageMap[KernelID];
++
++        auto &[KernelImagesState, KernelImages] = *StateImagesPair;
++
++        if (KernelImages.empty()) {
++          KernelImagesState = ImgState;
++          KernelImages.push_back(BinImage);
++          ++ImgRequirementCounter;
++        } else if (KernelImagesState < ImgState) {
++          for (RTDeviceBinaryImage *Img : KernelImages) {
++            auto It = ImageInfoMap.find(Img);
++            assert(It != ImageInfoMap.end());
++            assert(It->second.RequirementCounter > 0);
++            --(It->second.RequirementCounter);
++          }
++          KernelImages.clear();
++          KernelImages.push_back(BinImage);
++          KernelImagesState = ImgState;
++          ++ImgRequirementCounter;
++        } else if (KernelImagesState == ImgState) {
++          KernelImages.push_back(BinImage);
++          ++ImgRequirementCounter;
++        }
++      }
+     }
+   }
+ 
++  for (const auto &ImgInfoPair : ImageInfoMap) {
++    if (ImgInfoPair.second.RequirementCounter == 0)
++      continue;
++
++    DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
++        ImgInfoPair.first, Ctx, Devs, ImgInfoPair.second.State,
++        ImgInfoPair.second.KernelIDs, /*PIProgram=*/nullptr);
++
++    SYCLDeviceImages.push_back(createSyclObjFromImpl<device_image_plain>(Impl));
++  }
++
+   return SYCLDeviceImages;
+ }
+ 
--- a/build_files/build_environment/patches/embree.diff
+++ b/build_files/build_environment/patches/embree.diff
@@ -149,3 +149,19 @@ index 074f910a2..30f490818 100644
         return is_hit_first | is_hit_second;
       }
     };
+diff -ruN a/kernels/sycl/rthwif_embree_builder.cpp b/kernels/sycl/rthwif_embree_builder.cpp
+--- a/kernels/sycl/rthwif_embree_builder.cpp    2023-03-28 17:23:06.429190200 +0200
++++ b/kernels/sycl/rthwif_embree_builder.cpp    2023-03-28 17:35:01.291938600 +0200
+@@ -540,7 +540,12 @@
+       assert(offset <= geomDescrData.size());
+     }
+
++    /* Force running BVH building sequentially from the calling thread if using TBB < 2021, as it otherwise leads to runtime issues. */
++#if TBB_VERSION_MAJOR<2021
++    RTHWIF_PARALLEL_OPERATION parallelOperation = nullptr;
++#else
+     RTHWIF_PARALLEL_OPERATION parallelOperation = rthwifNewParallelOperation();
++#endif
+
+     /* estimate static accel size */
+     BBox1f time_range(0,1);
--- a/build_files/cmake/Modules/FindHIP.cmake
+++ b/build_files/cmake/Modules/FindHIP.cmake
@@ -37,18 +37,24 @@ elseif(HIP_HIPCC_EXECUTABLE)
  set(HIP_VERSION_MINOR 0)
  set(HIP_VERSION_PATCH 0)

+  if(WIN32)
+    set(_hipcc_executable ${HIP_HIPCC_EXECUTABLE}.bat)
+  else()
+    set(_hipcc_executable ${HIP_HIPCC_EXECUTABLE})
+  endif()
+
  # Get version from the output.
-  execute_process(COMMAND ${HIP_HIPCC_EXECUTABLE} --version
-                  OUTPUT_VARIABLE HIP_VERSION_RAW
+  execute_process(COMMAND ${_hipcc_executable} --version
+                  OUTPUT_VARIABLE _hip_version_raw
                  ERROR_QUIET
                  OUTPUT_STRIP_TRAILING_WHITESPACE)

  # Parse parts.
-  if(HIP_VERSION_RAW MATCHES "HIP version: .*")
+  if(_hip_version_raw MATCHES "HIP version: .*")
    # Strip the HIP prefix and get list of individual version components.
    string(REGEX REPLACE
           ".*HIP version: ([.0-9]+).*" "\\1"
-           HIP_SEMANTIC_VERSION "${HIP_VERSION_RAW}")
+           HIP_SEMANTIC_VERSION "${_hip_version_raw}")
    string(REPLACE "." ";" HIP_VERSION_PARTS "${HIP_SEMANTIC_VERSION}")
    list(LENGTH HIP_VERSION_PARTS NUM_HIP_VERSION_PARTS)

@@ -71,7 +77,13 @@ elseif(HIP_HIPCC_EXECUTABLE)

  # Construct full semantic version.
  set(HIP_VERSION "${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_VERSION_PATCH}")
-  unset(HIP_VERSION_RAW)
+  unset(_hip_version_raw)
+  unset(_hipcc_executable)
 else()
  set(HIP_FOUND FALSE)
 endif()
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(HIP
+    REQUIRED_VARS HIP_HIPCC_EXECUTABLE
+    VERSION_VAR HIP_VERSION)
--- a/build_files/cmake/Modules/FindSYCL.cmake
+++ b/build_files/cmake/Modules/FindSYCL.cmake
@@ -108,7 +108,11 @@ FIND_PACKAGE_HANDLE_STANDARD_ARGS(SYCL

 IF(SYCL_FOUND)
  SET(SYCL_INCLUDE_DIR ${SYCL_INCLUDE_DIR} ${SYCL_INCLUDE_DIR}/sycl)
-  SET(SYCL_LIBRARIES ${SYCL_LIBRARY})
+  IF(WIN32 AND SYCL_LIBRARY_DEBUG)
+    SET(SYCL_LIBRARIES optimized ${SYCL_LIBRARY} debug ${SYCL_LIBRARY_DEBUG})
+  ELSE()
+    SET(SYCL_LIBRARIES ${SYCL_LIBRARY})
+  ENDIF()
 ELSE()
  SET(SYCL_SYCL_FOUND FALSE)
 ENDIF()
--- a/build_files/cmake/config/blender_full.cmake
+++ b/build_files/cmake/config/blender_full.cmake
@@ -26,11 +26,8 @@ set(WITH_HARU                ON  CACHE BOOL "" FORCE)
 set(WITH_IK_ITASC            ON  CACHE BOOL "" FORCE)
 set(WITH_IK_SOLVER           ON  CACHE BOOL "" FORCE)
 set(WITH_IMAGE_CINEON        ON  CACHE BOOL "" FORCE)
-set(WITH_IMAGE_DDS           ON  CACHE BOOL "" FORCE)
-set(WITH_IMAGE_HDR           ON  CACHE BOOL "" FORCE)
 set(WITH_IMAGE_OPENEXR       ON  CACHE BOOL "" FORCE)
 set(WITH_IMAGE_OPENJPEG      ON  CACHE BOOL "" FORCE)
-set(WITH_IMAGE_TIFF          ON  CACHE BOOL "" FORCE)
 set(WITH_IMAGE_WEBP          ON  CACHE BOOL "" FORCE)
 set(WITH_INPUT_NDOF          ON  CACHE BOOL "" FORCE)
 set(WITH_INPUT_IME           ON  CACHE BOOL "" FORCE)
--- a/build_files/cmake/config/blender_lite.cmake
+++ b/build_files/cmake/config/blender_lite.cmake
@@ -27,11 +27,8 @@ set(WITH_HARU                OFF CACHE BOOL "" FORCE)
 set(WITH_IK_ITASC            OFF CACHE BOOL "" FORCE)
 set(WITH_IK_SOLVER           OFF CACHE BOOL "" FORCE)
 set(WITH_IMAGE_CINEON        OFF CACHE BOOL "" FORCE)
-set(WITH_IMAGE_DDS           OFF CACHE BOOL "" FORCE)
-set(WITH_IMAGE_HDR           OFF CACHE BOOL "" FORCE)
 set(WITH_IMAGE_OPENEXR       OFF CACHE BOOL "" FORCE)
 set(WITH_IMAGE_OPENJPEG      OFF CACHE BOOL "" FORCE)
-set(WITH_IMAGE_TIFF          OFF CACHE BOOL "" FORCE)
 set(WITH_IMAGE_WEBP          OFF CACHE BOOL "" FORCE)
 set(WITH_INPUT_IME           OFF CACHE BOOL "" FORCE)
 set(WITH_INPUT_NDOF          OFF CACHE BOOL "" FORCE)
--- a/build_files/cmake/config/blender_release.cmake
+++ b/build_files/cmake/config/blender_release.cmake
@@ -27,11 +27,8 @@ set(WITH_HARU                ON  CACHE BOOL "" FORCE)
 set(WITH_IK_ITASC            ON  CACHE BOOL "" FORCE)
 set(WITH_IK_SOLVER           ON  CACHE BOOL "" FORCE)
 set(WITH_IMAGE_CINEON        ON  CACHE BOOL "" FORCE)
-set(WITH_IMAGE_DDS           ON  CACHE BOOL "" FORCE)
-set(WITH_IMAGE_HDR           ON  CACHE BOOL "" FORCE)
 set(WITH_IMAGE_OPENEXR       ON  CACHE BOOL "" FORCE)
 set(WITH_IMAGE_OPENJPEG      ON  CACHE BOOL "" FORCE)
-set(WITH_IMAGE_TIFF          ON  CACHE BOOL "" FORCE)
 set(WITH_IMAGE_WEBP          ON  CACHE BOOL "" FORCE)
 set(WITH_INPUT_NDOF          ON  CACHE BOOL "" FORCE)
 set(WITH_INPUT_IME           ON  CACHE BOOL "" FORCE)
@@ -85,7 +82,7 @@ if(NOT APPLE)
  set(WITH_CYCLES_DEVICE_OPTIX    ON  CACHE BOOL "" FORCE)
  set(WITH_CYCLES_CUDA_BINARIES   ON  CACHE BOOL "" FORCE)
  set(WITH_CYCLES_CUBIN_COMPILER  OFF CACHE BOOL "" FORCE)
-  set(WITH_CYCLES_HIP_BINARIES    OFF CACHE BOOL "" FORCE)
+  set(WITH_CYCLES_HIP_BINARIES    ON  CACHE BOOL "" FORCE)
  set(WITH_CYCLES_DEVICE_ONEAPI   ON  CACHE BOOL "" FORCE)
  set(WITH_CYCLES_ONEAPI_BINARIES ON  CACHE BOOL "" FORCE)
 endif()
--- a/build_files/cmake/macros.cmake
+++ b/build_files/cmake/macros.cmake
@@ -1384,4 +1384,3 @@ macro(windows_process_platform_bundled_libraries library_deps)
    endforeach()
  endif()
 endmacro()
-
--- a/build_files/cmake/platform/platform_apple.cmake
+++ b/build_files/cmake/platform/platform_apple.cmake
@@ -174,7 +174,7 @@ if(SYSTEMSTUBS_LIBRARY)
  list(APPEND PLATFORM_LINKLIBS SystemStubs)
 endif()

-string(APPEND PLATFORM_CFLAGS " -pipe -funsigned-char -fno-strict-aliasing")
+string(APPEND PLATFORM_CFLAGS " -pipe -funsigned-char -fno-strict-aliasing -ffp-contract=off")
 set(PLATFORM_LINKFLAGS
  "-fexceptions -framework CoreServices -framework Foundation -framework IOKit -framework AppKit -framework Cocoa -framework Carbon -framework AudioUnit -framework AudioToolbox -framework CoreAudio -framework Metal -framework QuartzCore"
 )
@@ -221,10 +221,8 @@ find_package(PNG REQUIRED)
 set(JPEG_ROOT ${LIBDIR}/jpeg)
 find_package(JPEG REQUIRED)

-if(WITH_IMAGE_TIFF)
-  set(TIFF_ROOT ${LIBDIR}/tiff)
-  find_package(TIFF REQUIRED)
-endif()
+set(TIFF_ROOT ${LIBDIR}/tiff)
+find_package(TIFF REQUIRED)

 if(WITH_IMAGE_WEBP)
  set(WEBP_ROOT_DIR ${LIBDIR}/webp)
@@ -453,6 +451,31 @@ if(WITH_COMPILER_CCACHE)
  endif()
 endif()

+unset(_custom_LINKER_FUSE_FLAG)
+if(WITH_LINKER_LLD)
+  find_program(LLD_PROGRAM ld.lld)
+  if(LLD_PROGRAM)
+    set(_custom_LINKER_FUSE_FLAG "-fuse-ld=lld")
+  else()
+    message(WARNING "LLD linker NOT found, disabling WITH_LINKER_LLD")
+    set(WITH_LINKER_LLD OFF)
+  endif()
+endif()
+if(WITH_LINKER_MOLD)
+  find_program(MOLD_PROGRAM mold)
+  if(MOLD_PROGRAM)
+    set(_custom_LINKER_FUSE_FLAG "-fuse-ld=mold")
+  else()
+    message(WARNING "Mold linker NOT found, disabling WITH_LINKER_MOLD")
+    set(WITH_LINKER_MOLD OFF)
+  endif()
+endif()
+
+if(_custom_LINKER_FUSE_FLAG)
+  add_link_options(${_custom_LINKER_FUSE_FLAG})
+endif()
+
+
 if(WITH_COMPILER_ASAN)
  list(APPEND PLATFORM_BUNDLED_LIBRARIES ${COMPILER_ASAN_LIBRARY})
 endif()
--- a/build_files/cmake/platform/platform_unix.cmake
+++ b/build_files/cmake/platform/platform_unix.cmake
@@ -109,6 +109,10 @@ find_package_wrapper(ZLIB REQUIRED)
 find_package_wrapper(Zstd REQUIRED)
 find_package_wrapper(Epoxy REQUIRED)

+# XXX Linking errors with debian static tiff :/
+# find_package_wrapper(TIFF REQUIRED)
+find_package(TIFF)
+
 if(WITH_VULKAN_BACKEND)
  find_package_wrapper(Vulkan REQUIRED)
  find_package_wrapper(ShaderC REQUIRED)
@@ -190,13 +194,6 @@ if(WITH_IMAGE_OPENJPEG)
  set_and_warn_library_found("OpenJPEG" OPENJPEG_FOUND WITH_IMAGE_OPENJPEG)
 endif()

-if(WITH_IMAGE_TIFF)
-  # XXX Linking errors with debian static tiff :/
-#       find_package_wrapper(TIFF)
-  find_package(TIFF)
-  set_and_warn_library_found("TIFF" TIFF_FOUND WITH_IMAGE_TIFF)
-endif()
-
 if(WITH_OPENAL)
  find_package_wrapper(OpenAL)
  set_and_warn_library_found("OpenAL" OPENAL_FOUND WITH_OPENAL)
@@ -806,8 +803,7 @@ if(CMAKE_COMPILER_IS_GNUCC)
  # Automatically turned on when building with "-march=native". This is
  # explicitly turned off here as it will make floating point math give a bit
  # different results. This will lead to automated test failures. So disable
-  # this until we support it. Seems to default to off in clang and the intel
-  # compiler.
+  # this until we support it.
  set(PLATFORM_CFLAGS "-pipe -fPIC -funsigned-char -fno-strict-aliasing -ffp-contract=off")

  # `maybe-uninitialized` is unreliable in release builds, but fine in debug builds.
@@ -818,64 +814,49 @@ if(CMAKE_COMPILER_IS_GNUCC)
  string(PREPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO "${GCC_EXTRA_FLAGS_RELEASE} ")
  unset(GCC_EXTRA_FLAGS_RELEASE)

-  # NOTE(@campbellbarton): Eventually mold will be able to use `-fuse-ld=mold`,
-  # however at the moment this only works for GCC 12.1+ (unreleased at time of writing).
-  # So a workaround is used here "-B" which points to another path to find system commands
-  # such as `ld`.
  if(WITH_LINKER_MOLD AND _IS_LINKER_DEFAULT)
    find_program(MOLD_BIN "mold")
    mark_as_advanced(MOLD_BIN)
+
    if(NOT MOLD_BIN)
      message(STATUS "The \"mold\" binary could not be found, using system linker.")
      set(WITH_LINKER_MOLD OFF)
+    elseif(CMAKE_C_COMPILER_VERSION VERSION_LESS 12.1)
+      message(STATUS "GCC 12.1 or newer is required for the MOLD linker.")
+      set(WITH_LINKER_MOLD OFF)
    else()
-      # By default mold installs the binary to:
-      # - `{PREFIX}/bin/mold` as well as a symbolic-link in...
-      # - `{PREFIX}/lib/mold/ld`.
-      # (where `PREFIX` is typically `/usr/`).
-      #
-      # This block of code finds `{PREFIX}/lib/mold` from the `mold` binary.
-      # Other methods of searching for the path could also be made to work,
-      # we could even make our own directory and symbolic-link, however it's more
-      # convenient to use the one provided by mold.
-      #
-      # Use the binary path to "mold", to find the common prefix which contains "lib/mold".
-      # The parent directory: e.g. `/usr/bin/mold` -> `/usr/bin/`.
-      get_filename_component(MOLD_PREFIX "${MOLD_BIN}" DIRECTORY)
-      # The common prefix path: e.g. `/usr/bin/` -> `/usr/` to use as a hint.
-      get_filename_component(MOLD_PREFIX "${MOLD_PREFIX}" DIRECTORY)
-      # Find `{PREFIX}/lib/mold/ld`, store the directory component (without the `ld`).
-      # Then pass `-B {PREFIX}/lib/mold` to GCC so the `ld` located there overrides the default.
-      find_path(
-        MOLD_BIN_DIR "ld"
-        HINTS "${MOLD_PREFIX}"
-        # The default path is `libexec`, Arch Linux for e.g.
-        # replaces this with `lib` so check both.
-        PATH_SUFFIXES "libexec/mold" "lib/mold" "lib64/mold"
-        NO_DEFAULT_PATH
-        NO_CACHE
+      get_filename_component(MOLD_BIN_DIR "${MOLD_BIN}" DIRECTORY)
+      # Check if the `-B` argument is required.
+      # This will happen when `MOLD_BIN` points to a non-standard location.
+      # Keep this option as mold is not yet a standard system component and
+      # users may have it installed in some unexpected place.
+      set(_mold_args "-fuse-ld=mold")
+      execute_process(
+        COMMAND ${CMAKE_C_COMPILER} -B ${MOLD_BIN_DIR} ${_mold_args} -Wl,--version
+        ERROR_QUIET OUTPUT_VARIABLE LD_VERSION_WITH_DIR
      )
-      if(NOT MOLD_BIN_DIR)
-        message(STATUS
-          "The mold linker could not find the directory containing the linker command "
-          "(typically "
-          "\"${MOLD_PREFIX}/libexec/mold/ld\") or "
-          "\"${MOLD_PREFIX}/lib/mold/ld\") using system linker."
-        )
-        set(WITH_LINKER_MOLD OFF)
+      execute_process(
+        COMMAND ${CMAKE_C_COMPILER} ${_mold_args} -Wl,--version
+        ERROR_QUIET OUTPUT_VARIABLE LD_VERSION
+      )
+      if(NOT (LD_VERSION STREQUAL LD_VERSION_WITH_DIR))
+        string(PREPEND _mold_args "-B \"${MOLD_BIN_DIR}\" ")
+        set(LD_VERSION "${LD_VERSION_WITH_DIR}")
      endif()
-      unset(MOLD_PREFIX)
-    endif()

-    if(WITH_LINKER_MOLD)
-      # GCC will search for `ld` in this directory first.
-      string(APPEND CMAKE_EXE_LINKER_FLAGS    " -B \"${MOLD_BIN_DIR}\"")
-      string(APPEND CMAKE_SHARED_LINKER_FLAGS " -B \"${MOLD_BIN_DIR}\"")
-      string(APPEND CMAKE_MODULE_LINKER_FLAGS " -B \"${MOLD_BIN_DIR}\"")
-      set(_IS_LINKER_DEFAULT OFF)
+      if("${LD_VERSION}" MATCHES "mold ")
+        string(APPEND CMAKE_EXE_LINKER_FLAGS    " ${_mold_args}")
+        string(APPEND CMAKE_SHARED_LINKER_FLAGS " ${_mold_args}")
+        string(APPEND CMAKE_MODULE_LINKER_FLAGS " ${_mold_args}")
+        set(_IS_LINKER_DEFAULT OFF)
+      else()
+        message(STATUS "GNU mold linker isn't available, using the default system linker.")
+      endif()
+      unset(_mold_args)
+      unset(MOLD_BIN_DIR)
+      unset(LD_VERSION)
    endif()
    unset(MOLD_BIN)
-    unset(MOLD_BIN_DIR)
  endif()

  if(WITH_LINKER_GOLD AND _IS_LINKER_DEFAULT)
@@ -910,7 +891,7 @@ if(CMAKE_COMPILER_IS_GNUCC)

 # CLang is the same as GCC for now.
 elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
-  set(PLATFORM_CFLAGS "-pipe -fPIC -funsigned-char -fno-strict-aliasing")
+  set(PLATFORM_CFLAGS "-pipe -fPIC -funsigned-char -fno-strict-aliasing -ffp-contract=off")

  if(WITH_LINKER_MOLD AND _IS_LINKER_DEFAULT)
    find_program(MOLD_BIN "mold")
--- a/build_files/cmake/platform/platform_win32.cmake
+++ b/build_files/cmake/platform/platform_win32.cmake
@@ -487,14 +487,12 @@ if(WITH_IMAGE_OPENEXR)
  endif()
 endif()

-if(WITH_IMAGE_TIFF)
-  # Try to find tiff first then complain and set static and maybe wrong paths
-  windows_find_package(TIFF)
-  if(NOT TIFF_FOUND)
-    warn_hardcoded_paths(libtiff)
-    set(TIFF_LIBRARY ${LIBDIR}/tiff/lib/libtiff.lib)
-    set(TIFF_INCLUDE_DIR ${LIBDIR}/tiff/include)
-  endif()
+# Try to find tiff first then complain and set static and maybe wrong paths
+windows_find_package(TIFF)
+if(NOT TIFF_FOUND)
+  warn_hardcoded_paths(libtiff)
+  set(TIFF_LIBRARY ${LIBDIR}/tiff/lib/libtiff.lib)
+  set(TIFF_INCLUDE_DIR ${LIBDIR}/tiff/include)
 endif()

 if(WITH_JACK)
@@ -1088,7 +1086,7 @@ if(WITH_CYCLES AND (WITH_CYCLES_DEVICE_ONEAPI OR (WITH_CYCLES_EMBREE AND EMBREE_
    ${SYCL_ROOT_DIR}/bin/sycl[0-9].dll
  )
  foreach(sycl_runtime_library IN LISTS _sycl_runtime_libraries_glob)
-    string(REPLACE ".dll" "_d.dll" sycl_runtime_library_debug ${sycl_runtime_library})
+    string(REPLACE ".dll" "d.dll" sycl_runtime_library_debug ${sycl_runtime_library})
    list(APPEND _sycl_runtime_libraries RELEASE ${sycl_runtime_library})
    list(APPEND _sycl_runtime_libraries DEBUG ${sycl_runtime_library_debug})
  endforeach()
--- a/build_files/config/pipeline_config.yaml
+++ b/build_files/config/pipeline_config.yaml
@@ -9,7 +9,7 @@ buildbot:
    cuda11:
        version: '11.4.1'
    hip:
-        version: '5.3.22480'
+        version: '5.5.30571'
    optix:
        version: '7.3.0'
    ocloc:
--- a/build_files/utils/make_update.py
+++ b/build_files/utils/make_update.py
@@ -489,7 +489,8 @@ if __name__ == "__main__":
        branch = f"blender-v{major}.{minor}-release"
        release_version: Optional[str] = f"{major}.{minor}"
    else:
-        branch = 'main'
+        # TODO !!!!! remove this before merge !!!!!
+        branch = 'asset-shelf'
        release_version = None

    if not args.no_libraries:
--- a/doc/manpage/blender.1.py
+++ b/doc/manpage/blender.1.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # SPDX-License-Identifier: GPL-2.0-or-later

-'''
+"""
 This script generates the blender.1 man page, embedding the help text
 from the Blender executable itself. Invoke it as follows:

@@ -9,7 +9,7 @@ from the Blender executable itself. Invoke it as follows:

 where <path-to-blender> is the path to the Blender executable,
 and <output-filename> is where to write the generated man page.
-'''
+"""

 import argparse
 import os
@@ -87,29 +87,29 @@ def man_page_from_blender_help(fh: TextIO, blender_bin: str, verbose: bool) -> N
        (blender_info["date"], blender_info["version"].replace(".", "\\&."))
    )

-    fh.write(r'''
+    fh.write(r"""
 .SH NAME
-blender \- a full-featured 3D application''')
+blender \- a full-featured 3D application""")

-    fh.write(r'''
+    fh.write(r"""
 .SH SYNOPSIS
-.B blender [args ...] [file] [args ...]''')
+.B blender [args ...] [file] [args ...]""")

-    fh.write(r'''
+    fh.write(r"""
 .br
 .SH DESCRIPTION
 .PP
 .B blender
-is a full-featured 3D application. It supports the entirety of the 3D pipeline - '''
-             '''modeling, rigging, animation, simulation, rendering, compositing, motion tracking, and video editing.
+is a full-featured 3D application. It supports the entirety of the 3D pipeline - """
+             """modeling, rigging, animation, simulation, rendering, compositing, motion tracking, and video editing.

-Use Blender to create 3D images and animations, films and commercials, content for games, '''
-             r'''architectural and industrial visualizations, and scientific visualizations.
+Use Blender to create 3D images and animations, films and commercials, content for games, """
+             r"""architectural and industrial visualizations, and scientific visualizations.

-https://www.blender.org''')
+https://www.blender.org""")

-    fh.write(r'''
-.SH OPTIONS''')
+    fh.write(r"""
+.SH OPTIONS""")

    fh.write("\n\n")

@@ -152,7 +152,7 @@ https://www.blender.org''')

    # Footer Content.

-    fh.write(r'''
+    fh.write(r"""
 .br
 .SH SEE ALSO
 .B luxrender(1)
@@ -162,7 +162,7 @@ https://www.blender.org''')
 This manpage was written for a Debian GNU/Linux system by Daniel Mester
 <mester@uni-bremen.de> and updated by Cyril Brulebois
 <cyril.brulebois@enst-bretagne.fr> and Dan Eicher <dan@trollwerks.org>.
-''')
+""")


 def create_argparse() -> argparse.ArgumentParser:
--- a/extern/audaspace/AUTHORS
+++ b/extern/audaspace/AUTHORS
@@ -14,3 +14,13 @@ The first three of them were employed by the Blender Foundation during that time
 Some features (random sounds, dynamic music, playback manager, convolution and HRTFs support) were added as part of the VALS (Virtual Alliances for Learning Society) project by

 - Juan Francisco Crespo Galán <dethon_5@outlook.com>
+
+The Equalizer sound effect has been added by
+
+- Marcos Perez
+
+Several people provided fixes:
+
+- Aaron Carlisle
+- Sebastian Parborg
+- Leon Zandman
--- a/extern/audaspace/CHANGES
+++ b/extern/audaspace/CHANGES
@@ -1,3 +1,104 @@
+Audaspace 1.4
+
+- Support for OS specific/native audio devices/backends has been added, that is PulseAudio (Linux), WASAPI (Windows) and CoreAudio (MacOS).
+- New sound effects have been added, namely Modulator and Equalizer. Thanks to Marcos Perez for contributing the Equalizer.
+- File stream info: if an audio file contains multiple streams you can choose which one to process instead of taking the first one (this feature is only supported by ffmpeg, not libsndfile).
+- API Change: double instead of float for time values for more precise timing control.
+- There have been lots of bugfixes, which are basically the majority of all changes.
+- And some other minor improvements were implemented as well.
+
+Detailed list of changes:
+
+d4042d9 Port changes in Blender to upstream.
+b60fb45 Equalizer
+ab04e84 Fixes
+8f0c305 Fix build error with MSVC 17.4+ ported from Blender.
+ce44342 Minor documentation update.
+cdcb3f4 Migrate from distuils to setuptools for python module.
+21eccef Fix FindFFTW to find the float version fftw3f.
+ab15e2f Bugfix: API change in new ffmpeg version.
+a097be8 Clang format file added, valid from now on.
+2fc9fb7 Porting bugfix from Blender upstream.
+bb655b7 Bugfix: wrong sample size computation for PulseAudio.
+a150495 Bugfix: Buffer did not support buffers > 2 GB.
+034645c Update for ffmpeg 5.
+932739c Bugfix: WASAPI hangs.
+4fcd47c WASAPI: fix bug when switching the default device while there is no playback.
+a16fbd2 Python API: fix to get convolution in the python API.
+27ac5c1 WASAPI: always switch to default audio device.
+1b03e6c Bugfix: catch exception if file cannot be read.
+369ff6e PulseAudio: remove unused underflow callback.
+2d8bf3a PulseAudio: improve synchronization accuracy.
+4868e14 Revert PulseAudioDevice back from ThreadedDevice to threaded mainloop.
+6a04446 Adding a jack style mixing thread with a ring buffer for pulseaudio.
+5d4b57b Implement RingBuffer class.
+e02d3aa FFMPEG: fix seeking and duration calculation.
+07b9fa0 Adding file stream functionality.
+5a8ad27 Porting changes from Blender.
+fa47258 Bugfix: PulseAudio writing to little data on request.
+ca3edb5 PulseAudio: increase buffersize.
+6d36f3e Pulseaudio: may fix crackling playback start bug.
+dbeac4b WASAPI: reinitialize device when lost.
+0cba4d3 Bugfix Pulseaudio: might hang.
+b73dc6d Bugfix: ffmpeg 4.4 requires channels to be set.
+f1ecbe0 Fixed typo.
+09e4f27 Rewrite PulseAudioDevice to use ThreadedDevice.
+9516924 WASAPI: refactor to simplify and use ThreadedDevice.
+749c974 Add ThreadedDevice.
+e68b355 Fix some locks in SoftwareDevice.
+44b57af Bugfix for deadlock in WASAPIDevice.
+8c4b266 CMake: fix ERROR to FATAL_ERROR in MESSAGE.
+5a17338 Rename NullDevice's reported name to None.
+cd138d7 PulseAudio: add dynamic loading and threading fixes.
+6e0250f CoreAudio: add CoreAudioClock as synchronizer.
+43aff35 Fix leakage in CoreAudioDevice.cpp
+7f6f059 Remove unnecessary cmake code.
+a5c1a02 Add CoreAudio device for Apple.
+af96f67 Indentation fix.
+eec8fd5 WASAPI: use padding also for first buffer submission.
+c63bd9b WASAPI: deal with IsFormatSupported case.
+079cccb Hide WITH_PULSEAUDIO when not on Linux.
+67b5013 Add mingw64 cross compilation toolchain on Linux.
+a7bfa58 Add WASAPI backend for Windows.
+be1cb25 Allow AUD_init with nullptr to use default device.
+fc68868 Compilation fix for mingw.
+bb79d25 Add a PulseAudio output device.
+a11f593 Bugfix for unwanted volume fading at the beginning of sounds.
+8510acf Bugfix: more accurate positioning of sequences.
+cb816c1 Fix API docs for python playback manager play function
+d125fa2 Add callback for mixing down audio.
+789832e Fix numpy import.
+9a6a802 Bugfix: JOS resampling type bugfix caused integer underflow.
+ece0842 Fix corrupted document in python bindings
+28b2ea2 SDL 2: support more audio formats.
+a39b7e3 Trying to fix Travis CI build for OS X.
+c924007 Some more changes of times from flaot to double.
+659afd4 Porting fixes from blender.
+8e5e2e6 Fix documentation warnings.
+7a6054f API: All times are now double instead of float.
+452a724 Mixer sample buffer added channels twice
+8ddb6c1 Docs: Cleanup Line Wrapping for python examples
+a0c37b2 Docs: Use class methods for api docs
+2f8b2e3 Bugfix for invalid offsets provided by ffmpeg's seeking code.
+20a7a28 Bug fixes for files with more than 8 channels.
+94dc527 Bugfix: Fading from full volume.
+2fb9862 Fix: Missing include in FileManager.h.
+afadb94 Minor CMakeLists.txt formatting fixes.
+734ef03 Add sample rate parameter to silence generation.
+ed50f3b Bugfix: Return correct length for modulator and superpose.
+cb7a314 Adding a modulator sound effect.
+101c714 Bugfix: don't add non-existing devices to the device manager.
+7ad99df OpenAL: recreate device if disconnected.
+a2ff4e8 Bugfix: memory leak in python API.
+5fb21bb Silence some warnings.
+9b38605 Some fixes backported from Blender.
+40a0a34 Udpate for travis.ci.
+212b4b6 Support newer ffmpeg versions.
+d27746c Build option: configure whether to build versioned plugins.
+19c8d9f Make fftw3 optional.
+aa11968 Bugfix for building with gcc7.
+10413c5 Fix for seeking with modified pitch.
+
 Audaspace 1.3
 =============

@@ -10,6 +111,8 @@ Audaspace 1.3
 - filter python API parameter check
 - finding ffmpeg with pkgconfig

+ Detailed list of changes:
+
 64884a7 Windows fixes.
 53ba3e6 Implemented JACK dynamic loading.
 5ee0ee1 Continues last commit.
@@ -46,6 +149,8 @@ Audaspace 1.2
 - assuring numpy is installed
 - building the Python module on Mac OS X with CMake

+ Detailed list of changes:
+
 a6b6e70 Changing default sample rate from 44.1 to 48 kHz.
 20f0164 Bugfix: CMake custom command for python module on OS X.
 98679a2 Bugfix: using standard library (s)rand.
--- a/extern/audaspace/CMakeLists.txt
+++ b/extern/audaspace/CMakeLists.txt
@@ -23,7 +23,7 @@ endif()

 project(audaspace)

-set(AUDASPACE_VERSION 1.3)
+set(AUDASPACE_VERSION 1.4)
 set(AUDASPACE_LONG_VERSION ${AUDASPACE_VERSION}.0)

 if(DEFINED AUDASPACE_CMAKE_CFG)
--- a/extern/audaspace/INSTALL
+++ b/extern/audaspace/INSTALL
@@ -15,7 +15,7 @@ Audaspace is written in C++ 11 so a fairly recent compiler (g++ 4.8.2, clang 3.3
 - Jack (output device)
 - libsndfile (file access)
 - ffmpeg (file access)
- Python (language binding)
+- Python (language binding, needs NumPy as well)

 Getting the Code
 ----------------
--- a/extern/audaspace/README.md
+++ b/extern/audaspace/README.md
@@ -32,7 +32,7 @@ The following (probably incomplete) features are supported by audaspace:
 License
 -------

-> Copyright © 2009-2015 Jörg Müller. All rights reserved.
+> Copyright © 2009-2023 Jörg Müller. All rights reserved.
 >
 > Licensed under the Apache License, Version 2.0 (the "License");
 > you may not use this file except in compliance with the License.
--- a/extern/audaspace/bindings/C/AUD_Sequence.cpp
+++ b/extern/audaspace/bindings/C/AUD_Sequence.cpp
@@ -165,6 +165,12 @@ AUD_API void AUD_SequenceEntry_move(AUD_SequenceEntry* entry, double begin, doub
 	(*entry)->move(begin, end, skip);
 }

+AUD_API void AUD_SequenceEntry_setConstantRangeAnimationData(AUD_SequenceEntry* entry, AUD_AnimateablePropertyType type, int frame_start, int frame_end, float* data)
+{
+	AnimateableProperty* prop = (*entry)->getAnimProperty(static_cast<AnimateablePropertyType>(type));
+	prop->writeConstantRange(data, frame_start, frame_end);
+}
+
 AUD_API void AUD_SequenceEntry_setAnimationData(AUD_SequenceEntry* entry, AUD_AnimateablePropertyType type, int frame, float* data, char animated)
 {
 	AnimateableProperty* prop = (*entry)->getAnimProperty(static_cast<AnimateablePropertyType>(type));
--- a/extern/audaspace/bindings/C/AUD_Sequence.h
+++ b/extern/audaspace/bindings/C/AUD_Sequence.h
@@ -68,6 +68,16 @@ extern AUD_API void AUD_Sequence_remove(AUD_Sound* sequence, AUD_SequenceEntry*
 * Writes animation data to a sequence.
 * \param sequence The sound scene.
 * \param type The type of animation data.
+ * \param frame_start Start of the frame range.
+ * \param frame_end End of the frame range.
+ * \param data The data to write.
+ */
+AUD_API void AUD_SequenceEntry_setConstantRangeAnimationData(AUD_SequenceEntry* entry, AUD_AnimateablePropertyType type, int frame_start, int frame_end, float* data);
+
+/**
+ * Writes animation data to a sequenced entry.
+ * \param entry The sequenced entry.
+ * \param type The type of animation data.
 * \param frame The frame this data is for.
 * \param data The data to write.
 * \param animated Whether the attribute is animated.
--- a/extern/audaspace/include/sequence/AnimateableProperty.h
+++ b/extern/audaspace/include/sequence/AnimateableProperty.h
@@ -112,6 +112,14 @@ public:
 	 */
 	void write(const float* data, int position, int count);

+	/**
+	 * Fills the property's frame range with a constant value and marks it as animated.
+	 * \param data The new value.
+	 * \param position_start The start position in the animation in frames.
+	 * \param position_end The end position in the animation in frames.
+	 */
+	void writeConstantRange(const float* data, int position_start, int position_end);
+
 	/**
 	 * Reads the properties value.
 	 * \param position The position in the animation in frames.
--- a/extern/audaspace/include/sequence/SequenceData.h
+++ b/extern/audaspace/include/sequence/SequenceData.h
@@ -198,12 +198,13 @@ public:
 	/**
 	 * Adds a new entry to the scene.
 	 * \param sound The sound this entry should play.
+	 * \param sequence_data Reference to sequence_data. Mainly needed to get the FPS of the scene.
 	 * \param begin The start time.
 	 * \param end The end time or a negative value if determined by the sound.
 	 * \param skip How much seconds should be skipped at the beginning.
 	 * \return The entry added.
 	 */
-	std::shared_ptr<SequenceEntry> add(std::shared_ptr<ISound> sound, double begin, double end, double skip);
+	std::shared_ptr<SequenceEntry> add(std::shared_ptr<ISound> sound, std::shared_ptr<SequenceData> sequence_data, double begin, double end, double skip);

 	/**
 	 * Removes an entry from the scene.
--- a/extern/audaspace/include/sequence/SequenceEntry.h
+++ b/extern/audaspace/include/sequence/SequenceEntry.h
@@ -23,6 +23,7 @@
 */

 #include "sequence/AnimateableProperty.h"
+#include "sequence/SequenceData.h"
 #include "util/ILockable.h"

 #include <mutex>
@@ -63,6 +64,9 @@ private:
 	/// How many seconds are skipped at the beginning.
 	double m_skip;

+	/// Reference to sequence_data. Mainly needed to get the FPS of the scene.
+	std::shared_ptr<SequenceData> m_sequence_data;
+
 	/// Whether the entry is muted.
 	bool m_muted;

@@ -122,9 +126,10 @@ public:
 	 * \param begin The start time.
 	 * \param end The end time or a negative value if determined by the sound.
 	 * \param skip How much seconds should be skipped at the beginning.
+	 * \param sequence_data Reference to sequence_data. Mainly needed to get the FPS of the scene.
 	 * \param id The ID of the entry.
 	 */
-	SequenceEntry(std::shared_ptr<ISound> sound, double begin, double end, double skip, int id);
+	SequenceEntry(std::shared_ptr<ISound> sound, double begin, double end, double skip, std::shared_ptr<SequenceData> sequence_data, int id);
 	virtual ~SequenceEntry();

 	/**
--- a/extern/audaspace/src/sequence/AnimateableProperty.cpp
+++ b/extern/audaspace/src/sequence/AnimateableProperty.cpp
@@ -65,6 +65,19 @@ void AnimateableProperty::write(const float* data)
 	std::memcpy(getBuffer(), data, m_count * sizeof(float));
 }

+void AnimateableProperty::writeConstantRange(const float* data, int position_start, int position_end)
+{
+	assureSize(position_end * m_count * sizeof(float), true);
+	float* buffer = getBuffer();
+
+	for(int i = position_start; i < position_end; i++)
+	{
+		std::memcpy(buffer + i * m_count, data, m_count * sizeof(float));
+	}
+
+	m_isAnimated = true;
+}
+
 void AnimateableProperty::write(const float* data, int position, int count)
 {
 	std::lock_guard<std::recursive_mutex> lock(m_mutex);
--- a/extern/audaspace/src/sequence/Sequence.cpp
+++ b/extern/audaspace/src/sequence/Sequence.cpp
@@ -92,7 +92,7 @@ AnimateableProperty* Sequence::getAnimProperty(AnimateablePropertyType type)

 std::shared_ptr<SequenceEntry> Sequence::add(std::shared_ptr<ISound> sound, double begin, double end, double skip)
 {
-	return m_sequence->add(sound, begin, end, skip);
+	return m_sequence->add(sound, m_sequence, begin, end, skip);
 }

 void Sequence::remove(std::shared_ptr<SequenceEntry> entry)
--- a/extern/audaspace/src/sequence/SequenceData.cpp
+++ b/extern/audaspace/src/sequence/SequenceData.cpp
@@ -149,11 +149,11 @@ AnimateableProperty* SequenceData::getAnimProperty(AnimateablePropertyType type)
 	}
 }

-std::shared_ptr<SequenceEntry> SequenceData::add(std::shared_ptr<ISound> sound, double begin, double end, double skip)
+std::shared_ptr<SequenceEntry> SequenceData::add(std::shared_ptr<ISound> sound, std::shared_ptr<SequenceData> sequence_data, double begin, double end, double skip)
 {
 	std::lock_guard<std::recursive_mutex> lock(m_mutex);

-	std::shared_ptr<SequenceEntry> entry = std::shared_ptr<SequenceEntry>(new SequenceEntry(sound, begin, end, skip, m_id++));
+	std::shared_ptr<SequenceEntry> entry = std::shared_ptr<SequenceEntry>(new SequenceEntry(sound, begin, end, skip, sequence_data, m_id++));

 	m_entries.push_back(entry);
 	m_entry_status++;
--- a/extern/audaspace/src/sequence/SequenceEntry.cpp
+++ b/extern/audaspace/src/sequence/SequenceEntry.cpp
@@ -22,7 +22,7 @@

 AUD_NAMESPACE_BEGIN

-SequenceEntry::SequenceEntry(std::shared_ptr<ISound> sound, double begin, double end, double skip, int id) :
+SequenceEntry::SequenceEntry(std::shared_ptr<ISound> sound, double begin, double end, double skip, std::shared_ptr<SequenceData> sequence_data, int id) :
 	m_status(0),
 	m_pos_status(1),
 	m_sound_status(0),
@@ -31,6 +31,7 @@ SequenceEntry::SequenceEntry(std::shared_ptr<ISound> sound, double begin, double
 	m_begin(begin),
 	m_end(end),
 	m_skip(skip),
+	m_sequence_data(sequence_data),
 	m_muted(false),
 	m_relative(true),
 	m_volume_max(1.0f),
--- a/extern/audaspace/src/sequence/SequenceHandle.cpp
+++ b/extern/audaspace/src/sequence/SequenceHandle.cpp
@@ -241,10 +241,38 @@ bool SequenceHandle::seek(double position)
 		return false;

 	std::lock_guard<ILockable> lock(*m_entry);
-	double seekpos = position - m_entry->m_begin;
-	if(seekpos < 0)
-		seekpos = 0;
-	seekpos += m_entry->m_skip;
+
+	double seek_frame = (position - m_entry->m_begin) * m_entry->m_sequence_data->getFPS();
+
+	if(seek_frame < 0)
+		seek_frame = 0;
+
+	seek_frame += m_entry->m_skip * m_entry->m_sequence_data->getFPS();
+
+	AnimateableProperty* pitch_property = m_entry->getAnimProperty(AP_PITCH);
+
+	double target_frame = 0;
+
+	if(pitch_property != nullptr)
+	{
+		int frame_start = (m_entry->m_begin - m_entry->m_skip) * m_entry->m_sequence_data->getFPS();
+
+		for(int i = 0; seek_frame > 0; i++)
+		{
+			float pitch;
+			pitch_property->read(frame_start + i, &pitch);
+			const double factor = seek_frame > 1.0 ? 1.0 : seek_frame;
+			target_frame += pitch * factor;
+			seek_frame--;
+		}
+	}
+	else
+	{
+		target_frame = seek_frame;
+	}
+
+	double seekpos = target_frame / m_entry->m_sequence_data->getFPS();
+
 	m_handle->setPitch(1.0f);
 	m_handle->seek(seekpos);

--- a/extern/mantaflow/preprocessed/plugin/advection.cpp
+++ b/extern/mantaflow/preprocessed/plugin/advection.cpp
@@ -572,7 +572,7 @@ template<class T> inline bool cmpMinMax(T &minv, T &maxv, const T &val)
 }
 template<> inline bool cmpMinMax<Vec3>(Vec3 &minv, Vec3 &maxv, const Vec3 &val)
 {
-  return (cmpMinMax(minv.x, maxv.x, val.x) | cmpMinMax(minv.y, maxv.y, val.y) |
+  return (cmpMinMax(minv.x, maxv.x, val.x) || cmpMinMax(minv.y, maxv.y, val.y) ||
          cmpMinMax(minv.z, maxv.z, val.z));
 }

--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -281,6 +281,9 @@ endif()

 if(WITH_CYCLES_EMBREE)
  add_definitions(-DWITH_EMBREE)
+  if(WITH_CYCLES_DEVICE_ONEAPI AND EMBREE_SYCL_SUPPORT)
+    add_definitions(-DWITH_EMBREE_GPU)
+  endif()
  add_definitions(-DEMBREE_MAJOR_VERSION=${EMBREE_MAJOR_VERSION})
  include_directories(
    SYSTEM
--- a/intern/cycles/blender/addon/init.py
+++ b/intern/cycles/blender/addon/init.py
@@ -106,7 +106,7 @@ class CyclesRender(bpy.types.RenderEngine):
            from . import osl
            osl.update_script_node(node, self.report)
        else:
-            self.report({'ERROR'}, "OSL support disabled in this build.")
+            self.report({'ERROR'}, "OSL support disabled in this build")

    def update_render_passes(self, scene, srl):
        engine.register_passes(self, scene, srl)
--- a/intern/cycles/blender/addon/engine.py
+++ b/intern/cycles/blender/addon/engine.py
@@ -172,6 +172,8 @@ def system_info():


 def list_render_passes(scene, srl):
+    import _cycles
+
    crl = srl.cycles

    # Combined pass.
@@ -250,6 +252,12 @@ def list_render_passes(scene, srl):
    for lightgroup in srl.lightgroups:
        yield ("Combined_%s" % lightgroup.name, "RGB", 'COLOR')

+    # Path guiding debug passes.
+    if _cycles.with_debug:
+        yield ("Guiding Color", "RGB", 'COLOR')
+        yield ("Guiding Probability", "X", 'VALUE')
+        yield ("Guiding Average Roughness", "X", 'VALUE')
+

 def register_passes(engine, scene, view_layer):
    for name, channelids, channeltype in list_render_passes(scene, view_layer):
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -1544,6 +1544,13 @@ class CyclesPreferences(bpy.types.AddonPreferences):
        default=False,
    )

+    use_oneapirt: BoolProperty(
+        name="Embree on GPU (Experimental)",
+        description="Embree GPU execution will allow to use hardware ray tracing on Intel GPUs, which will provide better performance. "
+                    "However this support is experimental and some scenes may render incorrectly",
+        default=False,
+    )
+
    kernel_optimization_level: EnumProperty(
        name="Kernel Optimization",
        description="Kernels can be optimized based on scene content. Optimized kernels are requested at the start of a render. "
@@ -1676,16 +1683,16 @@ class CyclesPreferences(bpy.types.AddonPreferences):
                col.label(text=iface_("and NVIDIA driver version %s or newer") % driver_version,
                          icon='BLANK1', translate=False)
            elif device_type == 'HIP':
-                if True:
-                    col.label(text="HIP temporarily disabled due to compiler bugs", icon='BLANK1')
-                else:
-                    import sys
-                    if sys.platform[:3] == "win":
-                        driver_version = "21.Q4"
-                        col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
-                        col.label(text=iface_("and AMD Radeon Pro %s driver or newer") % driver_version,
-                                  icon='BLANK1', translate=False)
-                    elif sys.platform.startswith("linux"):
+                import sys
+                if sys.platform[:3] == "win":
+                    driver_version = "21.Q4"
+                    col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
+                    col.label(text=iface_("and AMD Radeon Pro %s driver or newer") % driver_version,
+                              icon='BLANK1', translate=False)
+                elif sys.platform.startswith("linux"):
+                    if True:
+                        col.label(text="HIP temporarily disabled due to compiler bugs", icon='BLANK1')
+                    else:
                        driver_version = "22.10"
                        col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
                        col.label(text=iface_("and AMD driver version %s or newer") % driver_version, icon='BLANK1',
@@ -1763,6 +1770,11 @@ class CyclesPreferences(bpy.types.AddonPreferences):
                    col.prop(self, "kernel_optimization_level")
                col.prop(self, "use_metalrt")

+        if compute_device_type == 'ONEAPI' and _cycles.with_embree_gpu:
+            row = layout.row()
+            row.use_property_split = True
+            row.prop(self, "use_oneapirt")
+
    def draw(self, context):
        self.draw_impl(self.layout, context)

--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -3,6 +3,7 @@
 from __future__ import annotations

 import bpy
+from bpy.app.translations import contexts as i18n_contexts
 from bpy_extras.node_utils import find_node_input
 from bl_ui.utils import PresetPanel

@@ -318,7 +319,7 @@ class CYCLES_RENDER_PT_sampling_path_guiding(CyclesButtonsPanel, Panel):

        col = layout.column(align=True)
        col.prop(cscene, "use_surface_guiding", text="Surface")
-        col.prop(cscene, "use_volume_guiding", text="Volume")
+        col.prop(cscene, "use_volume_guiding", text="Volume", text_ctxt=i18n_contexts.id_id)


 class CYCLES_RENDER_PT_sampling_path_guiding_debug(CyclesDebugButtonsPanel, Panel):
@@ -530,7 +531,7 @@ class CYCLES_RENDER_PT_light_paths_max_bounces(CyclesButtonsPanel, Panel):
        col.prop(cscene, "diffuse_bounces", text="Diffuse")
        col.prop(cscene, "glossy_bounces", text="Glossy")
        col.prop(cscene, "transmission_bounces", text="Transmission")
-        col.prop(cscene, "volume_bounces", text="Volume")
+        col.prop(cscene, "volume_bounces", text="Volume", text_ctxt=i18n_contexts.id_id)

        col = layout.column(align=True)
        col.prop(cscene, "transparent_max_bounces", text="Transparent")
@@ -980,7 +981,7 @@ class CYCLES_RENDER_PT_passes_light(CyclesButtonsPanel, Panel):
        col.prop(view_layer, "use_pass_transmission_indirect", text="Indirect")
        col.prop(view_layer, "use_pass_transmission_color", text="Color")

-        col = layout.column(heading="Volume", align=True)
+        col = layout.column(heading="Volume", heading_ctxt=i18n_contexts.id_id, align=True)
        col.prop(cycles_view_layer, "use_pass_volume_direct", text="Direct")
        col.prop(cycles_view_layer, "use_pass_volume_indirect", text="Indirect")

@@ -1577,6 +1578,7 @@ class CYCLES_WORLD_PT_surface(CyclesButtonsPanel, Panel):

 class CYCLES_WORLD_PT_volume(CyclesButtonsPanel, Panel):
    bl_label = "Volume"
+    bl_translation_context = i18n_contexts.id_id
    bl_context = "world"
    bl_options = {'DEFAULT_CLOSED'}

@@ -1696,6 +1698,7 @@ class CYCLES_WORLD_PT_settings_surface(CyclesButtonsPanel, Panel):

 class CYCLES_WORLD_PT_settings_volume(CyclesButtonsPanel, Panel):
    bl_label = "Volume"
+    bl_translation_context = i18n_contexts.id_id
    bl_parent_id = "CYCLES_WORLD_PT_settings"
    bl_context = "world"

@@ -1791,6 +1794,7 @@ class CYCLES_MATERIAL_PT_surface(CyclesButtonsPanel, Panel):

 class CYCLES_MATERIAL_PT_volume(CyclesButtonsPanel, Panel):
    bl_label = "Volume"
+    bl_translation_context = i18n_contexts.id_id
    bl_context = "material"
    bl_options = {'DEFAULT_CLOSED'}

@@ -1874,6 +1878,7 @@ class CYCLES_MATERIAL_PT_settings_surface(CyclesButtonsPanel, Panel):

 class CYCLES_MATERIAL_PT_settings_volume(CyclesButtonsPanel, Panel):
    bl_label = "Volume"
+    bl_translation_context = i18n_contexts.id_id
    bl_parent_id = "CYCLES_MATERIAL_PT_settings"
    bl_context = "material"

--- a/intern/cycles/blender/curves.cpp
+++ b/intern/cycles/blender/curves.cpp
@@ -609,59 +609,60 @@ void BlenderSync::sync_particle_hair(
  }
 }

-static std::optional<BL::FloatAttribute> find_curves_radius_attribute(BL::Curves b_curves)
+static const float *find_radius_attribute(BL::Curves b_curves)
 {
  for (BL::Attribute &b_attribute : b_curves.attributes) {
    if (b_attribute.name() != "radius") {
      continue;
    }
-    if (b_attribute.domain() != BL::Attribute::domain_POINT) {
-      continue;
-    }
    if (b_attribute.data_type() != BL::Attribute::data_type_FLOAT) {
      continue;
    }
-    return BL::FloatAttribute{b_attribute};
+    BL::FloatAttribute b_float_attribute{b_attribute};
+    if (b_float_attribute.data.length() == 0) {
+      return nullptr;
+    }
+    return static_cast<const float *>(b_float_attribute.data[0].ptr.data);
  }
-  return std::nullopt;
+  return nullptr;
 }

-static BL::FloatVectorAttribute find_curves_position_attribute(BL::Curves b_curves)
+static const float (*find_position_attribute(BL::Curves b_curves))[3]
 {
  for (BL::Attribute &b_attribute : b_curves.attributes) {
    if (b_attribute.name() != "position") {
      continue;
    }
-    if (b_attribute.domain() != BL::Attribute::domain_POINT) {
-      continue;
-    }
    if (b_attribute.data_type() != BL::Attribute::data_type_FLOAT_VECTOR) {
      continue;
    }
-    return BL::FloatVectorAttribute{b_attribute};
+    BL::FloatVectorAttribute b_float3_attribute{b_attribute};
+    if (b_float3_attribute.data.length() == 0) {
+      return nullptr;
+    }
+    return static_cast<const float(*)[3]>(b_float3_attribute.data[0].ptr.data);
  }
  /* The position attribute must exist. */
  assert(false);
-  return BL::FloatVectorAttribute{b_curves.attributes[0]};
+  return nullptr;
 }

 template<typename TypeInCycles, typename GetValueAtIndex>
-static void fill_generic_attribute(BL::Curves &b_curves,
+static void fill_generic_attribute(const int num_curves,
+                                   const int num_points,
                                   TypeInCycles *data,
                                   const AttributeElement element,
                                   const GetValueAtIndex &get_value_at_index)
 {
  switch (element) {
    case ATTR_ELEMENT_CURVE_KEY: {
-      const int num_points = b_curves.points.length();
      for (int i = 0; i < num_points; i++) {
        data[i] = get_value_at_index(i);
      }
      break;
    }
    case ATTR_ELEMENT_CURVE: {
-      const int num_verts = b_curves.curves.length();
-      for (int i = 0; i < num_verts; i++) {
+      for (int i = 0; i < num_curves; i++) {
        data[i] = get_value_at_index(i);
      }
      break;
@@ -681,6 +682,7 @@ static void attr_create_motion(Hair *hair, BL::Attribute &b_attribute, const flo
  }

  BL::FloatVectorAttribute b_vector_attribute(b_attribute);
+  const float(*src)[3] = static_cast<const float(*)[3]>(b_vector_attribute.data[0].ptr.data);
  const int num_curve_keys = hair->get_curve_keys().size();

  /* Find or add attribute */
@@ -698,23 +700,24 @@ static void attr_create_motion(Hair *hair, BL::Attribute &b_attribute, const flo
    float3 *mP = attr_mP->data_float3() + step * num_curve_keys;

    for (int i = 0; i < num_curve_keys; i++) {
-      mP[i] = P[i] + get_float3(b_vector_attribute.data[i].vector()) * relative_time;
+      mP[i] = P[i] + make_float3(src[i][0], src[i][1], src[i][2]) * relative_time;
    }
  }
 }

 static void attr_create_uv(AttributeSet &attributes,
-                           BL::Curves &b_curves,
+                           const int num_curves,
+                           const int num_points,
                           BL::Attribute &b_attribute,
                           const ustring name)
 {
  BL::Float2Attribute b_float2_attribute{b_attribute};
+  const float(*src)[2] = static_cast<const float(*)[2]>(b_float2_attribute.data[0].ptr.data);
  Attribute *attr = attributes.add(ATTR_STD_UV, name);

  float2 *data = attr->data_float2();
-  fill_generic_attribute(b_curves, data, ATTR_ELEMENT_CURVE, [&](int i) {
-    BL::Array<float, 2> v = b_float2_attribute.data[i].vector();
-    return make_float2(v[0], v[1]);
+  fill_generic_attribute(num_curves, num_points, data, ATTR_ELEMENT_CURVE, [&](int i) {
+    return make_float2(src[i][0], src[i][1]);
  });
 }

@@ -724,6 +727,9 @@ static void attr_create_generic(Scene *scene,
                                const bool need_motion,
                                const float motion_scale)
 {
+  const int num_keys = b_curves.points.length();
+  const int num_curves = b_curves.curves.length();
+
  AttributeSet &attributes = hair->attributes;
  static const ustring u_velocity("velocity");
  const bool need_uv = hair->need_attribute(scene, ATTR_STD_UV);
@@ -743,7 +749,7 @@ static void attr_create_generic(Scene *scene,
    /* Weak, use first float2 attribute as standard UV. */
    if (need_uv && !have_uv && b_data_type == BL::Attribute::data_type_FLOAT2 &&
        b_domain == BL::Attribute::domain_CURVE) {
-      attr_create_uv(attributes, b_curves, b_attribute, name);
+      attr_create_uv(attributes, num_curves, num_keys, b_attribute, name);
      have_uv = true;
      continue;
    }
@@ -773,57 +779,80 @@ static void attr_create_generic(Scene *scene,
    switch (b_data_type) {
      case BL::Attribute::data_type_FLOAT: {
        BL::FloatAttribute b_float_attribute{b_attribute};
+        const float *src = static_cast<const float *>(b_float_attribute.data[0].ptr.data);
        Attribute *attr = attributes.add(name, TypeFloat, element);
        float *data = attr->data_float();
-        fill_generic_attribute(
-            b_curves, data, element, [&](int i) { return b_float_attribute.data[i].value(); });
+        fill_generic_attribute(num_curves, num_keys, data, element, [&](int i) { return src[i]; });
        break;
      }
      case BL::Attribute::data_type_BOOLEAN: {
        BL::BoolAttribute b_bool_attribute{b_attribute};
+        const bool *src = static_cast<const bool *>(b_bool_attribute.data[0].ptr.data);
        Attribute *attr = attributes.add(name, TypeFloat, element);
        float *data = attr->data_float();
-        fill_generic_attribute(b_curves, data, element, [&](int i) {
-          return (float)b_bool_attribute.data[i].value();
-        });
+        fill_generic_attribute(
+            num_curves, num_keys, data, element, [&](int i) { return float(src[i]); });
        break;
      }
      case BL::Attribute::data_type_INT: {
        BL::IntAttribute b_int_attribute{b_attribute};
+        const int *src = static_cast<const int *>(b_int_attribute.data[0].ptr.data);
        Attribute *attr = attributes.add(name, TypeFloat, element);
        float *data = attr->data_float();
-        fill_generic_attribute(b_curves, data, element, [&](int i) {
-          return (float)b_int_attribute.data[i].value();
+        fill_generic_attribute(
+            num_curves, num_keys, data, element, [&](int i) { return float(src[i]); });
+        break;
+      }
+      case BL::Attribute::data_type_INT32_2D: {
+        BL::Int2Attribute b_int2_attribute{b_attribute};
+        const int2 *src = static_cast<const int2 *>(b_int2_attribute.data[0].ptr.data);
+        Attribute *attr = attributes.add(name, TypeFloat2, element);
+        float2 *data = attr->data_float2();
+        fill_generic_attribute(num_curves, num_keys, data, element, [&](int i) {
+          return make_float2(float(src[i][0]), float(src[i][1]));
        });
        break;
      }
      case BL::Attribute::data_type_FLOAT_VECTOR: {
        BL::FloatVectorAttribute b_vector_attribute{b_attribute};
+        const float(*src)[3] = static_cast<const float(*)[3]>(b_vector_attribute.data[0].ptr.data);
        Attribute *attr = attributes.add(name, TypeVector, element);
        float3 *data = attr->data_float3();
-        fill_generic_attribute(b_curves, data, element, [&](int i) {
-          BL::Array<float, 3> v = b_vector_attribute.data[i].vector();
-          return make_float3(v[0], v[1], v[2]);
+        fill_generic_attribute(num_curves, num_keys, data, element, [&](int i) {
+          return make_float3(src[i][0], src[i][1], src[i][2]);
+        });
+        break;
+      }
+      case BL::Attribute::data_type_BYTE_COLOR: {
+        BL::ByteColorAttribute b_color_attribute{b_attribute};
+        const uchar(*src)[4] = static_cast<const uchar(*)[4]>(b_color_attribute.data[0].ptr.data);
+        Attribute *attr = attributes.add(name, TypeRGBA, element);
+        float4 *data = attr->data_float4();
+        fill_generic_attribute(num_curves, num_keys, data, element, [&](int i) {
+          return make_float4(color_srgb_to_linear(byte_to_float(src[i][0])),
+                             color_srgb_to_linear(byte_to_float(src[i][1])),
+                             color_srgb_to_linear(byte_to_float(src[i][2])),
+                             color_srgb_to_linear(byte_to_float(src[i][3])));
        });
        break;
      }
      case BL::Attribute::data_type_FLOAT_COLOR: {
        BL::FloatColorAttribute b_color_attribute{b_attribute};
+        const float(*src)[4] = static_cast<const float(*)[4]>(b_color_attribute.data[0].ptr.data);
        Attribute *attr = attributes.add(name, TypeRGBA, element);
        float4 *data = attr->data_float4();
-        fill_generic_attribute(b_curves, data, element, [&](int i) {
-          BL::Array<float, 4> v = b_color_attribute.data[i].color();
-          return make_float4(v[0], v[1], v[2], v[3]);
+        fill_generic_attribute(num_curves, num_keys, data, element, [&](int i) {
+          return make_float4(src[i][0], src[i][1], src[i][2], src[i][3]);
        });
        break;
      }
      case BL::Attribute::data_type_FLOAT2: {
        BL::Float2Attribute b_float2_attribute{b_attribute};
+        const float(*src)[2] = static_cast<const float(*)[2]>(b_float2_attribute.data[0].ptr.data);
        Attribute *attr = attributes.add(name, TypeFloat2, element);
        float2 *data = attr->data_float2();
-        fill_generic_attribute(b_curves, data, element, [&](int i) {
-          BL::Array<float, 2> v = b_float2_attribute.data[i].vector();
-          return make_float2(v[0], v[1]);
+        fill_generic_attribute(num_curves, num_keys, data, element, [&](int i) {
+          return make_float2(src[i][0], src[i][1]);
        });
        break;
      }
@@ -834,27 +863,28 @@ static void attr_create_generic(Scene *scene,
  }
 }

-static float4 hair_point_as_float4(BL::FloatVectorAttribute b_attr_position,
-                                   std::optional<BL::FloatAttribute> b_attr_radius,
-                                   const int index)
+static float4 curve_point_as_float4(const float (*b_attr_position)[3],
+                                    const float *b_attr_radius,
+                                    const int index)
 {
-  float4 mP = float3_to_float4(get_float3(b_attr_position.data[index].vector()));
-  mP.w = b_attr_radius ? b_attr_radius->data[index].value() : 0.005f;
+  float4 mP = make_float4(
+      b_attr_position[index][0], b_attr_position[index][1], b_attr_position[index][2], 0.0f);
+  mP.w = b_attr_radius ? b_attr_radius[index] : 0.005f;
  return mP;
 }

-static float4 interpolate_hair_points(BL::FloatVectorAttribute b_attr_position,
-                                      std::optional<BL::FloatAttribute> b_attr_radius,
-                                      const int first_point_index,
-                                      const int num_points,
-                                      const float step)
+static float4 interpolate_curve_points(const float (*b_attr_position)[3],
+                                       const float *b_attr_radius,
+                                       const int first_point_index,
+                                       const int num_points,
+                                       const float step)
 {
  const float curve_t = step * (num_points - 1);
  const int point_a = clamp((int)curve_t, 0, num_points - 1);
  const int point_b = min(point_a + 1, num_points - 1);
  const float t = curve_t - (float)point_a;
-  return lerp(hair_point_as_float4(b_attr_position, b_attr_radius, first_point_index + point_a),
-              hair_point_as_float4(b_attr_position, b_attr_radius, first_point_index + point_b),
+  return lerp(curve_point_as_float4(b_attr_position, b_attr_radius, first_point_index + point_a),
+              curve_point_as_float4(b_attr_position, b_attr_radius, first_point_index + point_b),
              t);
 }

@@ -864,8 +894,6 @@ static void export_hair_curves(Scene *scene,
                               const bool need_motion,
                               const float motion_scale)
 {
-  /* TODO: optimize so we can straight memcpy arrays from Blender? */
-
  const int num_keys = b_curves.points.length();
  const int num_curves = b_curves.curves.length();

@@ -879,7 +907,6 @@ static void export_hair_curves(Scene *scene,
  /* Add requested attributes. */
  float *attr_intercept = NULL;
  float *attr_length = NULL;
-  float *attr_random = NULL;

  if (hair->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT)) {
    attr_intercept = hair->attributes.add(ATTR_STD_CURVE_INTERCEPT)->data_float();
@@ -888,28 +915,40 @@ static void export_hair_curves(Scene *scene,
    attr_length = hair->attributes.add(ATTR_STD_CURVE_LENGTH)->data_float();
  }
  if (hair->need_attribute(scene, ATTR_STD_CURVE_RANDOM)) {
-    attr_random = hair->attributes.add(ATTR_STD_CURVE_RANDOM)->data_float();
+    float *attr_random = hair->attributes.add(ATTR_STD_CURVE_RANDOM)->data_float();
+    for (int i = 0; i < num_curves; i++) {
+      attr_random[i] = hash_uint2_to_float(i, 0);
+    }
  }

-  BL::FloatVectorAttribute b_attr_position = find_curves_position_attribute(b_curves);
-  std::optional<BL::FloatAttribute> b_attr_radius = find_curves_radius_attribute(b_curves);
+  const int *point_offsets = static_cast<const int *>(b_curves.curve_offset_data[0].ptr.data);
+  const float(*b_attr_position)[3] = find_position_attribute(b_curves);
+  const float *b_attr_radius = find_radius_attribute(b_curves);
+
+  std::copy(point_offsets, point_offsets + num_curves, curve_first_key);
+  std::fill(curve_shader, curve_shader + num_curves, 0);
+  if (b_attr_radius) {
+    std::copy(b_attr_radius, b_attr_radius + num_keys, curve_radius);
+  }
+  else {
+    std::fill(curve_radius, curve_radius + num_keys, 0.005f);
+  }

  /* Export curves and points. */
  for (int i = 0; i < num_curves; i++) {
-    const int first_point_index = b_curves.curve_offset_data[i].value();
-    const int num_points = b_curves.curve_offset_data[i + 1].value() - first_point_index;
+    const int first_point_index = point_offsets[i];
+    const int num_points = point_offsets[i + 1] - first_point_index;

    float3 prev_co = zero_float3();
    float length = 0.0f;

    /* Position and radius. */
    for (int j = 0; j < num_points; j++) {
-      const int point_offset = first_point_index + j;
-      const float3 co = get_float3(b_attr_position.data[point_offset].vector());
-      const float radius = b_attr_radius ? b_attr_radius->data[point_offset].value() : 0.005f;
+      const int point = first_point_index + j;
+      const float3 co = make_float3(
+          b_attr_position[point][0], b_attr_position[point][1], b_attr_position[point][2]);

-      curve_keys[point_offset] = co;
-      curve_radius[point_offset] = radius;
+      curve_keys[point] = co;

      if (attr_length || attr_intercept) {
        if (j > 0) {
@@ -918,7 +957,7 @@ static void export_hair_curves(Scene *scene,
        prev_co = co;

        if (attr_intercept) {
-          attr_intercept[point_offset] = length;
+          attr_intercept[point] = length;
        }
      }
    }
@@ -926,8 +965,8 @@ static void export_hair_curves(Scene *scene,
    /* Normalized 0..1 attribute along curve. */
    if (attr_intercept && length > 0.0f) {
      for (int j = 1; j < num_points; j++) {
-        const int point_offset = first_point_index + j;
-        attr_intercept[point_offset] /= length;
+        const int point = first_point_index + j;
+        attr_intercept[point] /= length;
      }
    }

@@ -935,15 +974,6 @@ static void export_hair_curves(Scene *scene,
    if (attr_length) {
      attr_length[i] = length;
    }
-
-    /* Random number per curve. */
-    if (attr_random != NULL) {
-      attr_random[i] = hash_uint2_to_float(i, 0);
-    }
-
-    /* Curve. */
-    curve_shader[i] = 0;
-    curve_first_key[i] = first_point_index;
  }

  attr_create_generic(scene, hair, b_curves, need_motion, motion_scale);
@@ -968,12 +998,13 @@ static void export_hair_curves_motion(Hair *hair, BL::Curves b_curves, int motio
  int num_motion_keys = 0;
  int curve_index = 0;

-  BL::FloatVectorAttribute b_attr_position = find_curves_position_attribute(b_curves);
-  std::optional<BL::FloatAttribute> b_attr_radius = find_curves_radius_attribute(b_curves);
+  const int *point_offsets = static_cast<const int *>(b_curves.curve_offset_data[0].ptr.data);
+  const float(*b_attr_position)[3] = find_position_attribute(b_curves);
+  const float *b_attr_radius = find_radius_attribute(b_curves);

  for (int i = 0; i < num_curves; i++) {
-    const int first_point_index = b_curves.curve_offset_data[i].value();
-    const int num_points = b_curves.curve_offset_data[i + 1].value() - first_point_index;
+    const int first_point_index = point_offsets[i];
+    const int num_points = point_offsets[i + 1] - first_point_index;

    Hair::Curve curve = hair->get_curve(curve_index);
    curve_index++;
@@ -981,10 +1012,10 @@ static void export_hair_curves_motion(Hair *hair, BL::Curves b_curves, int motio
    if (num_points == curve.num_keys) {
      /* Number of keys matches. */
      for (int i = 0; i < num_points; i++) {
-        int point_index = first_point_index + i;
+        int point = first_point_index + i;

-        if (point_index < num_keys) {
-          mP[num_motion_keys] = hair_point_as_float4(b_attr_position, b_attr_radius, point_index);
+        if (point < num_keys) {
+          mP[num_motion_keys] = curve_point_as_float4(b_attr_position, b_attr_radius, point);
          num_motion_keys++;

          if (!have_motion) {
@@ -1003,7 +1034,7 @@ static void export_hair_curves_motion(Hair *hair, BL::Curves b_curves, int motio
      const float step_size = curve.num_keys > 1 ? 1.0f / (curve.num_keys - 1) : 0.0f;
      for (int i = 0; i < curve.num_keys; i++) {
        const float step = i * step_size;
-        mP[num_motion_keys] = interpolate_hair_points(
+        mP[num_motion_keys] = interpolate_curve_points(
            b_attr_position, b_attr_radius, first_point_index, num_points, step);
        num_motion_keys++;
      }
--- a/intern/cycles/blender/device.cpp
+++ b/intern/cycles/blender/device.cpp
@@ -112,9 +112,26 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences,
    device.has_peer_memory = false;
  }

-  if (get_boolean(cpreferences, "use_metalrt")) {
-    device.use_metalrt = true;
+  /* Metal and oneAPI each expose one backend-wide preference for hardware ray-tracing.
+   * Clear the flag on the stored DeviceInfo objects themselves, never on a copy of them,
+   * so that the sub-devices of a multi-device also see the preference. */
+  const auto apply_rt_preference = [&](DeviceInfo &info) {
+    if (info.type == DEVICE_METAL && !get_boolean(cpreferences, "use_metalrt")) {
+      info.use_hardware_raytracing = false;
+    }
+    if (info.type == DEVICE_ONEAPI && !get_boolean(cpreferences, "use_oneapirt")) {
+      info.use_hardware_raytracing = false;
+    }
+    return info.use_hardware_raytracing;
+  };
+
+  /* Multi-devices are only supported for one backend plus the CPU, and CPU devices are
+   * expected to ignore this setting, so the parent is enabled if any sub-device is. */
+  bool use_hardware_raytracing = device.multi_devices.empty() && apply_rt_preference(device);
+  foreach (DeviceInfo &info, device.multi_devices) {
+    use_hardware_raytracing |= apply_rt_preference(info);
  }
+  device.use_hardware_raytracing = use_hardware_raytracing;

  if (preview) {
    /* Disable specialization for preview renders. */
--- a/intern/cycles/blender/mesh.cpp
+++ b/intern/cycles/blender/mesh.cpp
@@ -280,7 +280,7 @@ static void fill_generic_attribute(BL::Mesh &b_mesh,
        assert(0);
      }
      else {
-        const MEdge *edges = static_cast<const MEdge *>(b_mesh.edges[0].ptr.data);
+        const int2 *edges = static_cast<const int2 *>(b_mesh.edges[0].ptr.data);
        const size_t verts_num = b_mesh.vertices.length();
        vector<int> count(verts_num, 0);

@@ -288,11 +288,11 @@ static void fill_generic_attribute(BL::Mesh &b_mesh,
        for (int i = 0; i < edges_num; i++) {
          TypeInCycles value = get_value_at_index(i);

-          const MEdge &b_edge = edges[i];
-          data[b_edge.v1] += value;
-          data[b_edge.v2] += value;
-          count[b_edge.v1]++;
-          count[b_edge.v2]++;
+          const int2 &b_edge = edges[i];
+          data[b_edge[0]] += value;
+          data[b_edge[1]] += value;
+          count[b_edge[0]]++;
+          count[b_edge[1]]++;
        }

        for (size_t i = 0; i < verts_num; i++) {
@@ -528,6 +528,19 @@ static void attr_create_generic(Scene *scene,
        });
        break;
      }
+      case BL::Attribute::data_type_INT32_2D: {
+        BL::Int2Attribute b_int2_attribute{b_attribute};
+        if (b_int2_attribute.data.length() == 0) {
+          continue;
+        }
+        const int2 *src = static_cast<const int2 *>(b_int2_attribute.data[0].ptr.data);
+        Attribute *attr = attributes.add(name, TypeFloat2, element);
+        float2 *data = attr->data_float2();
+        fill_generic_attribute(b_mesh, data, b_domain, subdivision, [&](int i) {
+          return make_float2(float(src[i][0]), float(src[i][1]));
+        });
+        break;
+      }
      default:
        /* Not supported. */
        break;
@@ -783,13 +796,13 @@ static void attr_create_pointiness(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, b
  EdgeMap visited_edges;
  memset(&counter[0], 0, sizeof(int) * counter.size());

-  const MEdge *edges = static_cast<MEdge *>(b_mesh.edges[0].ptr.data);
+  const int2 *edges = static_cast<int2 *>(b_mesh.edges[0].ptr.data);
  const int edges_num = b_mesh.edges.length();

  for (int i = 0; i < edges_num; i++) {
-    const MEdge &b_edge = edges[i];
-    const int v0 = vert_orig_index[b_edge.v1];
-    const int v1 = vert_orig_index[b_edge.v2];
+    const int2 &b_edge = edges[i];
+    const int v0 = vert_orig_index[b_edge[0]];
+    const int v1 = vert_orig_index[b_edge[1]];
    if (visited_edges.exists(v0, v1)) {
      continue;
    }
@@ -825,9 +838,9 @@ static void attr_create_pointiness(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, b
  memset(&counter[0], 0, sizeof(int) * counter.size());
  visited_edges.clear();
  for (int i = 0; i < edges_num; i++) {
-    const MEdge &b_edge = edges[i];
-    const int v0 = vert_orig_index[b_edge.v1];
-    const int v1 = vert_orig_index[b_edge.v2];
+    const int2 &b_edge = edges[i];
+    const int v0 = vert_orig_index[b_edge[0]];
+    const int v1 = vert_orig_index[b_edge[1]];
    if (visited_edges.exists(v0, v1)) {
      continue;
    }
@@ -894,12 +907,12 @@ static void attr_create_random_per_island(Scene *scene,

  DisjointSet vertices_sets(number_of_vertices);

-  const MEdge *edges = static_cast<MEdge *>(b_mesh.edges[0].ptr.data);
+  const int2 *edges = static_cast<int2 *>(b_mesh.edges[0].ptr.data);
  const int edges_num = b_mesh.edges.length();
  const int *corner_verts = find_corner_vert_attribute(b_mesh);

  for (int i = 0; i < edges_num; i++) {
-    vertices_sets.join(edges[i].v1, edges[i].v2);
+    vertices_sets.join(edges[i][0], edges[i][1]);
  }

  AttributeSet &attributes = (subdivision) ? mesh->subd_attributes : mesh->attributes;
@@ -1221,12 +1234,12 @@ static void create_subd_mesh(Scene *scene,

    mesh->reserve_subd_creases(num_creases);

-    const MEdge *edges = static_cast<MEdge *>(b_mesh.edges[0].ptr.data);
+    const int2 *edges = static_cast<int2 *>(b_mesh.edges[0].ptr.data);
    for (int i = 0; i < edges_num; i++) {
      const float crease = creases[i];
      if (crease != 0.0f) {
-        const MEdge &b_edge = edges[i];
-        mesh->add_edge_crease(b_edge.v1, b_edge.v2, crease);
+        const int2 &b_edge = edges[i];
+        mesh->add_edge_crease(b_edge[0], b_edge[1], crease);
      }
    }
  }
--- a/intern/cycles/blender/pointcloud.cpp
+++ b/intern/cycles/blender/pointcloud.cpp
@@ -10,22 +10,12 @@
 #include "blender/sync.h"
 #include "blender/util.h"

+#include "util/color.h"
 #include "util/foreach.h"
 #include "util/hash.h"

 CCL_NAMESPACE_BEGIN

-template<typename TypeInCycles, typename GetValueAtIndex>
-static void fill_generic_attribute(BL::PointCloud &b_pointcloud,
-                                   TypeInCycles *data,
-                                   const GetValueAtIndex &get_value_at_index)
-{
-  const int num_points = b_pointcloud.points.length();
-  for (int i = 0; i < num_points; i++) {
-    data[i] = get_value_at_index(i);
-  }
-}
-
 static void attr_create_motion(PointCloud *pointcloud,
                               BL::Attribute &b_attribute,
                               const float motion_scale)
@@ -63,6 +53,11 @@ static void copy_attributes(PointCloud *pointcloud,
                            const bool need_motion,
                            const float motion_scale)
 {
+  const int num_points = b_pointcloud.points.length();
+  if (num_points == 0) {
+    return;
+  }
+
  AttributeSet &attributes = pointcloud->attributes;
  static const ustring u_velocity("velocity");
  for (BL::Attribute &b_attribute : b_pointcloud.attributes) {
@@ -81,56 +76,83 @@ static void copy_attributes(PointCloud *pointcloud,
    switch (b_data_type) {
      case BL::Attribute::data_type_FLOAT: {
        BL::FloatAttribute b_float_attribute{b_attribute};
+        const float *src = static_cast<const float *>(b_float_attribute.data[0].ptr.data);
        Attribute *attr = attributes.add(name, TypeFloat, element);
        float *data = attr->data_float();
-        fill_generic_attribute(
-            b_pointcloud, data, [&](int i) { return b_float_attribute.data[i].value(); });
+        std::copy(src, src + num_points, data);
        break;
      }
      case BL::Attribute::data_type_BOOLEAN: {
        BL::BoolAttribute b_bool_attribute{b_attribute};
+        const bool *src = static_cast<const bool *>(b_bool_attribute.data[0].ptr.data);
        Attribute *attr = attributes.add(name, TypeFloat, element);
        float *data = attr->data_float();
-        fill_generic_attribute(
-            b_pointcloud, data, [&](int i) { return (float)b_bool_attribute.data[i].value(); });
+        for (int i = 0; i < num_points; i++) {
+          data[i] = float(src[i]);
+        }
        break;
      }
      case BL::Attribute::data_type_INT: {
        BL::IntAttribute b_int_attribute{b_attribute};
+        const int *src = static_cast<const int *>(b_int_attribute.data[0].ptr.data);
        Attribute *attr = attributes.add(name, TypeFloat, element);
        float *data = attr->data_float();
-        fill_generic_attribute(
-            b_pointcloud, data, [&](int i) { return (float)b_int_attribute.data[i].value(); });
+        for (int i = 0; i < num_points; i++) {
+          data[i] = float(src[i]);
+        }
+        break;
+      }
+      case BL::Attribute::data_type_INT32_2D: {
+        BL::Int2Attribute b_int2_attribute{b_attribute};
+        const int2 *src = static_cast<const int2 *>(b_int2_attribute.data[0].ptr.data);
+        Attribute *attr = attributes.add(name, TypeFloat2, element);
+        float2 *data = attr->data_float2();
+        for (int i = 0; i < num_points; i++) {
+          data[i] = make_float2(float(src[i][0]), float(src[i][1]));
+        }
        break;
      }
      case BL::Attribute::data_type_FLOAT_VECTOR: {
        BL::FloatVectorAttribute b_vector_attribute{b_attribute};
+        const float(*src)[3] = static_cast<const float(*)[3]>(b_vector_attribute.data[0].ptr.data);
        Attribute *attr = attributes.add(name, TypeVector, element);
        float3 *data = attr->data_float3();
-        fill_generic_attribute(b_pointcloud, data, [&](int i) {
-          BL::Array<float, 3> v = b_vector_attribute.data[i].vector();
-          return make_float3(v[0], v[1], v[2]);
-        });
+        for (int i = 0; i < num_points; i++) {
+          data[i] = make_float3(src[i][0], src[i][1], src[i][2]);
+        }
+        break;
+      }
+      case BL::Attribute::data_type_BYTE_COLOR: {
+        BL::ByteColorAttribute b_color_attribute{b_attribute};
+        const uchar(*src)[4] = static_cast<const uchar(*)[4]>(b_color_attribute.data[0].ptr.data);
+        Attribute *attr = attributes.add(name, TypeRGBA, element);
+        float4 *data = attr->data_float4();
+        for (int i = 0; i < num_points; i++) {
+          data[i] = make_float4(color_srgb_to_linear(byte_to_float(src[i][0])),
+                                color_srgb_to_linear(byte_to_float(src[i][1])),
+                                color_srgb_to_linear(byte_to_float(src[i][2])),
+                                color_srgb_to_linear(byte_to_float(src[i][3])));
+        }
        break;
      }
      case BL::Attribute::data_type_FLOAT_COLOR: {
        BL::FloatColorAttribute b_color_attribute{b_attribute};
+        const float(*src)[4] = static_cast<const float(*)[4]>(b_color_attribute.data[0].ptr.data);
        Attribute *attr = attributes.add(name, TypeRGBA, element);
        float4 *data = attr->data_float4();
-        fill_generic_attribute(b_pointcloud, data, [&](int i) {
-          BL::Array<float, 4> v = b_color_attribute.data[i].color();
-          return make_float4(v[0], v[1], v[2], v[3]);
-        });
+        for (int i = 0; i < num_points; i++) {
+          data[i] = make_float4(src[i][0], src[i][1], src[i][2], src[i][3]);
+        }
        break;
      }
      case BL::Attribute::data_type_FLOAT2: {
        BL::Float2Attribute b_float2_attribute{b_attribute};
+        const float(*src)[2] = static_cast<const float(*)[2]>(b_float2_attribute.data[0].ptr.data);
        Attribute *attr = attributes.add(name, TypeFloat2, element);
        float2 *data = attr->data_float2();
-        fill_generic_attribute(b_pointcloud, data, [&](int i) {
-          BL::Array<float, 2> v = b_float2_attribute.data[i].vector();
-          return make_float2(v[0], v[1]);
-        });
+        for (int i = 0; i < num_points; i++) {
+          data[i] = make_float2(src[i][0], src[i][1]);
+        }
        break;
      }
      default:
@@ -140,7 +162,7 @@ static void copy_attributes(PointCloud *pointcloud,
  }
 }

-static std::optional<BL::FloatAttribute> find_radius_attribute(BL::PointCloud b_pointcloud)
+static const float *find_radius_attribute(BL::PointCloud b_pointcloud)
 {
  for (BL::Attribute &b_attribute : b_pointcloud.attributes) {
    if (b_attribute.name() != "radius") {
@@ -149,12 +171,16 @@ static std::optional<BL::FloatAttribute> find_radius_attribute(BL::PointCloud b_
    if (b_attribute.data_type() != BL::Attribute::data_type_FLOAT) {
      continue;
    }
-    return BL::FloatAttribute{b_attribute};
+    BL::FloatAttribute b_float_attribute{b_attribute};
+    if (b_float_attribute.data.length() == 0) {
+      return nullptr;
+    }
+    return static_cast<const float *>(b_float_attribute.data[0].ptr.data);
  }
-  return std::nullopt;
+  return nullptr;
 }

-static BL::FloatVectorAttribute find_position_attribute(BL::PointCloud b_pointcloud)
+static const float (*find_position_attribute(BL::PointCloud b_pointcloud))[3]
 {
  for (BL::Attribute &b_attribute : b_pointcloud.attributes) {
    if (b_attribute.name() != "position") {
@@ -163,11 +189,15 @@ static BL::FloatVectorAttribute find_position_attribute(BL::PointCloud b_pointcl
    if (b_attribute.data_type() != BL::Attribute::data_type_FLOAT_VECTOR) {
      continue;
    }
-    return BL::FloatVectorAttribute{b_attribute};
+    BL::FloatVectorAttribute b_float3_attribute{b_attribute};
+    if (b_float3_attribute.data.length() == 0) {
+      return nullptr;
+    }
+    return static_cast<const float(*)[3]>(b_float3_attribute.data[0].ptr.data);
  }
  /* The position attribute must exist. */
  assert(false);
-  return BL::FloatVectorAttribute{b_pointcloud.attributes[0]};
+  return nullptr;
 }

 static void export_pointcloud(Scene *scene,
@@ -176,34 +206,36 @@ static void export_pointcloud(Scene *scene,
                              const bool need_motion,
                              const float motion_scale)
 {
-  /* TODO: optimize so we can straight memcpy arrays from Blender? */
+  const int num_points = b_pointcloud.points.length();
+  pointcloud->resize(num_points);

-  /* Add requested attributes. */
-  Attribute *attr_random = NULL;
-  if (pointcloud->need_attribute(scene, ATTR_STD_POINT_RANDOM)) {
-    attr_random = pointcloud->attributes.add(ATTR_STD_POINT_RANDOM);
+  const float(*b_attr_position)[3] = find_position_attribute(b_pointcloud);
+  float3 *points = pointcloud->get_points().data();
+
+  for (int i = 0; i < num_points; i++) {
+    points[i] = make_float3(b_attr_position[i][0], b_attr_position[i][1], b_attr_position[i][2]);
  }

-  /* Reserve memory. */
-  const int num_points = b_pointcloud.points.length();
-  pointcloud->reserve(num_points);
+  const float *b_attr_radius = find_radius_attribute(b_pointcloud);
+  float *radius = pointcloud->get_radius().data();
+  if (b_attr_radius) {
+    std::copy(b_attr_radius, b_attr_radius + num_points, radius);
+  }
+  else {
+    std::fill(radius, radius + num_points, 0.01f);
+  }

-  BL::FloatVectorAttribute b_attr_position = find_position_attribute(b_pointcloud);
-  std::optional<BL::FloatAttribute> b_attr_radius = find_radius_attribute(b_pointcloud);
+  int *shader = pointcloud->get_shader().data();
+  std::fill(shader, shader + num_points, 0);

-  /* Export points. */
-  for (int i = 0; i < num_points; i++) {
-    const float3 co = get_float3(b_attr_position.data[i].vector());
-    const float radius = b_attr_radius ? b_attr_radius->data[i].value() : 0.01f;
-    pointcloud->add_point(co, radius);
-
-    /* Random number per point. */
-    if (attr_random != NULL) {
-      attr_random->add(hash_uint2_to_float(i, 0));
+  if (pointcloud->need_attribute(scene, ATTR_STD_POINT_RANDOM)) {
+    Attribute *attr_random = pointcloud->attributes.add(ATTR_STD_POINT_RANDOM);
+    float *data = attr_random->data_float();
+    for (int i = 0; i < num_points; i++) {
+      data[i] = hash_uint2_to_float(i, 0);
    }
  }

-  /* Export attributes */
  copy_attributes(pointcloud, b_pointcloud, need_motion, motion_scale);
 }

@@ -220,22 +252,22 @@ static void export_pointcloud_motion(PointCloud *pointcloud,
    new_attribute = true;
  }

-  /* Export motion points. */
  const int num_points = pointcloud->num_points();
-  // Point cloud attributes are stored as float4 with the radius
-  // in the w element. This is explict now as float3 is no longer
-  // interchangeable with float4 as it is packed now.
+  /* Point cloud attributes are stored as float4 with the radius in the w element.
+   * This is explicit now as float3 is no longer interchangeable with float4 as it
+   * is packed now. */
  float4 *mP = attr_mP->data_float4() + motion_step * num_points;
  bool have_motion = false;
  const array<float3> &pointcloud_points = pointcloud->get_points();

  const int b_points_num = b_pointcloud.points.length();
-  BL::FloatVectorAttribute b_attr_position = find_position_attribute(b_pointcloud);
-  std::optional<BL::FloatAttribute> b_attr_radius = find_radius_attribute(b_pointcloud);
+  const float(*b_attr_position)[3] = find_position_attribute(b_pointcloud);
+  const float *b_attr_radius = find_radius_attribute(b_pointcloud);

  for (int i = 0; i < std::min(num_points, b_points_num); i++) {
-    const float3 P = get_float3(b_attr_position.data[i].vector());
-    const float radius = b_attr_radius ? b_attr_radius->data[i].value() : 0.01f;
+    const float3 P = make_float3(
+        b_attr_position[i][0], b_attr_position[i][1], b_attr_position[i][2]);
+    const float radius = b_attr_radius ? b_attr_radius[i] : 0.01f;
    mP[i] = make_float4(P.x, P.y, P.z, radius);
    have_motion = have_motion || (P != pointcloud_points[i]);
  }
@@ -277,7 +309,7 @@ void BlenderSync::sync_pointcloud(PointCloud *pointcloud, BObjectInfo &b_ob_info
                                 0.0f;
  export_pointcloud(scene, &new_pointcloud, b_pointcloud, need_motion, motion_scale);

-  /* update original sockets */
+  /* Update original sockets. */
  for (const SocketType &socket : new_pointcloud.type->inputs) {
    /* Those sockets are updated in sync_object, so do not modify them. */
    if (socket.name == "use_motion_blur" || socket.name == "motion_steps" ||
@@ -292,7 +324,7 @@ void BlenderSync::sync_pointcloud(PointCloud *pointcloud, BObjectInfo &b_ob_info
    pointcloud->attributes.attributes.push_back(std::move(attr));
  }

-  /* tag update */
+  /* Tag update. */
  const bool rebuild = (pointcloud && old_numpoints != pointcloud->num_points());
  pointcloud->tag_update(scene, rebuild);
 }
--- a/intern/cycles/blender/python.cpp
+++ b/intern/cycles/blender/python.cpp
@@ -1034,6 +1034,14 @@ void *CCL_python_module_init()
  Py_INCREF(Py_False);
 #endif /* WITH_EMBREE */

+#ifdef WITH_EMBREE_GPU
+  PyModule_AddObject(mod, "with_embree_gpu", Py_True);
+  Py_INCREF(Py_True);
+#else  /* WITH_EMBREE_GPU */
+  PyModule_AddObject(mod, "with_embree_gpu", Py_False);
+  Py_INCREF(Py_False);
+#endif /* WITH_EMBREE_GPU */
+
  if (ccl::openimagedenoise_supported()) {
    PyModule_AddObject(mod, "with_openimagedenoise", Py_True);
    Py_INCREF(Py_True);
--- a/intern/cycles/blender/session.cpp
+++ b/intern/cycles/blender/session.cpp
@@ -1061,7 +1061,7 @@ void BlenderSession::ensure_display_driver_if_needed()
  unique_ptr<BlenderDisplayDriver> display_driver = make_unique<BlenderDisplayDriver>(
      b_engine, b_scene, background);
  display_driver_ = display_driver.get();
-  session->set_display_driver(move(display_driver));
+  session->set_display_driver(std::move(display_driver));
 }

 CCL_NAMESPACE_END
--- a/intern/cycles/blender/shader.cpp
+++ b/intern/cycles/blender/shader.cpp
@@ -981,22 +981,8 @@ static ShaderNode *add_node(Scene *scene,
    sky->set_sun_disc(b_sky_node.sun_disc());
    sky->set_sun_size(b_sky_node.sun_size());
    sky->set_sun_intensity(b_sky_node.sun_intensity());
-    /* Patch sun position to be able to animate daylight cycle while keeping the shading code
-     * simple. */
-    float sun_rotation = b_sky_node.sun_rotation();
-    /* Wrap into [-2PI..2PI] range. */
-    float sun_elevation = fmodf(b_sky_node.sun_elevation(), M_2PI_F);
-    /* Wrap into [-PI..PI] range. */
-    if (fabsf(sun_elevation) >= M_PI_F) {
-      sun_elevation -= copysignf(2.0f, sun_elevation) * M_PI_F;
-    }
-    /* Wrap into [-PI/2..PI/2] range while keeping the same absolute position. */
-    if (sun_elevation >= M_PI_2_F || sun_elevation <= -M_PI_2_F) {
-      sun_elevation = copysignf(M_PI_F, sun_elevation) - sun_elevation;
-      sun_rotation += M_PI_F;
-    }
-    sky->set_sun_elevation(sun_elevation);
-    sky->set_sun_rotation(sun_rotation);
+    sky->set_sun_elevation(b_sky_node.sun_elevation());
+    sky->set_sun_rotation(b_sky_node.sun_rotation());
    sky->set_altitude(b_sky_node.altitude());
    sky->set_air_density(b_sky_node.air_density());
    sky->set_dust_density(b_sky_node.dust_density());
--- a/intern/cycles/blender/sync.cpp
+++ b/intern/cycles/blender/sync.cpp
@@ -634,6 +634,10 @@ static bool get_known_pass_type(BL::RenderPass &b_pass, PassType &type, PassMode
  MAP_PASS("AdaptiveAuxBuffer", PASS_ADAPTIVE_AUX_BUFFER, false);
  MAP_PASS("Debug Sample Count", PASS_SAMPLE_COUNT, false);

+  MAP_PASS("Guiding Color", PASS_GUIDING_COLOR, false);
+  MAP_PASS("Guiding Probability", PASS_GUIDING_PROBABILITY, false);
+  MAP_PASS("Guiding Average Roughness", PASS_GUIDING_AVG_ROUGHNESS, false);
+
  if (string_startswith(name, cryptomatte_prefix)) {
    type = PASS_CRYPTOMATTE;
    mode = PassMode::DENOISED;
@@ -684,18 +688,6 @@ void BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_v
  }
  scene->film->set_cryptomatte_passes(cryptomatte_passes);

-  /* Path guiding debug passes. */
-#ifdef WITH_CYCLES_DEBUG
-  b_engine.add_pass("Guiding Color", 3, "RGB", b_view_layer.name().c_str());
-  pass_add(scene, PASS_GUIDING_COLOR, "Guiding Color", PassMode::NOISY);
-
-  b_engine.add_pass("Guiding Probability", 1, "X", b_view_layer.name().c_str());
-  pass_add(scene, PASS_GUIDING_PROBABILITY, "Guiding Probability", PassMode::NOISY);
-
-  b_engine.add_pass("Guiding Average Roughness", 1, "X", b_view_layer.name().c_str());
-  pass_add(scene, PASS_GUIDING_AVG_ROUGHNESS, "Guiding Average Roughness", PassMode::NOISY);
-#endif
-
  unordered_set<string> expected_passes;

  /* Custom AOV passes. */
--- a/intern/cycles/bvh/build.cpp
+++ b/intern/cycles/bvh/build.cpp
@@ -527,7 +527,7 @@ BVHNode *BVHBuild::run()
    if (progress.get_cancel()) {
      rootnode->deleteSubtree();
      rootnode = NULL;
-      VLOG_WORK << "BVH build cancelled.";
+      VLOG_WORK << "BVH build canceled.";
    }
    else {
      /*rotate(rootnode, 4, 5);*/
@@ -1167,8 +1167,8 @@ BVHNode *BVHBuild::create_leaf_node(const BVHRange &range, const vector<BVHRefer

 void BVHBuild::rotate(BVHNode *node, int max_depth, int iterations)
 {
-  /* in tested scenes, this resulted in slightly slower raytracing, so disabled
-   * it for now. could be implementation bug, or depend on the scene */
+  /* In tested scenes, this resulted in slightly slower ray-tracing, so disabled
+   * it for now. Could be an implementation bug, or depend on the scene. */
  if (node)
    for (int i = 0; i < iterations; i++)
      rotate(node, max_depth);
--- a/intern/cycles/bvh/bvh2.cpp
+++ b/intern/cycles/bvh/bvh2.cpp
@@ -606,7 +606,7 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
      int4 *bvh_nodes = &bvh->pack.nodes[0];
      size_t bvh_nodes_size = bvh->pack.nodes.size();

-      for (size_t i = 0, j = 0; i < bvh_nodes_size; j++) {
+      for (size_t i = 0; i < bvh_nodes_size;) {
        size_t nsize, nsize_bbox;
        if (bvh_nodes[i].x & PATH_RAY_NODE_UNALIGNED) {
          nsize = BVH_UNALIGNED_NODE_SIZE;
--- a/intern/cycles/bvh/embree.cpp
+++ b/intern/cycles/bvh/embree.cpp
@@ -111,9 +111,13 @@ BVHEmbree::~BVHEmbree()
  }
 }

-void BVHEmbree::build(Progress &progress, Stats *stats, RTCDevice rtc_device_)
+void BVHEmbree::build(Progress &progress,
+                      Stats *stats,
+                      RTCDevice rtc_device_,
+                      const bool rtc_device_is_sycl_)
 {
  rtc_device = rtc_device_;
+  rtc_device_is_sycl = rtc_device_is_sycl_;
  assert(rtc_device);

  rtcSetDeviceErrorFunction(rtc_device, rtc_error_func, NULL);
@@ -266,15 +270,29 @@ void BVHEmbree::add_triangles(const Object *ob, const Mesh *mesh, int i)
  rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);

  const int *triangles = mesh->get_triangles().data();
-  rtcSetSharedGeometryBuffer(geom_id,
-                             RTC_BUFFER_TYPE_INDEX,
-                             0,
-                             RTC_FORMAT_UINT3,
-                             triangles,
-                             0,
-                             sizeof(int) * 3,
-                             num_triangles);
-
+  if (!rtc_device_is_sycl) {
+    rtcSetSharedGeometryBuffer(geom_id,
+                               RTC_BUFFER_TYPE_INDEX,
+                               0,
+                               RTC_FORMAT_UINT3,
+                               triangles,
+                               0,
+                               sizeof(int) * 3,
+                               num_triangles);
+  }
+  else {
+    /* NOTE(sirgienko): If the Embree device is a SYCL device, then Embree execution will
+     * happen on the GPU, and we cannot use standard host pointers at this point. So instead
+     * of making a shared geometry buffer, a new Embree buffer is created and the data is
+     * copied into it. */
+    int *triangles_buffer = (int *)rtcSetNewGeometryBuffer(
+        geom_id, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3, sizeof(int) * 3, num_triangles);
+    assert(triangles_buffer);
+    if (triangles_buffer) {
+      static_assert(sizeof(int) == sizeof(uint));
+      std::memcpy(triangles_buffer, triangles, sizeof(int) * 3 * (num_triangles));
+    }
+  }
  set_tri_vertex_buffer(geom_id, mesh, false);

  rtcSetGeometryUserData(geom_id, (void *)prim_offset);
@@ -323,14 +341,38 @@ void BVHEmbree::set_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh, con
      rtcUpdateGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t);
    }
    else {
-      rtcSetSharedGeometryBuffer(geom_id,
-                                 RTC_BUFFER_TYPE_VERTEX,
-                                 t,
-                                 RTC_FORMAT_FLOAT3,
-                                 verts,
-                                 0,
-                                 sizeof(float3),
-                                 num_verts + 1);
+      if (!rtc_device_is_sycl) {
+        rtcSetSharedGeometryBuffer(geom_id,
+                                   RTC_BUFFER_TYPE_VERTEX,
+                                   t,
+                                   RTC_FORMAT_FLOAT3,
+                                   verts,
+                                   0,
+                                   sizeof(float3),
+                                   num_verts + 1);
+      }
+      else {
+        /* NOTE(sirgienko): If the Embree device is a SYCL device, then Embree execution will
+         * happen on the GPU, and we cannot use standard host pointers at this point. So instead
+         * of making a shared geometry buffer, a new Embree buffer is created and the data is
+         * copied into it. */
+        /* As float3 is packed on GPU side, we map it to packed_float3. */
+        packed_float3 *verts_buffer = (packed_float3 *)rtcSetNewGeometryBuffer(
+            geom_id,
+            RTC_BUFFER_TYPE_VERTEX,
+            t,
+            RTC_FORMAT_FLOAT3,
+            sizeof(packed_float3),
+            num_verts + 1);
+        assert(verts_buffer);
+        if (verts_buffer) {
+          for (size_t i = (size_t)0; i < num_verts + 1; ++i) {
+            verts_buffer[i].x = verts[i].x;
+            verts_buffer[i].y = verts[i].y;
+            verts_buffer[i].z = verts[i].z;
+          }
+        }
+      }
    }
  }
 }
--- a/intern/cycles/bvh/embree.h
+++ b/intern/cycles/bvh/embree.h
@@ -29,7 +29,10 @@ class PointCloud;

 class BVHEmbree : public BVH {
 public:
-  void build(Progress &progress, Stats *stats, RTCDevice rtc_device);
+  void build(Progress &progress,
+             Stats *stats,
+             RTCDevice rtc_device,
+             const bool isSyclEmbreeDevice = false);
  void refit(Progress &progress);

  RTCScene scene;
@@ -55,6 +58,7 @@ class BVHEmbree : public BVH {
                               const bool update);

  RTCDevice rtc_device;
+  bool rtc_device_is_sycl;
  enum RTCBuildQuality build_quality;
 };

--- a/intern/cycles/cmake/external_libs.cmake
+++ b/intern/cycles/cmake/external_libs.cmake
@@ -42,15 +42,19 @@ endif()
 ###########################################################################

 if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
-  set(WITH_CYCLES_HIP_BINARIES OFF)
-  message(STATUS "HIP temporarily disabled due to compiler bugs")
+  if(UNIX)
+    # Disabled until there is a HIP 5.5 release for Linux.
+    set(WITH_CYCLES_HIP_BINARIES OFF)
+    message(STATUS "HIP temporarily disabled due to compiler bugs")
+  else()
+    # Need at least HIP 5.5 to solve compiler bug affecting the kernel.
+    find_package(HIP 5.5.0)
+    set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)

-  # find_package(HIP)
-  # set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
-
-  # if(HIP_FOUND)
-  #   message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
-  # endif()
+    if(HIP_FOUND)
+      message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
+    endif()
+  endif()
 endif()

 if(NOT WITH_HIP_DYNLOAD)
--- a/intern/cycles/device/cpu/device_impl.cpp
+++ b/intern/cycles/device/cpu/device_impl.cpp
@@ -84,7 +84,7 @@ CPUDevice::~CPUDevice()
  texture_info.free();
 }

-BVHLayoutMask CPUDevice::get_bvh_layout_mask() const
+BVHLayoutMask CPUDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
 {
  BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2;
 #ifdef WITH_EMBREE
--- a/intern/cycles/device/cpu/device_impl.h
+++ b/intern/cycles/device/cpu/device_impl.h
@@ -56,7 +56,7 @@ class CPUDevice : public Device {
  CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_);
  ~CPUDevice();

-  virtual BVHLayoutMask get_bvh_layout_mask() const override;
+  virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;

  /* Returns true if the texture info was copied to the device (meaning, some more
   * re-initialization might be needed). */
--- a/intern/cycles/device/cuda/device_impl.cpp
+++ b/intern/cycles/device/cuda/device_impl.cpp
@@ -35,7 +35,7 @@ bool CUDADevice::have_precompiled_kernels()
  return path_exists(cubins_path);
 }

-BVHLayoutMask CUDADevice::get_bvh_layout_mask() const
+BVHLayoutMask CUDADevice::get_bvh_layout_mask(uint /*kernel_features*/) const
 {
  return BVH_LAYOUT_BVH2;
 }
--- a/intern/cycles/device/cuda/device_impl.h
+++ b/intern/cycles/device/cuda/device_impl.h
@@ -38,7 +38,7 @@ class CUDADevice : public GPUDevice {

  static bool have_precompiled_kernels();

-  virtual BVHLayoutMask get_bvh_layout_mask() const override;
+  virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;

  void set_error(const string &error) override;

--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -354,7 +354,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
  info.has_guiding = true;
  info.has_profiling = true;
  info.has_peer_memory = false;
-  info.use_metalrt = false;
+  info.use_hardware_raytracing = false;
  info.denoisers = DENOISER_ALL;

  foreach (const DeviceInfo &device, subdevices) {
@@ -403,7 +403,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
    info.has_guiding &= device.has_guiding;
    info.has_profiling &= device.has_profiling;
    info.has_peer_memory |= device.has_peer_memory;
-    info.use_metalrt |= device.use_metalrt;
+    info.use_hardware_raytracing |= device.use_hardware_raytracing;
    info.denoisers &= device.denoisers;
  }

--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -71,15 +71,16 @@ class DeviceInfo {
  string description;
  string id; /* used for user preferences, should stay fixed with changing hardware config */
  int num;
-  bool display_device;  /* GPU is used as a display device. */
-  bool has_nanovdb;     /* Support NanoVDB volumes. */
-  bool has_light_tree;  /* Support light tree. */
-  bool has_osl;         /* Support Open Shading Language. */
-  bool has_guiding;     /* Support path guiding. */
-  bool has_profiling;   /* Supports runtime collection of profiling info. */
-  bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */
-  bool has_gpu_queue;   /* Device supports GPU queue. */
-  bool use_metalrt;     /* Use MetalRT to accelerate ray queries (Metal only). */
+  bool display_device;          /* GPU is used as a display device. */
+  bool has_nanovdb;             /* Support NanoVDB volumes. */
+  bool has_light_tree;          /* Support light tree. */
+  bool has_osl;                 /* Support Open Shading Language. */
+  bool has_guiding;             /* Support path guiding. */
+  bool has_profiling;           /* Supports runtime collection of profiling info. */
+  bool has_peer_memory;         /* GPU has P2P access to memory of another GPU. */
+  bool has_gpu_queue;           /* Device supports GPU queue. */
+  bool use_hardware_raytracing; /* Use hardware ray tracing to accelerate ray queries in a backend.
+                                 */
  KernelOptimizationLevel kernel_optimization_level; /* Optimization level applied to path tracing
                                                      * kernels (Metal only). */
  DenoiserTypeMask denoisers;                        /* Supported denoiser types. */
@@ -101,7 +102,7 @@ class DeviceInfo {
    has_profiling = false;
    has_peer_memory = false;
    has_gpu_queue = false;
-    use_metalrt = false;
+    use_hardware_raytracing = false;
    denoisers = DENOISER_NONE;
  }

@@ -157,7 +158,7 @@ class Device {
    fprintf(stderr, "%s\n", error.c_str());
    fflush(stderr);
  }
-  virtual BVHLayoutMask get_bvh_layout_mask() const = 0;
+  virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const = 0;

  /* statistics */
  Stats &stats;
--- a/intern/cycles/device/dummy/device.cpp
+++ b/intern/cycles/device/dummy/device.cpp
@@ -20,7 +20,7 @@ class DummyDevice : public Device {

  ~DummyDevice() {}

-  virtual BVHLayoutMask get_bvh_layout_mask() const override
+  virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override
  {
    return 0;
  }
--- a/intern/cycles/device/hip/device.cpp
+++ b/intern/cycles/device/hip/device.cpp
@@ -137,7 +137,7 @@ void device_hip_info(vector<DeviceInfo> &devices)
    info.num = num;

    info.has_nanovdb = true;
-    info.has_light_tree = false;
+    info.has_light_tree = true;
    info.denoisers = 0;

    info.has_gpu_queue = true;
--- a/intern/cycles/device/hip/device_impl.cpp
+++ b/intern/cycles/device/hip/device_impl.cpp
@@ -35,7 +35,7 @@ bool HIPDevice::have_precompiled_kernels()
  return path_exists(fatbins_path);
 }

-BVHLayoutMask HIPDevice::get_bvh_layout_mask() const
+BVHLayoutMask HIPDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
 {
  return BVH_LAYOUT_BVH2;
 }
--- a/intern/cycles/device/hip/device_impl.h
+++ b/intern/cycles/device/hip/device_impl.h
@@ -35,7 +35,7 @@ class HIPDevice : public GPUDevice {

  static bool have_precompiled_kernels();

-  virtual BVHLayoutMask get_bvh_layout_mask() const override;
+  virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;

  void set_error(const string &error) override;

--- a/intern/cycles/device/kernel.cpp
+++ b/intern/cycles/device/kernel.cpp
@@ -3,7 +3,9 @@

 #include "device/kernel.h"

-#include "util/log.h"
+#ifndef __KERNEL_ONEAPI__
+#  include "util/log.h"
+#endif

 CCL_NAMESPACE_BEGIN

@@ -153,10 +155,13 @@ const char *device_kernel_as_string(DeviceKernel kernel)
    case DEVICE_KERNEL_NUM:
      break;
  };
+#ifndef __KERNEL_ONEAPI__
  LOG(FATAL) << "Unhandled kernel " << static_cast<int>(kernel) << ", should never happen.";
+#endif
  return "UNKNOWN";
 }

+#ifndef __KERNEL_ONEAPI__
 std::ostream &operator<<(std::ostream &os, DeviceKernel kernel)
 {
  os << device_kernel_as_string(kernel);
@@ -178,5 +183,6 @@ string device_kernel_mask_as_string(DeviceKernelMask mask)

  return str;
 }
+#endif

 CCL_NAMESPACE_END
--- a/intern/cycles/device/kernel.h
+++ b/intern/cycles/device/kernel.h
@@ -3,11 +3,13 @@

 #pragma once

-#include "kernel/types.h"
+#ifndef __KERNEL_ONEAPI__
+#  include "kernel/types.h"

-#include "util/string.h"
+#  include "util/string.h"

-#include <ostream>  // NOLINT
+#  include <ostream>  // NOLINT
+#endif

 CCL_NAMESPACE_BEGIN

@@ -15,9 +17,12 @@ bool device_kernel_has_shading(DeviceKernel kernel);
 bool device_kernel_has_intersection(DeviceKernel kernel);

 const char *device_kernel_as_string(DeviceKernel kernel);
+
+#ifndef __KERNEL_ONEAPI__
 std::ostream &operator<<(std::ostream &os, DeviceKernel kernel);

 typedef uint64_t DeviceKernelMask;
 string device_kernel_mask_as_string(DeviceKernelMask mask);
+#endif

 CCL_NAMESPACE_END
--- a/intern/cycles/device/metal/device_impl.h
+++ b/intern/cycles/device/metal/device_impl.h
@@ -100,7 +100,7 @@ class MetalDevice : public Device {

  virtual void cancel() override;

-  virtual BVHLayoutMask get_bvh_layout_mask() const override;
+  virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;

  void set_error(const string &error) override;

--- a/intern/cycles/device/metal/device_impl.mm
+++ b/intern/cycles/device/metal/device_impl.mm
@@ -39,7 +39,7 @@ bool MetalDevice::is_device_cancelled(int ID)
  return get_device_by_ID(ID, lock) == nullptr;
 }

-BVHLayoutMask MetalDevice::get_bvh_layout_mask() const
+BVHLayoutMask MetalDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
 {
  return use_metalrt ? BVH_LAYOUT_METAL : BVH_LAYOUT_BVH2;
 }
@@ -100,12 +100,12 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
    }
    case METAL_GPU_AMD: {
      max_threads_per_threadgroup = 128;
-      use_metalrt = info.use_metalrt;
+      use_metalrt = info.use_hardware_raytracing;
      break;
    }
    case METAL_GPU_APPLE: {
      max_threads_per_threadgroup = 512;
-      use_metalrt = info.use_metalrt;
+      use_metalrt = info.use_hardware_raytracing;
      break;
    }
  }
--- a/intern/cycles/device/multi/device.cpp
+++ b/intern/cycles/device/multi/device.cpp
@@ -96,12 +96,13 @@ class MultiDevice : public Device {
    return error_msg;
  }

-  virtual BVHLayoutMask get_bvh_layout_mask() const override
+  virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const override
  {
    BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL;
    BVHLayoutMask bvh_layout_mask_all = BVH_LAYOUT_NONE;
    foreach (const SubDevice &sub_device, devices) {
-      BVHLayoutMask device_bvh_layout_mask = sub_device.device->get_bvh_layout_mask();
+      BVHLayoutMask device_bvh_layout_mask = sub_device.device->get_bvh_layout_mask(
+          kernel_features);
      bvh_layout_mask &= device_bvh_layout_mask;
      bvh_layout_mask_all |= device_bvh_layout_mask;
    }
--- a/intern/cycles/device/oneapi/device.cpp
+++ b/intern/cycles/device/oneapi/device.cpp
@@ -40,12 +40,12 @@ bool device_oneapi_init()
-  if (getenv("SYCL_CACHE_TRESHOLD") == nullptr) {
+  if (getenv("SYCL_CACHE_THRESHOLD") == nullptr) {
    _putenv_s("SYCL_CACHE_THRESHOLD", "0");
  }
-  if (getenv("SYCL_DEVICE_FILTER") == nullptr) {
+  if (getenv("ONEAPI_DEVICE_SELECTOR") == nullptr) {
    if (getenv("CYCLES_ONEAPI_ALL_DEVICES") == nullptr) {
-      _putenv_s("SYCL_DEVICE_FILTER", "level_zero");
+      _putenv_s("ONEAPI_DEVICE_SELECTOR", "level_zero:*");
    }
    else {
-      _putenv_s("SYCL_DEVICE_FILTER", "level_zero,cuda,hip");
+      _putenv_s("ONEAPI_DEVICE_SELECTOR", "!opencl:*");
    }
  }
  if (getenv("SYCL_ENABLE_PCI") == nullptr) {
@@ -58,10 +58,10 @@ bool device_oneapi_init()
  setenv("SYCL_CACHE_PERSISTENT", "1", false);
  setenv("SYCL_CACHE_THRESHOLD", "0", false);
  if (getenv("CYCLES_ONEAPI_ALL_DEVICES") == nullptr) {
-    setenv("SYCL_DEVICE_FILTER", "level_zero", false);
+    setenv("ONEAPI_DEVICE_SELECTOR", "level_zero:*", false);
  }
  else {
-    setenv("SYCL_DEVICE_FILTER", "level_zero,cuda,hip", false);
+    setenv("ONEAPI_DEVICE_SELECTOR", "!opencl:*", false);
  }
  setenv("SYCL_ENABLE_PCI", "1", false);
  setenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE", "0", false);
@@ -87,7 +87,8 @@ Device *device_oneapi_create(const DeviceInfo &info, Stats &stats, Profiler &pro
 }

 #ifdef WITH_ONEAPI
-static void device_iterator_cb(const char *id, const char *name, int num, void *user_ptr)
+static void device_iterator_cb(
+    const char *id, const char *name, int num, bool hwrt_support, void *user_ptr)
 {
  vector<DeviceInfo> *devices = (vector<DeviceInfo> *)user_ptr;

@@ -112,6 +113,13 @@ static void device_iterator_cb(const char *id, const char *name, int num, void *
  /* NOTE(@nsirgien): Seems not possible to know from SYCL/oneAPI or Level0. */
  info.display_device = false;

+#  ifdef WITH_EMBREE_GPU
+  info.use_hardware_raytracing = hwrt_support;
+#  else
+  info.use_hardware_raytracing = false;
+  (void)hwrt_support;
+#  endif
+
  devices->push_back(info);
  VLOG_INFO << "Added device \"" << name << "\" with id \"" << info.id << "\".";
 }
--- a/intern/cycles/device/oneapi/device_impl.cpp
+++ b/intern/cycles/device/oneapi/device_impl.cpp
@@ -8,7 +8,19 @@
 #  include "util/debug.h"
 #  include "util/log.h"

+#  ifdef WITH_EMBREE_GPU
+#    include "bvh/embree.h"
+#  endif
+
 #  include "kernel/device/oneapi/globals.h"
+#  include "kernel/device/oneapi/kernel.h"
+
+#  if defined(WITH_EMBREE_GPU) && defined(EMBREE_SYCL_SUPPORT) && !defined(SYCL_LANGUAGE_VERSION)
+/* These declarations are missing from embree headers when compiling from a compiler that doesn't
+ * support SYCL. */
+extern "C" RTCDevice rtcNewSYCLDevice(sycl::context context, const char *config);
+extern "C" bool rtcIsSYCLDeviceSupported(const sycl::device sycl_device);
+#  endif

 CCL_NAMESPACE_BEGIN

@@ -22,16 +34,29 @@ static void queue_error_cb(const char *message, void *user_ptr)
 OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
    : Device(info, stats, profiler),
      device_queue_(nullptr),
+#  ifdef WITH_EMBREE_GPU
+      embree_device(nullptr),
+      embree_scene(nullptr),
+#  endif
      texture_info_(this, "texture_info", MEM_GLOBAL),
      kg_memory_(nullptr),
      kg_memory_device_(nullptr),
      kg_memory_size_(0)
 {
  need_texture_info_ = false;
+  use_hardware_raytracing = info.use_hardware_raytracing;

  oneapi_set_error_cb(queue_error_cb, &oneapi_error_string_);

-  bool is_finished_ok = create_queue(device_queue_, info.num);
+  bool is_finished_ok = create_queue(device_queue_,
+                                     info.num,
+#  ifdef WITH_EMBREE_GPU
+                                     use_hardware_raytracing ? &embree_device : nullptr
+#  else
+                                     nullptr
+#  endif
+  );
+
  if (is_finished_ok == false) {
    set_error("oneAPI queue initialization error: got runtime exception \"" +
              oneapi_error_string_ + "\"");
@@ -42,6 +67,16 @@ OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profi
    assert(device_queue_);
  }

+#  ifdef WITH_EMBREE_GPU
+  use_hardware_raytracing = use_hardware_raytracing && (embree_device != nullptr);
+#  else
+  use_hardware_raytracing = false;
+#  endif
+
+  if (use_hardware_raytracing) {
+    VLOG_INFO << "oneAPI will use hardware ray tracing for intersection acceleration.";
+  }
+
  size_t globals_segment_size;
  is_finished_ok = kernel_globals_size(globals_segment_size);
  if (is_finished_ok == false) {
@@ -64,6 +99,11 @@ OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profi

 OneapiDevice::~OneapiDevice()
 {
+#  ifdef WITH_EMBREE_GPU
+  if (embree_device)
+    rtcReleaseDevice(embree_device);
+#  endif
+
  texture_info_.free();
  usm_free(device_queue_, kg_memory_);
  usm_free(device_queue_, kg_memory_device_);
@@ -80,15 +120,47 @@ bool OneapiDevice::check_peer_access(Device * /*peer_device*/)
  return false;
 }

-BVHLayoutMask OneapiDevice::get_bvh_layout_mask() const
+bool OneapiDevice::can_use_hardware_raytracing_for_features(uint requested_features) const
 {
-  return BVH_LAYOUT_BVH2;
+  /* MNEE and Ray-trace kernels currently don't work correctly with HWRT. */
+  return !(requested_features & (KERNEL_FEATURE_MNEE | KERNEL_FEATURE_NODE_RAYTRACE));
 }

+BVHLayoutMask OneapiDevice::get_bvh_layout_mask(uint requested_features) const
+{
+  return (use_hardware_raytracing &&
+          can_use_hardware_raytracing_for_features(requested_features)) ?
+             BVH_LAYOUT_EMBREE :
+             BVH_LAYOUT_BVH2;
+}
+
+#  ifdef WITH_EMBREE_GPU
+void OneapiDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
+{
+  if (embree_device && bvh->params.bvh_layout == BVH_LAYOUT_EMBREE) {
+    BVHEmbree *const bvh_embree = static_cast<BVHEmbree *>(bvh);
+    if (refit) {
+      bvh_embree->refit(progress);
+    }
+    else {
+      bvh_embree->build(progress, &stats, embree_device, true);
+    }
+    if (bvh->params.top_level) {
+      embree_scene = bvh_embree->scene;
+    }
+  }
+  else {
+    Device::build_bvh(bvh, progress, refit);
+  }
+}
+#  endif
+
 bool OneapiDevice::load_kernels(const uint requested_features)
 {
  assert(device_queue_);

+  kernel_features = requested_features;
+
  bool is_finished_ok = oneapi_run_test_kernel(device_queue_);
  if (is_finished_ok == false) {
    set_error("oneAPI test kernel execution: got a runtime exception \"" + oneapi_error_string_ +
@@ -100,7 +172,14 @@ bool OneapiDevice::load_kernels(const uint requested_features)
    assert(device_queue_);
  }

-  is_finished_ok = oneapi_load_kernels(device_queue_, (const unsigned int)requested_features);
+  if (use_hardware_raytracing && !can_use_hardware_raytracing_for_features(requested_features)) {
+    VLOG_INFO
+        << "Hardware ray tracing disabled, not supported yet by oneAPI for requested features.";
+    use_hardware_raytracing = false;
+  }
+
+  is_finished_ok = oneapi_load_kernels(
+      device_queue_, (const unsigned int)requested_features, use_hardware_raytracing);
  if (is_finished_ok == false) {
    set_error("oneAPI kernels loading: got a runtime exception \"" + oneapi_error_string_ + "\"");
  }
@@ -327,6 +406,16 @@ void OneapiDevice::const_copy_to(const char *name, void *host, size_t size)
             << string_human_readable_number(size) << " bytes. ("
             << string_human_readable_size(size) << ")";

+#  ifdef WITH_EMBREE_GPU
+  if (strcmp(name, "data") == 0) {
+    assert(size <= sizeof(KernelData));
+
+    /* Update scene handle (since it is different for each device on multi devices). */
+    KernelData *const data = (KernelData *)host;
+    data->device_bvh = embree_scene;
+  }
+#  endif
+
  ConstMemMap::iterator i = const_mem_map_.find(name);
  device_vector<uchar> *data;

@@ -446,7 +535,9 @@ void OneapiDevice::check_usm(SyclQueue *queue_, const void *usm_ptr, bool allow_
 #  endif
 }

-bool OneapiDevice::create_queue(SyclQueue *&external_queue, int device_index)
+bool OneapiDevice::create_queue(SyclQueue *&external_queue,
+                                int device_index,
+                                void *embree_device_pointer)
 {
  bool finished_correct = true;
  try {
@@ -457,6 +548,13 @@ bool OneapiDevice::create_queue(SyclQueue *&external_queue, int device_index)
    sycl::queue *created_queue = new sycl::queue(devices[device_index],
                                                 sycl::property::queue::in_order());
    external_queue = reinterpret_cast<SyclQueue *>(created_queue);
+#  ifdef WITH_EMBREE_GPU
+    if (embree_device_pointer) {
+      *((RTCDevice *)embree_device_pointer) = rtcNewSYCLDevice(created_queue->get_context(), "");
+    }
+#  else
+    (void)embree_device_pointer;
+#  endif
  }
  catch (sycl::exception const &e) {
    finished_correct = false;
@@ -625,7 +723,8 @@ bool OneapiDevice::enqueue_kernel(KernelContext *kernel_context,
                                  size_t global_size,
                                  void **args)
 {
-  return oneapi_enqueue_kernel(kernel_context, kernel, global_size, args);
+  return oneapi_enqueue_kernel(
+      kernel_context, kernel, global_size, kernel_features, use_hardware_raytracing, args);
 }

 /* Compute-runtime (ie. NEO) version is what gets returned by sycl/L0 on Windows
@@ -767,9 +866,9 @@ char *OneapiDevice::device_capabilities()

    sycl::id<3> max_work_item_sizes =
        device.get_info<sycl::info::device::max_work_item_sizes<3>>();
-    WRITE_ATTR("max_work_item_sizes_dim0", ((size_t)max_work_item_sizes.get(0)))
-    WRITE_ATTR("max_work_item_sizes_dim1", ((size_t)max_work_item_sizes.get(1)))
-    WRITE_ATTR("max_work_item_sizes_dim2", ((size_t)max_work_item_sizes.get(2)))
+    WRITE_ATTR(max_work_item_sizes_dim0, ((size_t)max_work_item_sizes.get(0)))
+    WRITE_ATTR(max_work_item_sizes_dim1, ((size_t)max_work_item_sizes.get(1)))
+    WRITE_ATTR(max_work_item_sizes_dim2, ((size_t)max_work_item_sizes.get(2)))

    GET_NUM_ATTR(max_work_group_size)
    GET_NUM_ATTR(max_num_sub_groups)
@@ -792,7 +891,7 @@ char *OneapiDevice::device_capabilities()
    GET_NUM_ATTR(native_vector_width_half)

    size_t max_clock_frequency = device.get_info<sycl::info::device::max_clock_frequency>();
-    WRITE_ATTR("max_clock_frequency", max_clock_frequency)
+    WRITE_ATTR(max_clock_frequency, max_clock_frequency)

    GET_NUM_ATTR(address_bits)
    GET_NUM_ATTR(max_mem_alloc_size)
@@ -801,7 +900,7 @@ char *OneapiDevice::device_capabilities()
     * supported so we always return false, even if device supports HW texture usage acceleration.
     */
    bool image_support = false;
-    WRITE_ATTR("image_support", (size_t)image_support)
+    WRITE_ATTR(image_support, (size_t)image_support)

    GET_NUM_ATTR(max_parameter_size)
    GET_NUM_ATTR(mem_base_addr_align)
@@ -830,12 +929,17 @@ void OneapiDevice::iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_p
    std::string name = device.get_info<sycl::info::device::name>();
 #  else
    std::string name = "SYCL Host Task (Debug)";
+#  endif
+#  ifdef WITH_EMBREE_GPU
+    bool hwrt_support = rtcIsSYCLDeviceSupported(device);
+#  else
+    bool hwrt_support = false;
 #  endif
    std::string id = "ONEAPI_" + platform_name + "_" + name;
    if (device.has(sycl::aspect::ext_intel_pci_address)) {
      id.append("_" + device.get_info<sycl::ext::intel::info::device::pci_address>());
    }
-    (cb)(id.c_str(), name.c_str(), num, user_ptr);
+    (cb)(id.c_str(), name.c_str(), num, hwrt_support, user_ptr);
    num++;
  }
 }
--- a/intern/cycles/device/oneapi/device_impl.h
+++ b/intern/cycles/device/oneapi/device_impl.h
@@ -16,15 +16,16 @@ CCL_NAMESPACE_BEGIN

 class DeviceQueue;

-typedef void (*OneAPIDeviceIteratorCallback)(const char *id,
-                                             const char *name,
-                                             int num,
-                                             void *user_ptr);
+typedef void (*OneAPIDeviceIteratorCallback)(
+    const char *id, const char *name, int num, bool hwrt_support, void *user_ptr);

 class OneapiDevice : public Device {
 private:
  SyclQueue *device_queue_;
-
+#  ifdef WITH_EMBREE_GPU
+  RTCDevice embree_device;
+  RTCScene embree_scene;
+#  endif
  using ConstMemMap = map<string, device_vector<uchar> *>;
  ConstMemMap const_mem_map_;
  device_vector<TextureInfo> texture_info_;
@@ -34,17 +35,21 @@ class OneapiDevice : public Device {
  size_t kg_memory_size_ = (size_t)0;
  size_t max_memory_on_device_ = (size_t)0;
  std::string oneapi_error_string_;
+  bool use_hardware_raytracing = false;
+  unsigned int kernel_features = 0;

 public:
-  virtual BVHLayoutMask get_bvh_layout_mask() const override;
+  virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const override;

  OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);

  virtual ~OneapiDevice();
-
+#  ifdef WITH_EMBREE_GPU
+  void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
+#  endif
  bool check_peer_access(Device *peer_device) override;

-  bool load_kernels(const uint requested_features) override;
+  bool load_kernels(const uint kernel_features) override;

  void load_texture_info();

@@ -113,8 +118,9 @@ class OneapiDevice : public Device {
  SyclQueue *sycl_queue();

 protected:
+  bool can_use_hardware_raytracing_for_features(uint kernel_features) const;
  void check_usm(SyclQueue *queue, const void *usm_ptr, bool allow_host);
-  bool create_queue(SyclQueue *&external_queue, int device_index);
+  bool create_queue(SyclQueue *&external_queue, int device_index, void *embree_device);
  void free_queue(SyclQueue *queue);
  void *usm_aligned_alloc_host(SyclQueue *queue, size_t memory_size, size_t alignment);
  void *usm_alloc_device(SyclQueue *queue, size_t memory_size);
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -151,7 +151,7 @@ unique_ptr<DeviceQueue> OptiXDevice::gpu_queue_create()
  return make_unique<OptiXDeviceQueue>(this);
 }

-BVHLayoutMask OptiXDevice::get_bvh_layout_mask() const
+BVHLayoutMask OptiXDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
 {
  /* OptiX has its own internal acceleration structure format. */
  return BVH_LAYOUT_OPTIX;
@@ -181,7 +181,7 @@ string OptiXDevice::compile_kernel_get_common_cflags(const uint kernel_features)
  /* Add OptiX SDK include directory to include paths. */
  common_cflags += string_printf(" -I\"%s\"", get_optix_include_dir().c_str());

-  /* Specialization for shader raytracing. */
+  /* Specialization for shader ray-tracing. */
  if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
    common_cflags += " --keep-device-functions";
  }
@@ -483,7 +483,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
    group_descs[PG_HITL].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_local_hit";
  }

-  /* Shader raytracing replaces some functions with direct callables. */
+  /* Shader ray-tracing replaces some functions with direct callables. */
  if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
    group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
    group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].raygen.module = optix_module;
@@ -584,7 +584,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
    load_osl_kernels();
  }
  else if (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) {
-    /* Create shader raytracing and MNEE pipeline. */
+    /* Create shader ray-tracing and MNEE pipeline. */
    vector<OptixProgramGroup> pipeline_groups;
    pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
    if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
--- a/intern/cycles/device/optix/device_impl.h
+++ b/intern/cycles/device/optix/device_impl.h
@@ -88,7 +88,7 @@ class OptiXDevice : public CUDADevice {
  OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
  ~OptiXDevice();

-  BVHLayoutMask get_bvh_layout_mask() const override;
+  BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;

  string compile_kernel_get_common_cflags(const uint kernel_features);

--- a/intern/cycles/integrator/pass_accessor.h
+++ b/intern/cycles/integrator/pass_accessor.h
@@ -109,6 +109,11 @@ class PassAccessor {
  /* Set pass data for the given render buffers. Used for baking to read from passes. */
  bool set_render_tile_pixels(RenderBuffers *render_buffers, const Source &source);

+  const PassAccessInfo &get_pass_access_info() const
+  {
+    return pass_access_info_;
+  }
+
 protected:
  virtual void init_kernel_film_convert(KernelFilmConvert *kfilm_convert,
                                        const BufferParams &buffer_params,
--- a/intern/cycles/integrator/path_trace.cpp
+++ b/intern/cycles/integrator/path_trace.cpp
@@ -574,7 +574,7 @@ void PathTrace::denoise(const RenderWork &render_work)

 void PathTrace::set_output_driver(unique_ptr<OutputDriver> driver)
 {
-  output_driver_ = move(driver);
+  output_driver_ = std::move(driver);
 }

 void PathTrace::set_display_driver(unique_ptr<DisplayDriver> driver)
@@ -585,7 +585,7 @@ void PathTrace::set_display_driver(unique_ptr<DisplayDriver> driver)
  destroy_gpu_resources();

  if (driver) {
-    display_ = make_unique<PathTraceDisplay>(move(driver));
+    display_ = make_unique<PathTraceDisplay>(std::move(driver));
  }
  else {
    display_ = nullptr;
@@ -1036,7 +1036,12 @@ bool PathTrace::get_render_tile_pixels(const PassAccessor &pass_accessor,
  }

  if (big_tile_denoise_work_ && render_state_.has_denoised_result) {
-    return big_tile_denoise_work_->get_render_tile_pixels(pass_accessor, destination);
+    /* Only use the big tile denoised buffer to access the denoised passes.
+     * The guiding passes are allowed to be modified in-place for the needs of the denoiser,
+     * so copy those from the original devices buffers. */
+    if (pass_accessor.get_pass_access_info().mode == PassMode::DENOISED) {
+      return big_tile_denoise_work_->get_render_tile_pixels(pass_accessor, destination);
+    }
  }

  bool success = true;
--- a/intern/cycles/integrator/path_trace_display.cpp
+++ b/intern/cycles/integrator/path_trace_display.cpp
@@ -9,7 +9,9 @@

 CCL_NAMESPACE_BEGIN

-PathTraceDisplay::PathTraceDisplay(unique_ptr<DisplayDriver> driver) : driver_(move(driver)) {}
+PathTraceDisplay::PathTraceDisplay(unique_ptr<DisplayDriver> driver) : driver_(std::move(driver))
+{
+}

 void PathTraceDisplay::reset(const BufferParams &buffer_params, const bool reset_rendering)
 {
--- a/intern/cycles/integrator/path_trace_work_cpu.cpp
+++ b/intern/cycles/integrator/path_trace_work_cpu.cpp
@@ -357,8 +357,12 @@ void PathTraceWorkCPU::guiding_push_sample_data_to_global_storage(
 #  if PATH_GUIDING_LEVEL >= 2
  const bool use_direct_light = kernel_data.integrator.use_guiding_direct_light;
  const bool use_mis_weights = kernel_data.integrator.use_guiding_mis_weights;
+#    if OPENPGL_VERSION_MINOR >= 5
+  kg->opgl_path_segment_storage->PrepareSamples(use_mis_weights, use_direct_light, false);
+#    else
  kg->opgl_path_segment_storage->PrepareSamples(
      false, nullptr, use_mis_weights, use_direct_light, false);
+#    endif
 #  endif

 #  ifdef WITH_CYCLES_DEBUG
--- a/intern/cycles/integrator/path_trace_work_gpu.cpp
+++ b/intern/cycles/integrator/path_trace_work_gpu.cpp
@@ -28,6 +28,7 @@ static size_t estimate_single_state_size(const uint kernel_features)
 #define KERNEL_STRUCT_ARRAY_MEMBER(parent_struct, type, name, feature) \
  state_size += (kernel_features & (feature)) ? sizeof(type) : 0;
 #define KERNEL_STRUCT_END(name) \
+  (void)array_index; \
  break; \
  }
 #define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \
@@ -139,6 +140,7 @@ void PathTraceWorkGPU::alloc_integrator_soa()
    integrator_state_gpu_.parent_struct[array_index].name = (type *)array->device_pointer; \
  }
 #define KERNEL_STRUCT_END(name) \
+  (void)array_index; \
  break; \
  }
 #define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \
@@ -299,8 +301,8 @@ void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
   * become busy after adding new tiles). This is especially important for the shadow catcher which
   * schedules work in halves of available number of paths. */
  work_tile_scheduler_.set_max_num_path_states(max_num_paths_ / 8);
-  work_tile_scheduler_.set_accelerated_rt((device_->get_bvh_layout_mask() & BVH_LAYOUT_OPTIX) !=
-                                          0);
+  work_tile_scheduler_.set_accelerated_rt(
+      (device_->get_bvh_layout_mask(device_scene_->data.kernel_features) & BVH_LAYOUT_OPTIX) != 0);
  work_tile_scheduler_.reset(effective_buffer_params_,
                             start_sample,
                             samples_num,
--- a/intern/cycles/integrator/work_tile_scheduler.cpp
+++ b/intern/cycles/integrator/work_tile_scheduler.cpp
@@ -55,21 +55,29 @@ void WorkTileScheduler::reset_scheduler_state()

  VLOG_WORK << "Will schedule tiles of size " << tile_size_;

-  if (VLOG_IS_ON(3)) {
-    /* The logging is based on multiple tiles scheduled, ignoring overhead of multi-tile scheduling
-     * and purely focusing on the number of used path states. */
-    const int num_path_states_in_tile = tile_size_.width * tile_size_.height *
-                                        tile_size_.num_samples;
-    const int num_tiles = max_num_path_states_ / num_path_states_in_tile;
-    VLOG_WORK << "Number of unused path states: "
-              << max_num_path_states_ - num_tiles * num_path_states_in_tile;
+  const int num_path_states_in_tile = tile_size_.width * tile_size_.height *
+                                      tile_size_.num_samples;
+
+  if (num_path_states_in_tile == 0) {
+    num_tiles_x_ = 0;
+    num_tiles_y_ = 0;
+    num_tiles_per_sample_range_ = 0;
+  }
+  else {
+    if (VLOG_IS_ON(3)) {
+      /* The logging is based on multiple tiles scheduled, ignoring overhead of multi-tile
+       * scheduling and purely focusing on the number of used path states. */
+      const int num_tiles = max_num_path_states_ / num_path_states_in_tile;
+      VLOG_WORK << "Number of unused path states: "
+                << max_num_path_states_ - num_tiles * num_path_states_in_tile;
+    }
+
+    num_tiles_x_ = divide_up(image_size_px_.x, tile_size_.width);
+    num_tiles_y_ = divide_up(image_size_px_.y, tile_size_.height);
+    num_tiles_per_sample_range_ = divide_up(samples_num_, tile_size_.num_samples);
  }

-  num_tiles_x_ = divide_up(image_size_px_.x, tile_size_.width);
-  num_tiles_y_ = divide_up(image_size_px_.y, tile_size_.height);
-
  total_tiles_num_ = num_tiles_x_ * num_tiles_y_;
-  num_tiles_per_sample_range_ = divide_up(samples_num_, tile_size_.num_samples);

  next_work_index_ = 0;
  total_work_size_ = total_tiles_num_ * num_tiles_per_sample_range_;
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -96,10 +96,13 @@ set(SRC_KERNEL_DEVICE_ONEAPI_HEADERS
  device/oneapi/compat.h
  device/oneapi/context_begin.h
  device/oneapi/context_end.h
+  device/oneapi/context_intersect_begin.h
+  device/oneapi/context_intersect_end.h
  device/oneapi/globals.h
  device/oneapi/image.h
  device/oneapi/kernel.h
  device/oneapi/kernel_templates.h
+  device/cpu/bvh.h
 )

 set(SRC_KERNEL_CLOSURE_HEADERS
@@ -764,7 +767,7 @@ if(WITH_CYCLES_DEVICE_ONEAPI)

  # Set defaults for spir64 and spir64_gen options
  if(NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64)
-    set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-large-register-file -ze-opt-regular-grf-kernel integrator_intersect'")
+    set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-regular-grf-kernel integrator_intersect -ze-opt-large-grf-kernel shade -ze-opt-no-local-to-generic'")
  endif()
  if(NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen)
    set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "${CYCLES_ONEAPI_SYCL_OPTIONS_spir64}" CACHE STRING "Extra build options for spir64_gen target")
@@ -775,8 +778,6 @@ if(WITH_CYCLES_DEVICE_ONEAPI)

  # Host execution won't use GPU binaries, no need to compile them.
  if(WITH_CYCLES_ONEAPI_BINARIES AND NOT WITH_CYCLES_ONEAPI_HOST_TASK_EXECUTION)
-    # AoT binaries aren't currently reused when calling sycl::build.
-    list(APPEND sycl_compiler_flags -DSYCL_SKIP_KERNELS_PRELOAD)
-    # Iterate over all targest and their options
+    # Iterate over all targets and their options
    list(JOIN CYCLES_ONEAPI_SYCL_TARGETS "," targets_string)
    list(APPEND sycl_compiler_flags -fsycl-targets=${targets_string})
@@ -798,6 +799,50 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
      -I"${NANOVDB_INCLUDE_DIR}")
  endif()

+  if(WITH_CYCLES_EMBREE AND EMBREE_SYCL_SUPPORT)
+    list(APPEND sycl_compiler_flags
+      -DWITH_EMBREE
+      -DWITH_EMBREE_GPU
+      -DEMBREE_MAJOR_VERSION=${EMBREE_MAJOR_VERSION}
+      -I"${EMBREE_INCLUDE_DIRS}")
+
+    if(WIN32)
+      list(APPEND sycl_compiler_flags
+        -ladvapi32.lib
+      )
+    endif()
+
+    # Split the Embree and TBB library lists into per-configuration lists.
+    # An `optimized` or `debug` keyword applies to the next library only;
+    # libraries without a preceding keyword are added to both lists.
+    set(EMBREE_TBB_LIBRARIES_optimized "")
+    set(EMBREE_TBB_LIBRARIES_debug "")
+    set(next_library_mode "")
+    foreach(library ${EMBREE_LIBRARIES} ${TBB_LIBRARIES})
+      string(TOLOWER "${library}" library_lower)
+      if(("${library_lower}" STREQUAL "optimized") OR
+         ("${library_lower}" STREQUAL "debug"))
+        set(next_library_mode "${library_lower}")
+      else()
+        if(next_library_mode STREQUAL "")
+          list(APPEND EMBREE_TBB_LIBRARIES_optimized ${library})
+          list(APPEND EMBREE_TBB_LIBRARIES_debug ${library})
+        else()
+          list(APPEND EMBREE_TBB_LIBRARIES_${next_library_mode} ${library})
+        endif()
+        set(next_library_mode "")
+      endif()
+    endforeach()
+
+    # Pass the libraries matching the active build configuration.
+    list(APPEND sycl_compiler_flags
+      "$<$<CONFIG:Release>:${EMBREE_TBB_LIBRARIES_optimized}>"
+      "$<$<CONFIG:RelWithDebInfo>:${EMBREE_TBB_LIBRARIES_optimized}>"
+      "$<$<CONFIG:MinSizeRel>:${EMBREE_TBB_LIBRARIES_optimized}>"
+      "$<$<CONFIG:Debug>:${EMBREE_TBB_LIBRARIES_debug}>"
+    )
+  endif()
+
  if(WITH_CYCLES_DEBUG)
    list(APPEND sycl_compiler_flags -DWITH_CYCLES_DEBUG)
  endif()
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -21,6 +21,28 @@
 #  define __BVH2__
 #endif

+#if defined(__KERNEL_ONEAPI__) && defined(WITH_EMBREE_GPU)
+/* bool is apparently not tested for specialization constants:
+ * https://github.com/intel/llvm/blob/39d1c65272a786b2b13a6f094facfddf9408406d/sycl/test/basic_tests/SYCL-2020-spec-constants.cpp#L25-L27
+ * Instead of adding one more bool specialization constant, we reuse the existing embree_features
+ * one and use RTC_FEATURE_FLAG_NONE as the value that signals not to call Embree on the GPU.
+ */
+/* We set it to RTC_FEATURE_FLAG_NONE by default so AoT binaries contain MNEE and ray-trace
+ * kernels pre-compiled without Embree.
+ * Changing this default value would require updating the logic in oneapi_load_kernels(). */
+static constexpr sycl::specialization_id<RTCFeatureFlags> oneapi_embree_features{
+    RTC_FEATURE_FLAG_NONE};
+#  define IF_USING_EMBREE \
+    if (kernel_handler.get_specialization_constant<oneapi_embree_features>() != \
+        RTC_FEATURE_FLAG_NONE)
+#  define IF_NOT_USING_EMBREE \
+    if (kernel_handler.get_specialization_constant<oneapi_embree_features>() == \
+        RTC_FEATURE_FLAG_NONE)
+#else
+#  define IF_USING_EMBREE
+#  define IF_NOT_USING_EMBREE
+#endif
+
 CCL_NAMESPACE_BEGIN

 #ifdef __BVH2__
@@ -74,30 +96,39 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
  }

 #  ifdef __EMBREE__
-  if (kernel_data.device_bvh) {
-    return kernel_embree_intersect(kg, ray, visibility, isect);
+  IF_USING_EMBREE
+  {
+    if (kernel_data.device_bvh) {
+      return kernel_embree_intersect(kg, ray, visibility, isect);
+    }
  }
 #  endif

+  IF_NOT_USING_EMBREE
+  {
 #  ifdef __OBJECT_MOTION__
-  if (kernel_data.bvh.have_motion) {
+    if (kernel_data.bvh.have_motion) {
 #    ifdef __HAIR__
-    if (kernel_data.bvh.have_curves) {
-      return bvh_intersect_hair_motion(kg, ray, isect, visibility);
-    }
+      if (kernel_data.bvh.have_curves) {
+        return bvh_intersect_hair_motion(kg, ray, isect, visibility);
+      }
 #    endif /* __HAIR__ */

-    return bvh_intersect_motion(kg, ray, isect, visibility);
-  }
+      return bvh_intersect_motion(kg, ray, isect, visibility);
+    }
 #  endif /* __OBJECT_MOTION__ */

 #  ifdef __HAIR__
-  if (kernel_data.bvh.have_curves) {
-    return bvh_intersect_hair(kg, ray, isect, visibility);
-  }
+    if (kernel_data.bvh.have_curves) {
+      return bvh_intersect_hair(kg, ray, isect, visibility);
+    }
 #  endif /* __HAIR__ */

-  return bvh_intersect(kg, ray, isect, visibility);
+    return bvh_intersect(kg, ray, isect, visibility);
+  }
+
+  kernel_assert(false);
+  return false;
 }

 /* Single object BVH traversal, for SSS/AO/bevel. */
@@ -129,17 +160,27 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
  }

 #    ifdef __EMBREE__
-  if (kernel_data.device_bvh) {
-    return kernel_embree_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
+  IF_USING_EMBREE
+  {
+    if (kernel_data.device_bvh) {
+      return kernel_embree_intersect_local(
+          kg, ray, local_isect, local_object, lcg_state, max_hits);
+    }
  }
 #    endif

+  IF_NOT_USING_EMBREE
+  {
 #    ifdef __OBJECT_MOTION__
-  if (kernel_data.bvh.have_motion) {
-    return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
-  }
+    if (kernel_data.bvh.have_motion) {
+      return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
+    }
 #    endif /* __OBJECT_MOTION__ */
-  return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
+    return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
+  }
+
+  kernel_assert(false);
+  return false;
 }
 #  endif

@@ -184,35 +225,44 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
  }

 #    ifdef __EMBREE__
-  if (kernel_data.device_bvh) {
-    return kernel_embree_intersect_shadow_all(
-        kg, state, ray, visibility, max_hits, num_recorded_hits, throughput);
+  IF_USING_EMBREE
+  {
+    if (kernel_data.device_bvh) {
+      return kernel_embree_intersect_shadow_all(
+          kg, state, ray, visibility, max_hits, num_recorded_hits, throughput);
+    }
  }
 #    endif

+  IF_NOT_USING_EMBREE
+  {
 #    ifdef __OBJECT_MOTION__
-  if (kernel_data.bvh.have_motion) {
+    if (kernel_data.bvh.have_motion) {
 #      ifdef __HAIR__
-    if (kernel_data.bvh.have_curves) {
-      return bvh_intersect_shadow_all_hair_motion(
-          kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
-    }
+      if (kernel_data.bvh.have_curves) {
+        return bvh_intersect_shadow_all_hair_motion(
+            kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
+      }
 #      endif /* __HAIR__ */

-    return bvh_intersect_shadow_all_motion(
-        kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
-  }
+      return bvh_intersect_shadow_all_motion(
+          kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
+    }
 #    endif /* __OBJECT_MOTION__ */

 #    ifdef __HAIR__
-  if (kernel_data.bvh.have_curves) {
-    return bvh_intersect_shadow_all_hair(
-        kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
-  }
+    if (kernel_data.bvh.have_curves) {
+      return bvh_intersect_shadow_all_hair(
+          kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
+    }
 #    endif /* __HAIR__ */

-  return bvh_intersect_shadow_all(
-      kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
+    return bvh_intersect_shadow_all(
+        kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
+  }
+
+  kernel_assert(false);
+  return false;
 }
 #  endif /* __SHADOW_RECORD_ALL__ */

@@ -239,13 +289,28 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
    return false;
  }

-#    ifdef __OBJECT_MOTION__
-  if (kernel_data.bvh.have_motion) {
-    return bvh_intersect_volume_motion(kg, ray, isect, visibility);
+#    ifdef __EMBREE__
+  IF_USING_EMBREE
+  {
+    if (kernel_data.device_bvh) {
+      return kernel_embree_intersect_volume(kg, ray, isect, visibility);
+    }
  }
+#    endif
+
+  IF_NOT_USING_EMBREE
+  {
+#    ifdef __OBJECT_MOTION__
+    if (kernel_data.bvh.have_motion) {
+      return bvh_intersect_volume_motion(kg, ray, isect, visibility);
+    }
 #    endif /* __OBJECT_MOTION__ */

-  return bvh_intersect_volume(kg, ray, isect, visibility);
+    return bvh_intersect_volume(kg, ray, isect, visibility);
+  }
+
+  kernel_assert(false);
+  return false;
 }
 #  endif /* defined(__VOLUME__) && !defined(__VOLUME_RECORD_ALL__) */

@@ -275,18 +340,27 @@ ccl_device_intersect uint scene_intersect_volume(KernelGlobals kg,
  }

 #    ifdef __EMBREE__
-  if (kernel_data.device_bvh) {
-    return kernel_embree_intersect_volume(kg, ray, isect, max_hits, visibility);
+  IF_USING_EMBREE
+  {
+    if (kernel_data.device_bvh) {
+      return kernel_embree_intersect_volume(kg, ray, isect, max_hits, visibility);
+    }
  }
 #    endif

+  IF_NOT_USING_EMBREE
+  {
 #    ifdef __OBJECT_MOTION__
-  if (kernel_data.bvh.have_motion) {
-    return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
-  }
+    if (kernel_data.bvh.have_motion) {
+      return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
+    }
 #    endif /* __OBJECT_MOTION__ */

-  return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
+    return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
+  }
+
+  kernel_assert(false);
+  return false;
 }

 #  endif /* defined(__VOLUME__) && defined(__VOLUME_RECORD_ALL__) */
--- a/intern/cycles/kernel/bvh/volume_all.h
+++ b/intern/cycles/kernel/bvh/volume_all.h
@@ -51,8 +51,6 @@ ccl_device_inline
  int object = OBJECT_NONE;
  float isect_t = ray->tmax;

-  int num_hits_in_instance = 0;
-
  uint num_hits = 0;
  isect_array->t = ray->tmax;

@@ -152,7 +150,6 @@ ccl_device_inline
                  /* Move on to next entry in intersections array. */
                  isect_array++;
                  num_hits++;
-                  num_hits_in_instance++;
                  isect_array->t = isect_t;
                  if (num_hits == max_hits) {
                    return num_hits;
@@ -193,7 +190,6 @@ ccl_device_inline
                  /* Move on to next entry in intersections array. */
                  isect_array++;
                  num_hits++;
-                  num_hits_in_instance++;
                  isect_array->t = isect_t;
                  if (num_hits == max_hits) {
                    return num_hits;
@@ -219,7 +215,6 @@ ccl_device_inline
            bvh_instance_push(kg, object, ray, &P, &dir, &idir);
 #endif

-            num_hits_in_instance = 0;
            isect_array->t = isect_t;

            ++stack_ptr;
--- a/intern/cycles/kernel/data_arrays.h
+++ b/intern/cycles/kernel/data_arrays.h
@@ -64,6 +64,7 @@ KERNEL_DATA_ARRAY(float2, light_background_conditional_cdf)
 KERNEL_DATA_ARRAY(KernelLightTreeNode, light_tree_nodes)
 KERNEL_DATA_ARRAY(KernelLightTreeEmitter, light_tree_emitters)
 KERNEL_DATA_ARRAY(uint, light_to_tree)
+KERNEL_DATA_ARRAY(uint, object_to_tree)
 KERNEL_DATA_ARRAY(uint, object_lookup_offset)
 KERNEL_DATA_ARRAY(uint, triangle_to_tree)

--- a/intern/cycles/kernel/data_template.h
+++ b/intern/cycles/kernel/data_template.h
@@ -20,6 +20,7 @@ KERNEL_STRUCT_BEGIN(KernelBackground, background)
 /* xyz store direction, w the angle. float4 instead of float3 is used
 * to ensure consistent padding/alignment across devices. */
 KERNEL_STRUCT_MEMBER(background, float4, sun)
+KERNEL_STRUCT_MEMBER(background, int, use_sun_guiding)
 /* Only shader index. */
 KERNEL_STRUCT_MEMBER(background, int, surface_shader)
 KERNEL_STRUCT_MEMBER(background, int, volume_shader)
@@ -39,6 +40,10 @@ KERNEL_STRUCT_MEMBER(background, int, use_mis)
 KERNEL_STRUCT_MEMBER(background, int, lightgroup)
 /* Light Index. */
 KERNEL_STRUCT_MEMBER(background, int, light_index)
+/* Padding. */
+KERNEL_STRUCT_MEMBER(background, int, pad1)
+KERNEL_STRUCT_MEMBER(background, int, pad2)
+KERNEL_STRUCT_MEMBER(background, int, pad3)
 KERNEL_STRUCT_END(KernelBackground)

 /* BVH: own BVH2 if no native device acceleration struct used. */
--- a/intern/cycles/kernel/device/cpu/bvh.h
+++ b/intern/cycles/kernel/device/cpu/bvh.h
@@ -13,8 +13,13 @@
 #  include <embree3/rtcore_scene.h>
 #endif

-#include "kernel/device/cpu/compat.h"
-#include "kernel/device/cpu/globals.h"
+#ifdef __KERNEL_ONEAPI__
+#  include "kernel/device/oneapi/compat.h"
+#  include "kernel/device/oneapi/globals.h"
+#else
+#  include "kernel/device/cpu/compat.h"
+#  include "kernel/device/cpu/globals.h"
+#endif

 #include "kernel/bvh/types.h"
 #include "kernel/bvh/util.h"
@@ -33,11 +38,16 @@ using numhit_t = uint8_t;
 using numhit_t = uint32_t;
 #endif

-#define CYCLES_EMBREE_USED_FEATURES \
-  (RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE | \
-                    RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_FEATURE_FLAG_POINT | \
-                    RTC_FEATURE_FLAG_MOTION_BLUR | RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE | \
-                    RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE)
+/* Feature mask assigned to `feature_mask` of the Embree intersect/occluded arguments.
+ * With oneAPI the mask is read from the `oneapi_embree_features` specialization constant
+ * through the kernel handler; otherwise it is a fixed set of flags. */
+#ifdef __KERNEL_ONEAPI__
+#  define CYCLES_EMBREE_USED_FEATURES \
+    (kernel_handler.get_specialization_constant<oneapi_embree_features>())
+#else
+#  define CYCLES_EMBREE_USED_FEATURES \
+    (RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE | \
+                      RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_FEATURE_FLAG_POINT | \
+                      RTC_FEATURE_FLAG_MOTION_BLUR | RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE | \
+                      RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE)
+#endif

 #define EMBREE_IS_HAIR(x) (x & 1)

@@ -99,7 +109,9 @@ struct CCLVolumeContext
 #if EMBREE_MAJOR_VERSION >= 4
  KernelGlobals kg;
  const Ray *ray;
+#  ifdef __VOLUME_RECORD_ALL__
  numhit_t max_hits;
+#  endif
  numhit_t num_hits;
 #endif
  Intersection *vol_isect;
@@ -252,7 +264,8 @@ ccl_device_inline void kernel_embree_convert_sss_hit(KernelGlobals kg,
 * Things like recording subsurface or shadow hits for later evaluation
 * as well as filtering for volume objects happen here.
 * Cycles' own BVH does that directly inside the traversal calls. */
-ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNArguments *args)
+ccl_device_forceinline void kernel_embree_filter_intersection_func_impl(
+    const RTCFilterFunctionNArguments *args)
 {
  /* Current implementation in Cycles assumes only single-ray intersection queries. */
  assert(args->N == 1);
@@ -263,7 +276,11 @@ ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNA
 #else
  CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
 #endif
+#ifdef __KERNEL_ONEAPI__
+  KernelGlobalsGPU *kg = nullptr;
+#else
  const KernelGlobalsCPU *kg = ctx->kg;
+#endif
  const Ray *cray = ctx->ray;

  if (kernel_embree_is_self_intersection(
@@ -277,7 +294,7 @@ ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNA
 * as well as filtering for volume objects happen here.
 * Cycles' own BVH does that directly inside the traversal calls.
 */
-ccl_device void kernel_embree_filter_occluded_shadow_all_func(
+ccl_device_forceinline void kernel_embree_filter_occluded_shadow_all_func_impl(
    const RTCFilterFunctionNArguments *args)
 {
  /* Current implementation in Cycles assumes only single-ray intersection queries. */
@@ -290,7 +307,11 @@ ccl_device void kernel_embree_filter_occluded_shadow_all_func(
 #else
  CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
 #endif
+#ifdef __KERNEL_ONEAPI__
+  KernelGlobalsGPU *kg = nullptr;
+#else
  const KernelGlobalsCPU *kg = ctx->kg;
+#endif
  const Ray *cray = ctx->ray;

  Intersection current_isect;
@@ -326,7 +347,7 @@ ccl_device void kernel_embree_filter_occluded_shadow_all_func(
  }

  /* Test if we need to record this transparent intersection. */
-  const numhit_t max_record_hits = min(ctx->max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
+  const numhit_t max_record_hits = min(ctx->max_hits, numhit_t(INTEGRATOR_SHADOW_ISECT_SIZE));
  if (ctx->num_recorded_hits < max_record_hits) {
    /* If maximum number of hits was reached, replace the intersection with the
     * highest distance. We want to find the N closest intersections. */
@@ -363,7 +384,7 @@ ccl_device void kernel_embree_filter_occluded_shadow_all_func(
  *args->valid = 0;
 }

-ccl_device_forceinline void kernel_embree_filter_occluded_local_func(
+ccl_device_forceinline void kernel_embree_filter_occluded_local_func_impl(
    const RTCFilterFunctionNArguments *args)
 {
  /* Current implementation in Cycles assumes only single-ray intersection queries. */
@@ -376,7 +397,11 @@ ccl_device_forceinline void kernel_embree_filter_occluded_local_func(
 #else
  CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
 #endif
+#ifdef __KERNEL_ONEAPI__
+  KernelGlobalsGPU *kg = nullptr;
+#else
  const KernelGlobalsCPU *kg = ctx->kg;
+#endif
  const Ray *cray = ctx->ray;

  /* Check if it's hitting the correct object. */
@@ -462,7 +487,7 @@ ccl_device_forceinline void kernel_embree_filter_occluded_local_func(
  *args->valid = 0;
 }

-ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func(
+ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func_impl(
    const RTCFilterFunctionNArguments *args)
 {
  /* Current implementation in Cycles assumes only single-ray intersection queries. */
@@ -475,11 +500,17 @@ ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func(
 #else
  CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
 #endif
+#ifdef __KERNEL_ONEAPI__
+  KernelGlobalsGPU *kg = nullptr;
+#else
  const KernelGlobalsCPU *kg = ctx->kg;
+#endif
  const Ray *cray = ctx->ray;

+#ifdef __VOLUME_RECORD_ALL__
  /* Append the intersection to the end of the array. */
  if (ctx->num_hits < ctx->max_hits) {
+#endif
    Intersection current_isect;
    kernel_embree_convert_hit(
        kg, ray, hit, &current_isect, reinterpret_cast<intptr_t>(args->geometryUserPtr));
@@ -496,10 +527,17 @@ ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func(
    int object_flag = kernel_data_fetch(object_flag, tri_object);
    if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
      --ctx->num_hits;
+#ifndef __VOLUME_RECORD_ALL__
+      /* Without __VOLUME_RECORD_ALL__ we only need the first counted hit, so tracing
+       * continues only if the current hit is not counted. */
+      *args->valid = 0;
+#endif
    }
+#ifdef __VOLUME_RECORD_ALL__
    /* This tells Embree to continue tracing. */
    *args->valid = 0;
  }
+#endif
 }

 #if EMBREE_MAJOR_VERSION < 4
@@ -513,14 +551,14 @@ ccl_device_forceinline void kernel_embree_filter_occluded_func(

  switch (ctx->type) {
    case CCLIntersectContext::RAY_SHADOW_ALL:
-      kernel_embree_filter_occluded_shadow_all_func(args);
+      kernel_embree_filter_occluded_shadow_all_func_impl(args);
      break;
    case CCLIntersectContext::RAY_LOCAL:
    case CCLIntersectContext::RAY_SSS:
-      kernel_embree_filter_occluded_local_func(args);
+      kernel_embree_filter_occluded_local_func_impl(args);
      break;
    case CCLIntersectContext::RAY_VOLUME_ALL:
-      kernel_embree_filter_occluded_volume_all_func(args);
+      kernel_embree_filter_occluded_volume_all_func_impl(args);
      break;

    case CCLIntersectContext::RAY_REGULAR:
@@ -569,7 +607,63 @@ ccl_device void kernel_embree_filter_occluded_func_backface_cull(

  kernel_embree_filter_occluded_func(args);
 }
+#endif

+#ifdef __KERNEL_ONEAPI__
+/* Static wrappers so we can call the callbacks from outside the ONEAPIKernelContext class. */
+/* Filter callback used for `kernel_embree_filter_intersection_func` on oneAPI.
+ * Takes the kernel context pointer stored in the `kg` field of the ray query context and
+ * forwards `args` to the member implementation on that context. */
+RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
+kernel_embree_filter_intersection_func_static(const RTCFilterFunctionNArguments *args)
+{
+  CCLFirstHitContext *ctx = (CCLFirstHitContext *)(args->context);
+  ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
+  context->kernel_embree_filter_intersection_func_impl(args);
+}
+
+/* Filter callback used for `kernel_embree_filter_occluded_shadow_all_func` on oneAPI.
+ * Takes the kernel context pointer stored in the `kg` field of the shadow ray query context
+ * and forwards `args` to the member implementation on that context. */
+RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
+kernel_embree_filter_occluded_shadow_all_func_static(const RTCFilterFunctionNArguments *args)
+{
+  CCLShadowContext *ctx = (CCLShadowContext *)(args->context);
+  ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
+  context->kernel_embree_filter_occluded_shadow_all_func_impl(args);
+}
+
+/* Filter callback used for `kernel_embree_filter_occluded_local_func` on oneAPI.
+ * Takes the kernel context pointer stored in the `kg` field of the local ray query context
+ * and forwards `args` to the member implementation on that context. */
+RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
+kernel_embree_filter_occluded_local_func_static(const RTCFilterFunctionNArguments *args)
+{
+  CCLLocalContext *ctx = (CCLLocalContext *)(args->context);
+  ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
+  context->kernel_embree_filter_occluded_local_func_impl(args);
+}
+
+/* Filter callback used for `kernel_embree_filter_occluded_volume_all_func` on oneAPI.
+ * Takes the kernel context pointer stored in the `kg` field of the volume ray query context
+ * and forwards `args` to the member implementation on that context. */
+RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
+kernel_embree_filter_occluded_volume_all_func_static(const RTCFilterFunctionNArguments *args)
+{
+  CCLVolumeContext *ctx = (CCLVolumeContext *)(args->context);
+  ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
+  context->kernel_embree_filter_occluded_volume_all_func_impl(args);
+}
+
+#  define kernel_embree_filter_intersection_func \
+    ONEAPIKernelContext::kernel_embree_filter_intersection_func_static
+#  define kernel_embree_filter_occluded_shadow_all_func \
+    ONEAPIKernelContext::kernel_embree_filter_occluded_shadow_all_func_static
+#  define kernel_embree_filter_occluded_local_func \
+    ONEAPIKernelContext::kernel_embree_filter_occluded_local_func_static
+#  define kernel_embree_filter_occluded_volume_all_func \
+    ONEAPIKernelContext::kernel_embree_filter_occluded_volume_all_func_static
+#else
+#  define kernel_embree_filter_intersection_func kernel_embree_filter_intersection_func_impl
+#  if EMBREE_MAJOR_VERSION >= 4
+#    define kernel_embree_filter_occluded_shadow_all_func \
+      kernel_embree_filter_occluded_shadow_all_func_impl
+#    define kernel_embree_filter_occluded_local_func kernel_embree_filter_occluded_local_func_impl
+#    define kernel_embree_filter_occluded_volume_all_func \
+      kernel_embree_filter_occluded_volume_all_func_impl
+#  endif
 #endif

 /* Scene intersection. */
@@ -583,7 +677,15 @@ ccl_device_intersect bool kernel_embree_intersect(KernelGlobals kg,
 #if EMBREE_MAJOR_VERSION >= 4
  CCLFirstHitContext ctx;
  rtcInitRayQueryContext(&ctx);
+#  ifdef __KERNEL_ONEAPI__
+  /* NOTE(sirgienko): Cycles GPU back-ends either pass NULL as KernelGlobals and use
+   * global device allocations (CUDA, OptiX, HIP), or pass all needed data as a class
+   * context (Metal, oneAPI). So the context is passed here in order to have access
+   * to it later in the Embree filter functions on the GPU. */
+  ctx.kg = (KernelGlobals)this;
+#  else
  ctx.kg = kg;
+#  endif
 #else
  CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
  rtcInitIntersectContext(&ctx);
@@ -596,7 +698,7 @@ ccl_device_intersect bool kernel_embree_intersect(KernelGlobals kg,
 #if EMBREE_MAJOR_VERSION >= 4
  RTCIntersectArguments args;
  rtcInitIntersectArguments(&args);
-  args.filter = (RTCFilterFunctionN)kernel_embree_filter_intersection_func;
+  args.filter = reinterpret_cast<RTCFilterFunctionN>(kernel_embree_filter_intersection_func);
  args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
  args.context = &ctx;
  rtcIntersect1(kernel_data.device_bvh, &ray_hit, &args);
@@ -625,7 +727,15 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
 #  if EMBREE_MAJOR_VERSION >= 4
  CCLLocalContext ctx;
  rtcInitRayQueryContext(&ctx);
+#    ifdef __KERNEL_ONEAPI__
+  /* NOTE(sirgienko): Cycles GPU back-ends either pass NULL as KernelGlobals and use
+   * global device allocations (CUDA, OptiX, HIP), or pass all needed data as a class
+   * context (Metal, oneAPI). So the context is passed here in order to have access
+   * to it later in the Embree filter functions on the GPU. */
+  ctx.kg = (KernelGlobals)this;
+#    else
  ctx.kg = kg;
+#    endif
 #  else
  CCLIntersectContext ctx(kg,
                          has_bvh ? CCLIntersectContext::RAY_SSS : CCLIntersectContext::RAY_LOCAL);
@@ -646,7 +756,7 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
 #  if EMBREE_MAJOR_VERSION >= 4
  RTCOccludedArguments args;
  rtcInitOccludedArguments(&args);
-  args.filter = (RTCFilterFunctionN)(kernel_embree_filter_occluded_local_func);
+  args.filter = reinterpret_cast<RTCFilterFunctionN>(kernel_embree_filter_occluded_local_func);
  args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
  args.context = &ctx;
 #  endif
@@ -692,7 +802,7 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,

 #ifdef __SHADOW_RECORD_ALL__
 ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
-                                                             IntegratorShadowStateCPU *state,
+                                                             IntegratorShadowState state,
                                                             ccl_private const Ray *ray,
                                                             uint visibility,
                                                             uint max_hits,
@@ -702,7 +812,15 @@ ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
 #  if EMBREE_MAJOR_VERSION >= 4
  CCLShadowContext ctx;
  rtcInitRayQueryContext(&ctx);
+#    ifdef __KERNEL_ONEAPI__
+  /* NOTE(sirgienko): Cycles GPU back-ends either pass NULL as KernelGlobals and use
+   * global device allocations (CUDA, OptiX, HIP), or pass all needed data as a class
+   * context (Metal, oneAPI). So the context is passed here in order to have access
+   * to it later in the Embree filter functions on the GPU. */
+  ctx.kg = (KernelGlobals)this;
+#    else
  ctx.kg = kg;
+#    endif
 #  else
  CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
  rtcInitIntersectContext(&ctx);
@@ -718,7 +836,8 @@ ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
 #  if EMBREE_MAJOR_VERSION >= 4
  RTCOccludedArguments args;
  rtcInitOccludedArguments(&args);
-  args.filter = (RTCFilterFunctionN)kernel_embree_filter_occluded_shadow_all_func;
+  args.filter = reinterpret_cast<RTCFilterFunctionN>(
+      kernel_embree_filter_occluded_shadow_all_func);
  args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
  args.context = &ctx;
  rtcOccluded1(kernel_data.device_bvh, &rtc_ray, &args);
@@ -736,19 +855,31 @@ ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
 ccl_device_intersect uint kernel_embree_intersect_volume(KernelGlobals kg,
                                                         ccl_private const Ray *ray,
                                                         ccl_private Intersection *isect,
+#  ifdef __VOLUME_RECORD_ALL__
                                                         const uint max_hits,
+#  endif
                                                         const uint visibility)
 {
 #  if EMBREE_MAJOR_VERSION >= 4
  CCLVolumeContext ctx;
  rtcInitRayQueryContext(&ctx);
+#    ifdef __KERNEL_ONEAPI__
+  /* NOTE(sirgienko): Cycles GPU back-ends either pass NULL as KernelGlobals and use
+   * global device allocations (CUDA, OptiX, HIP), or pass all needed data as a class
+   * context (Metal, oneAPI). So the context is passed here in order to have access
+   * to it later in the Embree filter functions on the GPU. */
+  ctx.kg = (KernelGlobals)this;
+#    else
  ctx.kg = kg;
+#    endif
 #  else
  CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
  rtcInitIntersectContext(&ctx);
 #  endif
  ctx.vol_isect = isect;
+#  ifdef __VOLUME_RECORD_ALL__
  ctx.max_hits = numhit_t(max_hits);
+#  endif
  ctx.num_hits = numhit_t(0);
  ctx.ray = ray;
  RTCRay rtc_ray;
@@ -756,7 +887,8 @@ ccl_device_intersect uint kernel_embree_intersect_volume(KernelGlobals kg,
 #  if EMBREE_MAJOR_VERSION >= 4
  RTCOccludedArguments args;
  rtcInitOccludedArguments(&args);
-  args.filter = (RTCFilterFunctionN)kernel_embree_filter_occluded_volume_all_func;
+  args.filter = reinterpret_cast<RTCFilterFunctionN>(
+      kernel_embree_filter_occluded_volume_all_func);
  args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
  args.context = &ctx;
  rtcOccluded1(kernel_data.device_bvh, &rtc_ray, &args);
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -128,6 +128,12 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
 }
 ccl_gpu_kernel_postfix

+/* Intersection kernels need access to the kernel handler for specialization constants to work
+ * properly. */
+#ifdef __KERNEL_ONEAPI__
+#  include "kernel/device/oneapi/context_intersect_begin.h"
+#endif
+
 ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
    ccl_gpu_kernel_signature(integrator_intersect_closest,
                             ccl_global const int *path_index_array,
@@ -185,6 +191,10 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
 }
 ccl_gpu_kernel_postfix

+#ifdef __KERNEL_ONEAPI__
+#  include "kernel/device/oneapi/context_intersect_end.h"
+#endif
+
 ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
    ccl_gpu_kernel_signature(integrator_shade_background,
                             ccl_global const int *path_index_array,
@@ -249,6 +259,12 @@ ccl_gpu_kernel_postfix
 constant int __dummy_constant [[function_constant(Kernel_DummyConstant)]];
 #endif

+/* Kernels using intersections need access to the kernel handler for specialization constants to
+ * work properly. */
+#ifdef __KERNEL_ONEAPI__
+#  include "kernel/device/oneapi/context_intersect_begin.h"
+#endif
+
 ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
    ccl_gpu_kernel_signature(integrator_shade_surface_raytrace,
                             ccl_global const int *path_index_array,
@@ -287,6 +303,9 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
  }
 }
 ccl_gpu_kernel_postfix
+#ifdef __KERNEL_ONEAPI__
+#  include "kernel/device/oneapi/context_intersect_end.h"
+#endif

 ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
    ccl_gpu_kernel_signature(integrator_shade_volume,
--- a/intern/cycles/kernel/device/oneapi/compat.h
+++ b/intern/cycles/kernel/device/oneapi/compat.h
@@ -5,6 +5,11 @@

 #define __KERNEL_GPU__
 #define __KERNEL_ONEAPI__
+#define __KERNEL_64_BIT__
+
+#ifdef WITH_EMBREE_GPU
+#  define __KERNEL_GPU_RAYTRACING__
+#endif

 #define CCL_NAMESPACE_BEGIN
 #define CCL_NAMESPACE_END
@@ -57,17 +62,19 @@
 #define ccl_gpu_kernel_threads(block_num_threads)

 #ifndef WITH_ONEAPI_SYCL_HOST_TASK
-#  define ccl_gpu_kernel_signature(name, ...) \
+#  define __ccl_gpu_kernel_signature(name, ...) \
 void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
                          size_t kernel_global_size, \
                          size_t kernel_local_size, \
                          sycl::handler &cgh, \
                          __VA_ARGS__) { \
      (kg); \
-      cgh.parallel_for<class kernel_##name>( \
+      cgh.parallel_for( \
          sycl::nd_range<1>(kernel_global_size, kernel_local_size), \
          [=](sycl::nd_item<1> item) {

+#  define ccl_gpu_kernel_signature __ccl_gpu_kernel_signature
+
 #  define ccl_gpu_kernel_postfix \
          }); \
    }
--- a/intern/cycles/kernel/device/oneapi/context_intersect_begin.h
+++ b/intern/cycles/kernel/device/oneapi/context_intersect_begin.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2023 Intel Corporation */
+
+/* Redefine `ccl_gpu_kernel_signature` so that the kernel lambda additionally receives a
+ * `sycl::kernel_handler`, which is stored in the kernel context as the first statement of
+ * the lambda. Only takes effect when WITH_ONEAPI_SYCL_HOST_TASK is not defined and
+ * WITH_EMBREE_GPU is defined. */
+#if !defined(WITH_ONEAPI_SYCL_HOST_TASK) && defined(WITH_EMBREE_GPU)
+#  undef ccl_gpu_kernel_signature
+#  define ccl_gpu_kernel_signature(name, ...) \
+    void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
+                              size_t kernel_global_size, \
+                              size_t kernel_local_size, \
+                              sycl::handler &cgh, \
+                              __VA_ARGS__) \
+    { \
+      (kg); \
+      cgh.parallel_for( \
+          sycl::nd_range<1>(kernel_global_size, kernel_local_size), \
+          [=](sycl::nd_item<1> item, sycl::kernel_handler oneapi_kernel_handler) { \
+            ((ONEAPIKernelContext*)kg)->kernel_handler = oneapi_kernel_handler;
+#endif
--- a/intern/cycles/kernel/device/oneapi/context_intersect_end.h
+++ b/intern/cycles/kernel/device/oneapi/context_intersect_end.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2023 Intel Corporation */
+
+/* Point `ccl_gpu_kernel_signature` back at `__ccl_gpu_kernel_signature`, under the same
+ * preprocessor condition that guards the override in context_intersect_begin.h. */
+#if !defined(WITH_ONEAPI_SYCL_HOST_TASK) && defined(WITH_EMBREE_GPU)
+#  undef ccl_gpu_kernel_signature
+#  define ccl_gpu_kernel_signature __ccl_gpu_kernel_signature
+#endif
--- a/intern/cycles/kernel/device/oneapi/globals.h
+++ b/intern/cycles/kernel/device/oneapi/globals.h
@@ -31,6 +31,8 @@ typedef struct KernelGlobalsGPU {
  size_t nd_item_group_range_0;
  size_t nd_item_global_id_0;
  size_t nd_item_global_range_0;
+#else
+  sycl::kernel_handler kernel_handler;
 #endif
 } KernelGlobalsGPU;

--- a/intern/cycles/kernel/device/oneapi/kernel.cpp
+++ b/intern/cycles/kernel/device/oneapi/kernel.cpp
@@ -16,9 +16,22 @@

 #  include "kernel/device/gpu/kernel.h"

+#  include "device/kernel.cpp"
+
 static OneAPIErrorCallback s_error_cb = nullptr;
 static void *s_error_user_ptr = nullptr;

+#  ifdef WITH_EMBREE_GPU
+/* Embree feature masks used as values of the `oneapi_embree_features` specialization
+ * constant in oneapi_load_kernels(): the basic set is used when hair is not among the
+ * requested kernel features. */
+static const RTCFeatureFlags CYCLES_ONEAPI_EMBREE_BASIC_FEATURES =
+    (const RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE |
+                            RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS |
+                            RTC_FEATURE_FLAG_POINT | RTC_FEATURE_FLAG_MOTION_BLUR);
+/* Basic set plus the round and flat Catmull-Rom curve flags; used when hair is requested. */
+static const RTCFeatureFlags CYCLES_ONEAPI_EMBREE_ALL_FEATURES =
+    (const RTCFeatureFlags)(CYCLES_ONEAPI_EMBREE_BASIC_FEATURES |
+                            RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE |
+                            RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE);
+#  endif
+
 void oneapi_set_error_cb(OneAPIErrorCallback cb, void *user_ptr)
 {
  s_error_cb = cb;
@@ -142,15 +155,99 @@ size_t oneapi_kernel_preferred_local_size(SyclQueue *queue,
  return std::min(limit_work_group_size, preferred_work_group_size);
 }

-bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
+/* Check whether a kernel has to be built for the given feature set.
+ *
+ * Returns false when `kernel_name` contains the name of the surface ray-trace, surface MNEE
+ * or volume-stack intersection kernel while the corresponding KERNEL_FEATURE_* bit is not
+ * set in `kernel_features`. Returns true for every other kernel. */
+bool oneapi_kernel_is_required_for_features(const std::string &kernel_name,
+                                            const uint kernel_features)
+{
+  /* Kernels are identified by looking for their string name inside `kernel_name`. */
+  const auto name_contains = [&kernel_name](const auto kernel) {
+    return kernel_name.find(device_kernel_as_string(kernel)) != std::string::npos;
+  };
+
+  const bool raytrace_unused = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0;
+  if (raytrace_unused && name_contains(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE)) {
+    return false;
+  }
+
+  const bool mnee_unused = (kernel_features & KERNEL_FEATURE_MNEE) == 0;
+  if (mnee_unused && name_contains(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE)) {
+    return false;
+  }
+
+  const bool volume_unused = (kernel_features & KERNEL_FEATURE_VOLUME) == 0;
+  if (volume_unused && name_contains(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK)) {
+    return false;
+  }
+
+  return true;
+}
+
+/* Returns true when `kernel_name` contains the name of the surface MNEE kernel or of the
+ * surface ray-trace kernel. */
+bool oneapi_kernel_is_raytrace_or_mnee(const std::string &kernel_name)
+{
+  const std::string::size_type mnee_pos = kernel_name.find(
+      device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE));
+  if (mnee_pos != std::string::npos) {
+    return true;
+  }
+
+  const std::string::size_type raytrace_pos = kernel_name.find(
+      device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE));
+  return raytrace_pos != std::string::npos;
+}
+
+/* Returns true when `kernel_name` contains the name of a device kernel that has
+ * intersection and is neither the MNEE nor the ray-trace kernel. Always returns false
+ * when WITH_EMBREE_GPU is not defined. */
+bool oneapi_kernel_is_using_embree(const std::string &kernel_name)
+{
+#  ifdef WITH_EMBREE_GPU
+  for (int kernel_index = 0; kernel_index < (int)DEVICE_KERNEL_NUM; kernel_index++) {
+    const DeviceKernel candidate = (DeviceKernel)kernel_index;
+    if (!device_kernel_has_intersection(candidate)) {
+      continue;
+    }
+    if (kernel_name.find(device_kernel_as_string(candidate)) == std::string::npos) {
+      continue;
+    }
+    /* MNEE and Ray-trace kernels aren't yet enabled to use Embree. */
+    return !oneapi_kernel_is_raytrace_or_mnee(kernel_name);
+  }
+#  endif
+  return false;
+}
+
+bool oneapi_load_kernels(SyclQueue *queue_,
+                         const uint kernel_features,
+                         bool use_hardware_raytracing)
 {
-#  ifdef SYCL_SKIP_KERNELS_PRELOAD
-  (void)queue_;
-  (void)requested_features;
-#  else
  assert(queue_);
  sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);

+#  ifdef WITH_EMBREE_GPU
+  /* For best performance, we always JIT compile the kernels that are using Embree. */
+  if (use_hardware_raytracing) {
+    try {
+      sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
+          sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
+                                                             {queue->get_device()});
+
+      for (const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
+        const std::string &kernel_name = kernel_id.get_name();
+
+        if (!oneapi_kernel_is_required_for_features(kernel_name, kernel_features) ||
+            !oneapi_kernel_is_using_embree(kernel_name)) {
+          continue;
+        }
+
+        sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
+            sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
+
+        /* Hair requires embree curves support. */
+        if (kernel_features & KERNEL_FEATURE_HAIR) {
+          one_kernel_bundle_input
+              .set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
+                  CYCLES_ONEAPI_EMBREE_ALL_FEATURES);
+          sycl::build(one_kernel_bundle_input);
+        }
+        else {
+          one_kernel_bundle_input
+              .set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
+                  CYCLES_ONEAPI_EMBREE_BASIC_FEATURES);
+          sycl::build(one_kernel_bundle_input);
+        }
+      }
+    }
+    catch (sycl::exception const &e) {
+      if (s_error_cb) {
+        s_error_cb(e.what(), s_error_user_ptr);
+      }
+      return false;
+    }
+  }
+#  endif
+
  try {
    sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
        sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
@@ -159,27 +256,29 @@ bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
    for (const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
      const std::string &kernel_name = kernel_id.get_name();

-      /* NOTE(@nsirgien): Names in this conditions below should match names from
-       * oneapi_call macro in oneapi_enqueue_kernel below */
-      if (((requested_features & KERNEL_FEATURE_VOLUME) == 0) &&
-          kernel_name.find("oneapi_kernel_integrator_shade_volume") != std::string::npos) {
+      /* In case HWRT is on, compilation of kernels using Embree is already handled in previous
+       * block. */
+      if (!oneapi_kernel_is_required_for_features(kernel_name, kernel_features) ||
+          (use_hardware_raytracing && oneapi_kernel_is_using_embree(kernel_name))) {
        continue;
      }

-      if (((requested_features & KERNEL_FEATURE_MNEE) == 0) &&
-          kernel_name.find("oneapi_kernel_integrator_shade_surface_mnee") != std::string::npos) {
+#  ifdef WITH_EMBREE_GPU
+      if (oneapi_kernel_is_using_embree(kernel_name) ||
+          oneapi_kernel_is_raytrace_or_mnee(kernel_name)) {
+        sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
+            sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
+        one_kernel_bundle_input
+            .set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
+                RTC_FEATURE_FLAG_NONE);
+        sycl::build(one_kernel_bundle_input);
        continue;
      }
-
-      if (((requested_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0) &&
-          kernel_name.find("oneapi_kernel_integrator_shade_surface_raytrace") !=
-              std::string::npos) {
-        continue;
-      }
-
-      sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle =
-          sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
-      sycl::build(one_kernel_bundle);
+#  endif
+      /* This call will ensure that AoT or cached JIT binaries are available
+       * for execution. It will trigger compilation if it is not already the case. */
+      (void)sycl::get_kernel_bundle<sycl::bundle_state::executable>(queue->get_context(),
+                                                                    {kernel_id});
    }
  }
  catch (sycl::exception const &e) {
@@ -188,13 +287,14 @@ bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
    }
    return false;
  }
-#  endif
  return true;
 }

 bool oneapi_enqueue_kernel(KernelContext *kernel_context,
                           int kernel,
                           size_t global_size,
+                           const uint kernel_features,
+                           bool use_hardware_raytracing,
                           void **args)
 {
  bool success = true;
@@ -248,6 +348,21 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,

  try {
    queue->submit([&](sycl::handler &cgh) {
+#  ifdef WITH_EMBREE_GPU
+      /* Spec says it has no effect if the called kernel doesn't support the below specialization
+       * constant but it can still trigger a recompilation, so we set it only if needed. */
+      if (device_kernel_has_intersection(device_kernel)) {
+        const RTCFeatureFlags used_embree_features = !use_hardware_raytracing ?
+                                                         RTC_FEATURE_FLAG_NONE :
+                                                     !(kernel_features & KERNEL_FEATURE_HAIR) ?
+                                                         CYCLES_ONEAPI_EMBREE_BASIC_FEATURES :
+                                                         CYCLES_ONEAPI_EMBREE_ALL_FEATURES;
+        cgh.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
+            used_embree_features);
+      }
+#  else
+      (void)kernel_features;
+#  endif
      switch (device_kernel) {
        case DEVICE_KERNEL_INTEGRATOR_RESET: {
          oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_reset);
@@ -549,4 +664,5 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
 #  endif
  return success;
 }
+
 #endif /* WITH_ONEAPI */
--- a/intern/cycles/kernel/device/oneapi/kernel.h
+++ b/intern/cycles/kernel/device/oneapi/kernel.h
@@ -47,10 +47,14 @@ CYCLES_KERNEL_ONEAPI_EXPORT size_t oneapi_kernel_preferred_local_size(
 CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_enqueue_kernel(KernelContext *context,
                                                       int kernel,
                                                       size_t global_size,
+                                                       const unsigned int kernel_features,
+                                                       bool use_hardware_raytracing,
                                                       void **args);
 CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_load_kernels(SyclQueue *queue,
-                                                     const unsigned int requested_features);
+                                                     const unsigned int kernel_features,
+                                                     bool use_hardware_raytracing);
 #  ifdef __cplusplus
 }
+
 #  endif
 #endif /* WITH_ONEAPI */
--- a/intern/cycles/kernel/geom/motion_triangle.h
+++ b/intern/cycles/kernel/geom/motion_triangle.h
@@ -74,57 +74,43 @@ ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals kg,
  }
 }

-ccl_device_inline void motion_triangle_vertices(
-    KernelGlobals kg, int object, int prim, float time, float3 verts[3])
+ccl_device_inline void motion_triangle_compute_info(KernelGlobals kg,
+                                                    int object,
+                                                    float time,
+                                                    int prim,
+                                                    ccl_private uint3 *tri_vindex,
+                                                    ccl_private int *numsteps,
+                                                    ccl_private int *numverts,
+                                                    ccl_private int *step,
+                                                    ccl_private float *t)
 {
-  /* get motion info */
-  int numsteps, numverts;
-  object_motion_info(kg, object, &numsteps, &numverts, NULL);
-
-  /* figure out which steps we need to fetch and their interpolation factor */
-  int maxstep = numsteps * 2;
-  int step = min((int)(time * maxstep), maxstep - 1);
-  float t = time * maxstep - step;
-
-  /* find attribute */
-  int offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_POSITION);
-  kernel_assert(offset != ATTR_STD_NOT_FOUND);
-
-  /* fetch vertex coordinates */
-  float3 next_verts[3];
-
-  uint3 tri_vindex = kernel_data_fetch(tri_vindex, prim);
-
-  motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
-  motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step + 1, next_verts);
-
-  /* interpolate between steps */
-  verts[0] = (1.0f - t) * verts[0] + t * next_verts[0];
-  verts[1] = (1.0f - t) * verts[1] + t * next_verts[1];
-  verts[2] = (1.0f - t) * verts[2] + t * next_verts[2];
-}
-
-ccl_device_inline void motion_triangle_vertices_and_normals(
-    KernelGlobals kg, int object, int prim, float time, float3 verts[3], float3 normals[3])
-{
-  /* get motion info */
-  int numsteps, numverts;
-  object_motion_info(kg, object, &numsteps, &numverts, NULL);
+  /* Get object motion info, passing the `numsteps`/`numverts` outputs straight through. */
+  object_motion_info(kg, object, numsteps, numverts, NULL);

   /* Figure out which steps we need to fetch and their interpolation factor. */
-  int maxstep = numsteps * 2;
-  int step = min((int)(time * maxstep), maxstep - 1);
-  float t = time * maxstep - step;
+  int maxstep = *numsteps * 2;
+  *step = min((int)(time * maxstep), maxstep - 1); /* Keeps `step + 1 <= maxstep`. */
+  *t = time * maxstep - *step; /* Remainder of the scaled time past `step`. */

+  /* Get the indices stored for triangle `prim`. */
+  *tri_vindex = kernel_data_fetch(tri_vindex, prim);
+}
+
+ccl_device_inline void motion_triangle_vertices(KernelGlobals kg,
+                                                int object,
+                                                uint3 tri_vindex,
+                                                int numsteps,
+                                                int numverts,
+                                                int step,
+                                                float t,
+                                                float3 verts[3])
+{
  /* Find attribute. */
  int offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_POSITION);
  kernel_assert(offset != ATTR_STD_NOT_FOUND);

  /* Fetch vertex coordinates. */
  float3 next_verts[3];
-
-  uint3 tri_vindex = kernel_data_fetch(tri_vindex, prim);
-
  motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
  motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step + 1, next_verts);

@@ -132,60 +118,90 @@ ccl_device_inline void motion_triangle_vertices_and_normals(
  verts[0] = (1.0f - t) * verts[0] + t * next_verts[0];
  verts[1] = (1.0f - t) * verts[1] + t * next_verts[1];
  verts[2] = (1.0f - t) * verts[2] + t * next_verts[2];
+}

-  /* Compute smooth normal. */
+ccl_device_inline void motion_triangle_vertices(
+    KernelGlobals kg, int object, int prim, float time, float3 verts[3])
+{
+  int numsteps, numverts, step;
+  float t;
+  uint3 tri_vindex;
+  motion_triangle_compute_info(
+      kg, object, time, prim, &tri_vindex, &numsteps, &numverts, &step, &t);
+  /* With the step data resolved from (prim, time), defer to the explicit-step overload. */
+  motion_triangle_vertices(kg, object, tri_vindex, numsteps, numverts, step, t, verts);
+}
+/* Compute the three vertex normals at factor `t` between `step` and `step + 1`. */
+ccl_device_inline void motion_triangle_normals(KernelGlobals kg,
+                                               int object,
+                                               uint3 tri_vindex,
+                                               int numsteps,
+                                               int numverts,
+                                               int step,
+                                               float t,
+                                               float3 normals[3])
+{
   /* Find attribute. */
-  offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_NORMAL);
+  int offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_NORMAL);
   kernel_assert(offset != ATTR_STD_NOT_FOUND);

-  /* Fetch vertex coordinates. */
+  /* Fetch normals for `step` (into `normals`) and `step + 1` (into `next_normals`). */
   float3 next_normals[3];
   motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals);
   motion_triangle_normals_for_step(
       kg, tri_vindex, offset, numverts, numsteps, step + 1, next_normals);

   /* Interpolate between steps. */
-  normals[0] = (1.0f - t) * normals[0] + t * next_normals[0];
-  normals[1] = (1.0f - t) * normals[1] + t * next_normals[1];
-  normals[2] = (1.0f - t) * normals[2] + t * next_normals[2];
+  normals[0] = normalize((1.0f - t) * normals[0] + t * next_normals[0]);
+  normals[1] = normalize((1.0f - t) * normals[1] + t * next_normals[1]);
+  normals[2] = normalize((1.0f - t) * normals[2] + t * next_normals[2]);
+}
+/* Fill `verts` and `normals` for `prim` at `time`, resolving the motion step only once. */
+ccl_device_inline void motion_triangle_vertices_and_normals(
+    KernelGlobals kg, int object, int prim, float time, float3 verts[3], float3 normals[3])
+{
+  int numsteps, numverts, step;
+  float t;
+  uint3 tri_vindex;
+  motion_triangle_compute_info(
+      kg, object, time, prim, &tri_vindex, &numsteps, &numverts, &step, &t);
+  /* Both lookups below share the same step, factor and triangle indices. */
+  motion_triangle_vertices(kg, object, tri_vindex, numsteps, numverts, step, t, verts);
+  motion_triangle_normals(kg, object, tri_vindex, numsteps, numverts, step, t, normals);
+}
+/* Smooth normal at (u, v) from already-resolved step data; yields `Ng` if the blend is zero. */
+ccl_device_inline float3 motion_triangle_smooth_normal(KernelGlobals kg,
+                                                       float3 Ng,
+                                                       int object,
+                                                       uint3 tri_vindex,
+                                                       int numsteps,
+                                                       int numverts,
+                                                       int step,
+                                                       float t,
+                                                       float u,
+                                                       float v)
+{
+  float3 normals[3];
+  motion_triangle_normals(kg, object, tri_vindex, numsteps, numverts, step, t, normals);
+
+  /* Interpolate between normals: `w` weights normal 0, `u` normal 1 and `v` normal 2. */
+  float w = 1.0f - u - v;
+  float3 N = safe_normalize(w * normals[0] + u * normals[1] + v * normals[2]);
+  /* Fall back to the caller-supplied `Ng` when the interpolated normal is zero. */
+  return is_zero(N) ? Ng : N;
 }

 ccl_device_inline float3 motion_triangle_smooth_normal(
     KernelGlobals kg, float3 Ng, int object, int prim, float u, float v, float time)
 {
-  /* get motion info */
-  int numsteps, numverts;
-  object_motion_info(kg, object, &numsteps, &numverts, NULL);
+  int numsteps, numverts, step;
+  float t; /* Blend factor between `step` and `step + 1`. */
+  uint3 tri_vindex; /* Filled in for `prim` by motion_triangle_compute_info(). */
+  motion_triangle_compute_info(
+      kg, object, time, prim, &tri_vindex, &numsteps, &numverts, &step, &t);

-  /* figure out which steps we need to fetch and their interpolation factor */
-  int maxstep = numsteps * 2;
-  int step = min((int)(time * maxstep), maxstep - 1);
-  float t = time * maxstep - step;
-
-  /* find attribute */
-  int offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_NORMAL);
-  kernel_assert(offset != ATTR_STD_NOT_FOUND);
-
-  /* fetch normals */
-  float3 normals[3], next_normals[3];
-
-  uint3 tri_vindex = kernel_data_fetch(tri_vindex, prim);
-
-  motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals);
-  motion_triangle_normals_for_step(
-      kg, tri_vindex, offset, numverts, numsteps, step + 1, next_normals);
-
-  /* interpolate between steps */
-  normals[0] = (1.0f - t) * normals[0] + t * next_normals[0];
-  normals[1] = (1.0f - t) * normals[1] + t * next_normals[1];
-  normals[2] = (1.0f - t) * normals[2] + t * next_normals[2];
-
-  /* interpolate between vertices */
-  float w = 1.0f - u - v;
-  float3 N = safe_normalize(u * normals[0] + v * normals[1] + w * normals[2]);
-
-  return is_zero(N) ? Ng : N;
+  return motion_triangle_smooth_normal(
+      kg, Ng, object, tri_vindex, numsteps, numverts, step, t, u, v);
 }

 CCL_NAMESPACE_END
--- a/intern/cycles/kernel/geom/motion_triangle_shader.h
+++ b/intern/cycles/kernel/geom/motion_triangle_shader.h
@@ -32,30 +32,17 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals kg,
 {
  /* Get shader. */
  sd->shader = kernel_data_fetch(tri_shader, sd->prim);
-  /* Get motion info. */
-  /* TODO(sergey): This logic is really similar to motion_triangle_vertices(),
-   * can we de-duplicate something here?
-   */
-  int numsteps, numverts;
-  object_motion_info(kg, sd->object, &numsteps, &numverts, NULL);
-  /* Figure out which steps we need to fetch and their interpolation factor. */
-  int maxstep = numsteps * 2;
-  int step = min((int)(sd->time * maxstep), maxstep - 1);
-  float t = sd->time * maxstep - step;
-  /* Find attribute. */
-  int offset = intersection_find_attribute(kg, sd->object, ATTR_STD_MOTION_VERTEX_POSITION);
-  kernel_assert(offset != ATTR_STD_NOT_FOUND);
-  /* Fetch vertex coordinates. */
-  float3 verts[3], next_verts[3];

-  uint3 tri_vindex = kernel_data_fetch(tri_vindex, sd->prim);
+  /* Compute motion info. */
+  int numsteps, numverts, step;
+  float t;
+  uint3 tri_vindex;
+  motion_triangle_compute_info(
+      kg, sd->object, sd->time, sd->prim, &tri_vindex, &numsteps, &numverts, &step, &t);
+
+  float3 verts[3];
+  motion_triangle_vertices(kg, sd->object, tri_vindex, numsteps, numverts, step, t, verts);

-  motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
-  motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step + 1, next_verts);
-  /* Interpolate between steps. */
-  verts[0] = (1.0f - t) * verts[0] + t * next_verts[0];
-  verts[1] = (1.0f - t) * verts[1] + t * next_verts[1];
-  verts[2] = (1.0f - t) * verts[2] + t * next_verts[2];
  /* Compute refined position. */
  sd->P = motion_triangle_point_from_uv(kg, sd, isect_object, isect_prim, sd->u, sd->v, verts);
  /* Compute face normal. */
@@ -75,23 +62,8 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals kg,
 #endif
  /* Compute smooth normal. */
  if (sd->shader & SHADER_SMOOTH_NORMAL) {
-    /* Find attribute. */
-    int offset = intersection_find_attribute(kg, sd->object, ATTR_STD_MOTION_VERTEX_NORMAL);
-    kernel_assert(offset != ATTR_STD_NOT_FOUND);
-    /* Fetch vertex coordinates. */
-    float3 normals[3], next_normals[3];
-    motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals);
-    motion_triangle_normals_for_step(
-        kg, tri_vindex, offset, numverts, numsteps, step + 1, next_normals);
-    /* Interpolate between steps. */
-    normals[0] = (1.0f - t) * normals[0] + t * next_normals[0];
-    normals[1] = (1.0f - t) * normals[1] + t * next_normals[1];
-    normals[2] = (1.0f - t) * normals[2] + t * next_normals[2];
-    /* Interpolate between vertices. */
-    float u = sd->u;
-    float v = sd->v;
-    float w = 1.0f - u - v;
-    sd->N = (w * normals[0] + u * normals[1] + v * normals[2]);
+    sd->N = motion_triangle_smooth_normal(
+        kg, Ng, sd->object, tri_vindex, numsteps, numverts, step, t, sd->u, sd->v);
  }
 }

--- a/Show More
+++ b/Show More