2023-04-20 07:55:37 +02:00
226 changed files with 2627 additions and 976 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1580,6 +1580,8 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
  add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_MISSING_NORETURN -Wno-missing-noreturn)
  add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_UNUSED_BUT_SET_VARIABLE -Wno-unused-but-set-variable)
  add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_DEPRECATED_DECLARATIONS -Wno-deprecated-declarations)
+  add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_STRICT_PROTOTYPES -Wno-strict-prototypes)
+  add_check_c_compiler_flag(C_REMOVE_STRICT_FLAGS C_WARN_NO_BITWISE_INSTEAD_OF_LOGICAL -Wno-bitwise-instead-of-logical)

  add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_UNUSED_PARAMETER -Wno-unused-parameter)
  add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_UNUSED_PRIVATE_FIELD -Wno-unused-private-field)
@ -1593,6 +1595,7 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
  add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_UNDEFINED_VAR_TEMPLATE -Wno-undefined-var-template)
  add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_INSTANTIATION_AFTER_SPECIALIZATION -Wno-instantiation-after-specialization)
  add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_MISLEADING_INDENTATION    -Wno-misleading-indentation)
+  add_check_cxx_compiler_flag(CXX_REMOVE_STRICT_FLAGS CXX_WARN_NO_BITWISE_INSTEAD_OF_LOGICAL -Wno-bitwise-instead-of-logical)

 elseif(CMAKE_C_COMPILER_ID MATCHES "Intel")

--- a/build_files/build_environment/CMakeLists.txt
+++ b/build_files/build_environment/CMakeLists.txt
@ -90,28 +90,26 @@ include(cmake/haru.cmake)
 # Boost needs to be included after `python.cmake` due to the PYTHON_BINARY variable being needed.
 include(cmake/boost.cmake)
 include(cmake/pugixml.cmake)
-include(cmake/ispc.cmake)
-include(cmake/openimagedenoise.cmake)
-include(cmake/embree.cmake)
-include(cmake/openpgl.cmake)
-include(cmake/fmt.cmake)
-include(cmake/robinmap.cmake)
-include(cmake/xml2.cmake)
-
 include(cmake/fribidi.cmake)
 include(cmake/harfbuzz.cmake)
 if(NOT APPLE)
  include(cmake/xr_openxr.cmake)
-  if(NOT WIN32 OR BUILD_MODE STREQUAL Release)
-    include(cmake/dpcpp.cmake)
-    include(cmake/dpcpp_deps.cmake)
-  endif()
+  include(cmake/dpcpp.cmake)
+  include(cmake/dpcpp_deps.cmake)
  if(NOT WIN32)
    include(cmake/igc.cmake)
    include(cmake/gmmlib.cmake)
    include(cmake/ocloc.cmake)
  endif()
 endif()
+include(cmake/ispc.cmake)
+include(cmake/openimagedenoise.cmake)
+# Embree needs to be included after dpcpp as it uses it for compiling with GPU support
+include(cmake/embree.cmake)
+include(cmake/openpgl.cmake)
+include(cmake/fmt.cmake)
+include(cmake/robinmap.cmake)
+include(cmake/xml2.cmake)

 # OpenColorIO and dependencies.
 include(cmake/expat.cmake)
--- a/build_files/build_environment/cmake/download.cmake
+++ b/build_files/build_environment/cmake/download.cmake
@ -156,6 +156,7 @@ download_source(OPENCLHEADERS)
 download_source(ICDLOADER)
 download_source(MP11)
 download_source(SPIRV_HEADERS)
+download_source(UNIFIED_RUNTIME)
 download_source(IGC)
 download_source(IGC_LLVM)
 download_source(IGC_OPENCL_CLANG)
--- a/build_files/build_environment/cmake/dpcpp.cmake
+++ b/build_files/build_environment/cmake/dpcpp.cmake
@ -5,6 +5,9 @@
 # for now.
 string(REPLACE "-DCMAKE_CXX_STANDARD=17" " " DPCPP_CMAKE_FLAGS "${DEFAULT_CMAKE_FLAGS}")

+# DPCPP already generates debug libs, there isn't much point in compiling it in debug mode itself.
+string(REPLACE "-DCMAKE_BUILD_TYPE=Debug" "-DCMAKE_BUILD_TYPE=Release" DPCPP_CMAKE_FLAGS "${DPCPP_CMAKE_FLAGS}")
+
 if(WIN32)
  set(LLVM_GENERATOR "Ninja")
 else()
@ -38,17 +41,18 @@ set(DPCPP_EXTRA_ARGS
  -DLEVEL_ZERO_LIBRARY=${LIBDIR}/level-zero/lib/${LIBPREFIX}ze_loader${SHAREDLIBEXT}
  -DLEVEL_ZERO_INCLUDE_DIR=${LIBDIR}/level-zero/include
  -DLLVM_EXTERNAL_SPIRV_HEADERS_SOURCE_DIR=${BUILD_DIR}/spirvheaders/src/external_spirvheaders/
+  -DUNIFIED_RUNTIME_SOURCE_DIR=${BUILD_DIR}/unifiedruntime/src/external_unifiedruntime/
  # Below here is copied from an invocation of buildbot/config.py
  -DLLVM_ENABLE_ASSERTIONS=ON
  -DLLVM_TARGETS_TO_BUILD=X86
-  -DLLVM_EXTERNAL_PROJECTS=sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw
+  -DLLVM_EXTERNAL_PROJECTS=sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw^^lld
  -DLLVM_EXTERNAL_SYCL_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/sycl
  -DLLVM_EXTERNAL_LLVM_SPIRV_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/llvm-spirv
  -DLLVM_EXTERNAL_XPTI_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xpti
  -DXPTI_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xpti
  -DLLVM_EXTERNAL_XPTIFW_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/xptifw
  -DLLVM_EXTERNAL_LIBDEVICE_SOURCE_DIR=${DPCPP_SOURCE_ROOT}/libdevice
-  -DLLVM_ENABLE_PROJECTS=clang^^sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw
+  -DLLVM_ENABLE_PROJECTS=clang^^sycl^^llvm-spirv^^opencl^^libdevice^^xpti^^xptifw^^lld
  -DLIBCLC_TARGETS_TO_BUILD=
  -DLIBCLC_GENERATE_REMANGLED_VARIANTS=OFF
  -DSYCL_BUILD_PI_HIP_PLATFORM=AMD
@ -104,13 +108,13 @@ add_dependencies(
  external_mp11
  external_level-zero
  external_spirvheaders
+  external_unifiedruntime
 )

 if(BUILD_MODE STREQUAL Release AND WIN32)
  ExternalProject_Add_Step(external_dpcpp after_install
      COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang-cl.exe
      COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang-cpp.exe
-      COMMAND ${CMAKE_COMMAND} -E rm -f ${LIBDIR}/dpcpp/bin/clang.exe
      COMMAND ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/dpcpp ${HARVEST_TARGET}/dpcpp
  )
 endif()
--- a/build_files/build_environment/cmake/dpcpp_deps.cmake
+++ b/build_files/build_environment/cmake/dpcpp_deps.cmake
@ -59,3 +59,13 @@ ExternalProject_Add(external_spirvheaders
  BUILD_COMMAND echo .
  INSTALL_COMMAND echo .
 )
+
+ExternalProject_Add(external_unifiedruntime
+  URL file://${PACKAGE_DIR}/${UNIFIED_RUNTIME_FILE}
+  URL_HASH ${UNIFIED_RUNTIME_HASH_TYPE}=${UNIFIED_RUNTIME_HASH}
+  DOWNLOAD_DIR ${DOWNLOAD_DIR}
+  PREFIX ${BUILD_DIR}/unifiedruntime
+  CONFIGURE_COMMAND echo .
+  BUILD_COMMAND echo .
+  INSTALL_COMMAND echo .
+)
--- a/build_files/build_environment/cmake/embree.cmake
+++ b/build_files/build_environment/cmake/embree.cmake
@ -3,6 +3,8 @@
 # Note the utility apps may use png/tiff/gif system libraries, but the
 # library itself does not depend on them, so should give no problems.

+set(EMBREE_CMAKE_FLAGS ${DEFAULT_CMAKE_FLAGS})
+
 set(EMBREE_EXTRA_ARGS
  -DEMBREE_ISPC_SUPPORT=OFF
  -DEMBREE_TUTORIALS=OFF
@ -31,6 +33,43 @@ if(NOT BLENDER_PLATFORM_ARM)
  )
 endif()

+if(NOT APPLE)
+  if(WIN32)
+    # Levels below -O2 don't work well for Embree+SYCL.
+    string(REGEX REPLACE "-O[A-Za-z0-9]" "" EMBREE_CLANG_CMAKE_CXX_FLAGS_DEBUG ${BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG})
+    string(APPEND EMBREE_CLANG_CMAKE_CXX_FLAGS_DEBUG " -O2")
+    string(REGEX REPLACE "-O[A-Za-z0-9]" "" EMBREE_CLANG_CMAKE_C_FLAGS_DEBUG ${BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG})
+    string(APPEND EMBREE_CLANG_CMAKE_C_FLAGS_DEBUG " -O2")
+    set(EMBREE_CMAKE_FLAGS
+      -DCMAKE_BUILD_TYPE=${BUILD_MODE}
+      -DCMAKE_CXX_FLAGS_RELEASE=${BLENDER_CLANG_CMAKE_CXX_FLAGS_RELEASE}
+      -DCMAKE_CXX_FLAGS_MINSIZEREL=${BLENDER_CLANG_CMAKE_CXX_FLAGS_MINSIZEREL}
+      -DCMAKE_CXX_FLAGS_RELWITHDEBINFO=${BLENDER_CLANG_CMAKE_CXX_FLAGS_RELWITHDEBINFO}
+      -DCMAKE_CXX_FLAGS_DEBUG=${EMBREE_CLANG_CMAKE_CXX_FLAGS_DEBUG}
+      -DCMAKE_C_FLAGS_RELEASE=${BLENDER_CLANG_CMAKE_C_FLAGS_RELEASE}
+      -DCMAKE_C_FLAGS_MINSIZEREL=${BLENDER_CLANG_CMAKE_C_FLAGS_MINSIZEREL}
+      -DCMAKE_C_FLAGS_RELWITHDEBINFO=${BLENDER_CLANG_CMAKE_C_FLAGS_RELWITHDEBINFO}
+      -DCMAKE_C_FLAGS_DEBUG=${EMBREE_CLANG_CMAKE_C_FLAGS_DEBUG}
+      -DCMAKE_CXX_STANDARD=17
+    )
+    set(EMBREE_EXTRA_ARGS
+      -DCMAKE_CXX_COMPILER=${LIBDIR}/dpcpp/bin/clang++.exe
+      -DCMAKE_C_COMPILER=${LIBDIR}/dpcpp/bin/clang.exe
+      -DCMAKE_SHARED_LINKER_FLAGS=-L"${LIBDIR}/dpcpp/lib"
+      -DEMBREE_SYCL_SUPPORT=ON
+      ${EMBREE_EXTRA_ARGS}
+    )
+  else()
+    set(EMBREE_EXTRA_ARGS
+      -DCMAKE_CXX_COMPILER=${LIBDIR}/dpcpp/bin/clang++
+      -DCMAKE_C_COMPILER=${LIBDIR}/dpcpp/bin/clang
+      -DCMAKE_SHARED_LINKER_FLAGS=-L"${LIBDIR}/dpcpp/lib"
+      -DEMBREE_SYCL_SUPPORT=ON
+      ${EMBREE_EXTRA_ARGS}
+    )
+  endif()
+endif()
+
 if(TBB_STATIC_LIBRARY)
  set(EMBREE_EXTRA_ARGS
    ${EMBREE_EXTRA_ARGS}
@ -42,16 +81,25 @@ ExternalProject_Add(external_embree
  URL file://${PACKAGE_DIR}/${EMBREE_FILE}
  DOWNLOAD_DIR ${DOWNLOAD_DIR}
  URL_HASH ${EMBREE_HASH_TYPE}=${EMBREE_HASH}
+  CMAKE_GENERATOR ${PLATFORM_ALT_GENERATOR}
  PREFIX ${BUILD_DIR}/embree
  PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/embree/src/external_embree < ${PATCH_DIR}/embree.diff
-  CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/embree ${DEFAULT_CMAKE_FLAGS} ${EMBREE_EXTRA_ARGS}
+  CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/embree ${EMBREE_CMAKE_FLAGS} ${EMBREE_EXTRA_ARGS}
  INSTALL_DIR ${LIBDIR}/embree
 )

-add_dependencies(
-  external_embree
-  external_tbb
-)
+if(NOT APPLE)
+  add_dependencies(
+    external_embree
+    external_tbb
+    external_dpcpp
+  )
+else()
+  add_dependencies(
+    external_embree
+    external_tbb
+  )
+endif()

 if(WIN32)
  if(BUILD_MODE STREQUAL Release)
@ -66,6 +114,7 @@ if(WIN32)
    ExternalProject_Add_Step(external_embree after_install
      COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/bin/embree4_d.dll ${HARVEST_TARGET}/embree/bin/embree4_d.dll
      COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/embree4_d.lib ${HARVEST_TARGET}/embree/lib/embree4_d.lib
+      COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/embree/lib/embree4_sycl_d.lib ${HARVEST_TARGET}/embree/lib/embree4_sycl_d.lib
      DEPENDEES install
    )
  endif()
--- a/build_files/build_environment/cmake/options.cmake
+++ b/build_files/build_environment/cmake/options.cmake
@ -74,6 +74,27 @@ if(WIN32)
  set(BLENDER_CMAKE_CXX_FLAGS_RELEASE "/MD ${COMMON_MSVC_FLAGS} /D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS /O2 /Ob2 /D NDEBUG /D PLATFORM_WINDOWS /DPSAPI_VERSION=2 /DTINYFORMAT_ALLOW_WCHAR_STRINGS")
  set(BLENDER_CMAKE_CXX_FLAGS_RELWITHDEBINFO "/MD ${COMMON_MSVC_FLAGS} /D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS /Zi /O2 /Ob1 /D NDEBUG /D PLATFORM_WINDOWS /DPSAPI_VERSION=2 /DTINYFORMAT_ALLOW_WCHAR_STRINGS")

+  # Set similar flags for CLANG compilation.
+  set(COMMON_CLANG_FLAGS "-D_DLL -D_MT") # Equivalent to MSVC /MD
+
+  if(WITH_OPTIMIZED_DEBUG)
+    set(BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrtd -O2 -D_DEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
+  else()
+    set(BLENDER_CLANG_CMAKE_C_FLAGS_DEBUG "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrtd -g -D_DEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
+  endif()
+  set(BLENDER_CLANG_CMAKE_C_FLAGS_MINSIZEREL "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -Os -DNDEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
+  set(BLENDER_CLANG_CMAKE_C_FLAGS_RELEASE "${COMMON_CLANG_FLAGS}  -Xclang --dependent-lib=msvcrt -O2 -DNDEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
+  set(BLENDER_CLANG_CMAKE_C_FLAGS_RELWITHDEBINFO "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -g -O2 -DNDEBUG -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
+
+  if(WITH_OPTIMIZED_DEBUG)
+    set(BLENDER_CLANG_CMAKE_CXX_FLAGS_DEBUG "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrtd -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -O2 -D_DEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS -DBOOST_DEBUG_PYTHON -DBOOST_ALL_NO_LIB")
+  else()
+    set(BLENDER_CLANG_CMAKE_CXX_FLAGS_DEBUG "${COMMON_CLANG_FLAG} -Xclang --dependent-lib=msvcrtd -D_DEBUG -DPLATFORM_WINDOWS -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -g -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS -DBOOST_DEBUG_PYTHON -DBOOST_ALL_NO_LIB")
+  endif()
+  set(BLENDER_CLANG_CMAKE_CXX_FLAGS_MINSIZEREL "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -O2 -DNDEBUG  -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
+  set(BLENDER_CLANG_CMAKE_CXX_FLAGS_RELEASE "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -O2 -DNDEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
+  set(BLENDER_CLANG_CMAKE_CXX_FLAGS_RELWITHDEBINFO "${COMMON_CLANG_FLAGS} -Xclang --dependent-lib=msvcrt -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS -g -O2 -DNDEBUG -DPLATFORM_WINDOWS -DPSAPI_VERSION=2 -DTINYFORMAT_ALLOW_WCHAR_STRINGS")
+
  set(PLATFORM_FLAGS)
  set(PLATFORM_CXX_FLAGS)
  set(PLATFORM_CMAKE_FLAGS)
--- a/build_files/build_environment/cmake/versions.cmake
+++ b/build_files/build_environment/cmake/versions.cmake
@ -599,15 +599,15 @@ set(OPENPGL_HASH db63f5dac5cfa8c110ede241f0c413f00db0c4748697381c4fa23e0f9e82a75
 set(OPENPGL_HASH_TYPE SHA256)
 set(OPENPGL_FILE openpgl-${OPENPGL_VERSION}.tar.gz)

-set(LEVEL_ZERO_VERSION v1.8.5)
+set(LEVEL_ZERO_VERSION v1.8.8)
 set(LEVEL_ZERO_URI https://github.com/oneapi-src/level-zero/archive/refs/tags/${LEVEL_ZERO_VERSION}.tar.gz)
-set(LEVEL_ZERO_HASH b6e9663bbcc53c148d32376998298bec6f7c434ef2218c61fa708963e3a09394)
+set(LEVEL_ZERO_HASH 3553ae8fa0d2d69c4210a8f3428bd6612bd8bb8a627faf52c3658a01851e66d2)
 set(LEVEL_ZERO_HASH_TYPE SHA256)
 set(LEVEL_ZERO_FILE level-zero-${LEVEL_ZERO_VERSION}.tar.gz)

-set(DPCPP_VERSION 20221019)
-set(DPCPP_URI https://github.com/intel/llvm/archive/refs/tags/sycl-nightly/${DPCPP_VERSION}.tar.gz)
-set(DPCPP_HASH 2f533946e91ce3829431758ea17b0b834b960c1a796e9e4563c86e03eb9603a2)
+set(DPCPP_VERSION 2022-12)
+set(DPCPP_URI https://github.com/intel/llvm/archive/refs/tags/${DPCPP_VERSION}.tar.gz)
+set(DPCPP_HASH 13151d5ae79f7c9c4a9b072a0c486ae7b3c4993e301bb1268c92214451025790)
 set(DPCPP_HASH_TYPE SHA256)
 set(DPCPP_FILE DPCPP-${DPCPP_VERSION}.tar.gz)

@ -620,9 +620,9 @@ set(DPCPP_FILE DPCPP-${DPCPP_VERSION}.tar.gz)
 # will take care of building them, unpack is being done in dpcpp_deps.cmake

 # Source llvm/lib/SYCLLowerIR/CMakeLists.txt
-set(VCINTRINSICS_VERSION abce9184b7a3a7fe1b02289b9285610d9dc45465)
+set(VCINTRINSICS_VERSION 782fbf7301dc73acaa049a4324c976ad94f587f7)
 set(VCINTRINSICS_URI https://github.com/intel/vc-intrinsics/archive/${VCINTRINSICS_VERSION}.tar.gz)
-set(VCINTRINSICS_HASH 3e9fd471246b87633b26f7e15e17ab7733d357458c53d5c5881c03929d6c551f)
+set(VCINTRINSICS_HASH f4c0ccad8c1f77760364c551c65e8e1cf194d058889fa46d3b1b2d19ec4dc33f)
 set(VCINTRINSICS_HASH_TYPE SHA256)
 set(VCINTRINSICS_FILE vc-intrinsics-${VCINTRINSICS_VERSION}.tar.gz)

@ -657,6 +657,13 @@ set(SPIRV_HEADERS_HASH ec8ecb471a62672697846c436501638ab25447ae9d4a6761e0bfe8a9a
 set(SPIRV_HEADERS_HASH_TYPE SHA256)
 set(SPIRV_HEADERS_FILE SPIR-V-Headers-${SPIRV_HEADERS_VERSION}.tar.gz)

+# Source llvm/sycl/plugins/unified_runtime/CMakeLists.txt
+set(UNIFIED_RUNTIME_VERSION fd711c920acc4434cb52ff18b078c082d9d7f44d)
+set(UNIFIED_RUNTIME_URI https://github.com/oneapi-src/unified-runtime/archive/${UNIFIED_RUNTIME_VERSION}.tar.gz)
+set(UNIFIED_RUNTIME_HASH 535ca2ee78f68c5e7e62b10f1bbabd909179488885566e6d9b1fc50e8a1be65f)
+set(UNIFIED_RUNTIME_HASH_TYPE SHA256)
+set(UNIFIED_RUNTIME_FILE unified-runtime-${UNIFIED_RUNTIME_VERSION}.tar.gz)
+
 ######################
 ### DPCPP DEPS END ###
 ######################
@ -730,9 +737,9 @@ set(GMMLIB_HASH c1f33e1519edfc527127baeb0436b783430dfd256c643130169a3a71dc86aff9
 set(GMMLIB_HASH_TYPE SHA256)
 set(GMMLIB_FILE ${GMMLIB_VERSION}.tar.gz)

-set(OCLOC_VERSION 22.49.25018.21)
+set(OCLOC_VERSION 23.05.25593.18)
 set(OCLOC_URI https://github.com/intel/compute-runtime/archive/refs/tags/${OCLOC_VERSION}.tar.gz)
-set(OCLOC_HASH 92362dae08b503a34e5d3820ed284198c452bcd5e7504d90eb69887b20492c06)
+set(OCLOC_HASH 122415028e631922ae999c996954dfd98ce9a32decd564d5484c31476ec9306e)
 set(OCLOC_HASH_TYPE SHA256)
 set(OCLOC_FILE ocloc-${OCLOC_VERSION}.tar.gz)

--- a/build_files/build_environment/dependencies.dot
+++ b/build_files/build_environment/dependencies.dot
@ -14,6 +14,7 @@ graph[autosize = false, size = "25.7,8.3!", resolution = 300];
 	external_dpcpp -- external_mp11;
 	external_dpcpp -- external_level_zero;
 	external_dpcpp -- external_spirvheaders;
+	external_dpcpp -- external_unifiedruntime;
 	external_embree -- external_tbb;
 	external_ffmpeg -- external_zlib;
 	external_ffmpeg -- external_openjpeg;
--- a/build_files/build_environment/patches/dpcpp.diff
+++ b/build_files/build_environment/patches/dpcpp.diff
@ -34,3 +34,156 @@ diff -Naur llvm-sycl-nightly-20220208.orig/libdevice/cmake/modules/SYCLLibdevice
   libsycldevice-obj
   libsycldevice-spv)
 
+diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp
+index 17eeaafae194..09e6d2217aaa 100644
+--- a/sycl/source/detail/program_manager/program_manager.cpp
+++ b/sycl/source/detail/program_manager/program_manager.cpp
+@@ -1647,46 +1647,120 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState(
+   }
+   assert(BinImages.size() > 0 && "Expected to find at least one device image");
+ 
+  // Ignore images with incompatible state. Image is considered compatible
+  // with a target state if an image is already in the target state or can
+  // be brought to target state by compiling/linking/building.
+  //
+  // Example: an image in "executable" state is not compatible with
+  // "input" target state - there is no operation to convert the image it
+  // to "input" state. An image in "input" state is compatible with
+  // "executable" target state because it can be built to get into
+  // "executable" state.
+  for (auto It = BinImages.begin(); It != BinImages.end();) {
+    if (getBinImageState(*It) > TargetState)
+      It = BinImages.erase(It);
+    else
+      ++It;
+  }
+
+   std::vector<device_image_plain> SYCLDeviceImages;
+-  for (RTDeviceBinaryImage *BinImage : BinImages) {
+-    const bundle_state ImgState = getBinImageState(BinImage);
+-
+-    // Ignore images with incompatible state. Image is considered compatible
+-    // with a target state if an image is already in the target state or can
+-    // be brought to target state by compiling/linking/building.
+-    //
+-    // Example: an image in "executable" state is not compatible with
+-    // "input" target state - there is no operation to convert the image it
+-    // to "input" state. An image in "input" state is compatible with
+-    // "executable" target state because it can be built to get into
+-    // "executable" state.
+-    if (ImgState > TargetState)
+-      continue;
+ 
+-    for (const sycl::device &Dev : Devs) {
+  // If a non-input state is requested, we can filter out some compatible
+  // images and return only those with the highest compatible state for each
+  // device-kernel pair. This map tracks how many kernel-device pairs need each
+  // image, so that any unneeded ones are skipped.
+  // TODO this has no effect if the requested state is input, consider having
+  // a separate branch for that case to avoid unnecessary tracking work.
+  struct DeviceBinaryImageInfo {
+    std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
+    bundle_state State = bundle_state::input;
+    int RequirementCounter = 0;
+  };
+  std::unordered_map<RTDeviceBinaryImage *, DeviceBinaryImageInfo> ImageInfoMap;
+
+  for (const sycl::device &Dev : Devs) {
+    // Track the highest image state for each requested kernel.
+    using StateImagesPairT =
+        std::pair<bundle_state, std::vector<RTDeviceBinaryImage *>>;
+    using KernelImageMapT =
+        std::map<kernel_id, StateImagesPairT, LessByNameComp>;
+    KernelImageMapT KernelImageMap;
+    if (!KernelIDs.empty())
+      for (const kernel_id &KernelID : KernelIDs)
+        KernelImageMap.insert({KernelID, {}});
+
+    for (RTDeviceBinaryImage *BinImage : BinImages) {
+       if (!compatibleWithDevice(BinImage, Dev) ||
+           !doesDevSupportImgAspects(Dev, *BinImage))
+         continue;
+ 
+-      std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
+-      // Collect kernel names for the image
+-      {
+-        std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
+-        KernelIDs = m_BinImg2KernelIDs[BinImage];
+-        // If the image does not contain any non-service kernels we can skip it.
+-        if (!KernelIDs || KernelIDs->empty())
+-          continue;
+      auto InsertRes = ImageInfoMap.insert({BinImage, {}});
+      DeviceBinaryImageInfo &ImgInfo = InsertRes.first->second;
+      if (InsertRes.second) {
+        ImgInfo.State = getBinImageState(BinImage);
+        // Collect kernel names for the image
+        {
+          std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
+          ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage];
+        }
+       }
+      const bundle_state ImgState = ImgInfo.State;
+      const std::shared_ptr<std::vector<sycl::kernel_id>> &ImageKernelIDs =
+          ImgInfo.KernelIDs;
+      int &ImgRequirementCounter = ImgInfo.RequirementCounter;
+ 
+-      DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
+-          BinImage, Ctx, Devs, ImgState, KernelIDs, /*PIProgram=*/nullptr);
+      // If the image does not contain any non-service kernels we can skip it.
+      if (!ImageKernelIDs || ImageKernelIDs->empty())
+        continue;
+ 
+-      SYCLDeviceImages.push_back(
+-          createSyclObjFromImpl<device_image_plain>(Impl));
+-      break;
+      // Update tracked information.
+      for (kernel_id &KernelID : *ImageKernelIDs) {
+        StateImagesPairT *StateImagesPair;
+        // If only specific kernels are requested, ignore the rest.
+        if (!KernelIDs.empty()) {
+          auto It = KernelImageMap.find(KernelID);
+          if (It == KernelImageMap.end())
+            continue;
+          StateImagesPair = &It->second;
+        } else
+          StateImagesPair = &KernelImageMap[KernelID];
+
+        auto &[KernelImagesState, KernelImages] = *StateImagesPair;
+
+        if (KernelImages.empty()) {
+          KernelImagesState = ImgState;
+          KernelImages.push_back(BinImage);
+          ++ImgRequirementCounter;
+        } else if (KernelImagesState < ImgState) {
+          for (RTDeviceBinaryImage *Img : KernelImages) {
+            auto It = ImageInfoMap.find(Img);
+            assert(It != ImageInfoMap.end());
+            assert(It->second.RequirementCounter > 0);
+            --(It->second.RequirementCounter);
+          }
+          KernelImages.clear();
+          KernelImages.push_back(BinImage);
+          KernelImagesState = ImgState;
+          ++ImgRequirementCounter;
+        } else if (KernelImagesState == ImgState) {
+          KernelImages.push_back(BinImage);
+          ++ImgRequirementCounter;
+        }
+      }
+     }
+   }
+ 
+  for (const auto &ImgInfoPair : ImageInfoMap) {
+    if (ImgInfoPair.second.RequirementCounter == 0)
+      continue;
+
+    DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
+        ImgInfoPair.first, Ctx, Devs, ImgInfoPair.second.State,
+        ImgInfoPair.second.KernelIDs, /*PIProgram=*/nullptr);
+
+    SYCLDeviceImages.push_back(createSyclObjFromImpl<device_image_plain>(Impl));
+  }
+
+   return SYCLDeviceImages;
+ }
+ 
--- a/build_files/build_environment/patches/embree.diff
+++ b/build_files/build_environment/patches/embree.diff
@ -149,3 +149,19 @@ index 074f910a2..30f490818 100644
         return is_hit_first | is_hit_second;
       }
     };
+diff -ruN a/kernels/sycl/rthwif_embree_builder.cpp b/kernels/sycl/rthwif_embree_builder.cpp
+--- a/kernels/sycl/rthwif_embree_builder.cpp    2023-03-28 17:23:06.429190200 +0200
+++ b/kernels/sycl/rthwif_embree_builder.cpp    2023-03-28 17:35:01.291938600 +0200
+@@ -540,7 +540,12 @@
+       assert(offset <= geomDescrData.size());
+     }
+
+    /* Force running BVH building sequentially from the calling thread if using TBB < 2021, as it otherwise leads to runtime issues. */
+#if TBB_VERSION_MAJOR<2021
+    RTHWIF_PARALLEL_OPERATION parallelOperation = nullptr;
+#else
+     RTHWIF_PARALLEL_OPERATION parallelOperation = rthwifNewParallelOperation();
+#endif
+
+     /* estimate static accel size */
+     BBox1f time_range(0,1);
--- a/build_files/cmake/macros.cmake
+++ b/build_files/cmake/macros.cmake
@ -1384,4 +1384,3 @@ macro(windows_process_platform_bundled_libraries library_deps)
    endforeach()
  endif()
 endmacro()
-
--- a/build_files/cmake/platform/platform_unix.cmake
+++ b/build_files/cmake/platform/platform_unix.cmake
@ -335,18 +335,6 @@ if(WITH_CYCLES AND (WITH_CYCLES_DEVICE_ONEAPI OR (WITH_CYCLES_EMBREE AND EMBREE_
  unset(_sycl_runtime_libraries)
 endif()

-if(WITH_CYCLES AND WITH_CYCLES_DEVICE_ONEAPI)
-  if(WITH_CYCLES_ONEAPI_BINARIES)
-    set(cycles_kernel_oneapi_lib_suffix "_aot")
-  else()
-    set(cycles_kernel_oneapi_lib_suffix "_jit")
-  endif()
-  list(APPEND PLATFORM_BUNDLED_LIBRARIES
-    ${CMAKE_CURRENT_BINARY_DIR}/intern/cycles/kernel/libcycles_kernel_oneapi${cycles_kernel_oneapi_lib_suffix}.so
-  )
-  unset(cycles_kernel_oneapi_lib_suffix)
-endif()
-
 if(WITH_OPENVDB)
  find_package(OpenVDB)
  set_and_warn_library_found("OpenVDB" OPENVDB_FOUND WITH_OPENVDB)
--- a/build_files/cmake/platform/platform_win32.cmake
+++ b/build_files/cmake/platform/platform_win32.cmake
@ -1105,18 +1105,6 @@ if(WITH_CYCLES AND (WITH_CYCLES_DEVICE_ONEAPI OR (WITH_CYCLES_EMBREE AND EMBREE_
  set(SYCL_LIBRARIES optimized ${SYCL_LIBRARY} debug ${SYCL_LIBRARY_DEBUG})
 endif()

-if(WITH_CYCLES AND WITH_CYCLES_DEVICE_ONEAPI)
-  if(WITH_CYCLES_ONEAPI_BINARIES)
-    set(cycles_kernel_oneapi_lib_suffix "_aot")
-  else()
-    set(cycles_kernel_oneapi_lib_suffix "_jit")
-  endif()
-  list(APPEND PLATFORM_BUNDLED_LIBRARIES
-    ${CMAKE_CURRENT_BINARY_DIR}/intern/cycles/kernel/cycles_kernel_oneapi${cycles_kernel_oneapi_lib_suffix}.dll
-  )
-  unset(cycles_kernel_oneapi_lib_suffix)
-endif()
-

 # Environment variables to run precompiled executables that needed libraries.
 list(JOIN PLATFORM_BUNDLED_LIBRARY_DIRS ";" _library_paths)
--- a/extern/mantaflow/preprocessed/plugin/advection.cpp
+++ b/extern/mantaflow/preprocessed/plugin/advection.cpp
@ -572,7 +572,7 @@ template<class T> inline bool cmpMinMax(T &minv, T &maxv, const T &val)
 }
 template<> inline bool cmpMinMax<Vec3>(Vec3 &minv, Vec3 &maxv, const Vec3 &val)
 {
-  return (cmpMinMax(minv.x, maxv.x, val.x) | cmpMinMax(minv.y, maxv.y, val.y) |
+  return (cmpMinMax(minv.x, maxv.x, val.x) || cmpMinMax(minv.y, maxv.y, val.y) ||
          cmpMinMax(minv.z, maxv.z, val.z));
 }

--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@ -281,6 +281,9 @@ endif()

 if(WITH_CYCLES_EMBREE)
  add_definitions(-DWITH_EMBREE)
+  if(WITH_CYCLES_DEVICE_ONEAPI AND EMBREE_SYCL_SUPPORT)
+    add_definitions(-DWITH_EMBREE_GPU)
+  endif()
  add_definitions(-DEMBREE_MAJOR_VERSION=${EMBREE_MAJOR_VERSION})
  include_directories(
    SYSTEM
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@ -1544,6 +1544,13 @@ class CyclesPreferences(bpy.types.AddonPreferences):
        default=False,
    )

+    use_oneapirt: BoolProperty(
+        name="Embree on GPU (Experimental)",
+        description="Embree GPU execution will allow to use hardware ray tracing on Intel GPUs, which will provide better performance. "
+                    "However this support is experimental and some scenes may render incorrectly",
+        default=False,
+    )
+
    kernel_optimization_level: EnumProperty(
        name="Kernel Optimization",
        description="Kernels can be optimized based on scene content. Optimized kernels are requested at the start of a render. "
@ -1763,6 +1770,11 @@ class CyclesPreferences(bpy.types.AddonPreferences):
                    col.prop(self, "kernel_optimization_level")
                col.prop(self, "use_metalrt")

+        if compute_device_type == 'ONEAPI' and _cycles.with_embree_gpu:
+            row = layout.row()
+            row.use_property_split = True
+            row.prop(self, "use_oneapirt")
+
    def draw(self, context):
        self.draw_impl(self.layout, context)

--- a/intern/cycles/blender/device.cpp
+++ b/intern/cycles/blender/device.cpp
@ -112,9 +112,26 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences,
    device.has_peer_memory = false;
  }

-  if (get_boolean(cpreferences, "use_metalrt")) {
-    device.use_metalrt = true;
+  bool accumulated_use_hardware_raytracing = false;
+  foreach (
+      DeviceInfo &info,
+      (device.multi_devices.size() != 0 ? device.multi_devices : vector<DeviceInfo>({device}))) {
+    if (info.type == DEVICE_METAL && !get_boolean(cpreferences, "use_metalrt")) {
+      info.use_hardware_raytracing = false;
+    }
+
+    if (info.type == DEVICE_ONEAPI && !get_boolean(cpreferences, "use_oneapirt")) {
+      info.use_hardware_raytracing = false;
+    }
+
+    /* There is an accumulative logic here, because Multi-devices are support only for
+     * the same backend + CPU in Blender right now, and both oneAPI and Metal have a
+     * global boolean backend setting (see above) for enabling/disabling HW RT,
+     * so all sub-devices in the multi-device should enable (or disable) HW RT
+     * simultaneously (and CPU device are expected to ignore `use_hardware_raytracing` setting). */
+    accumulated_use_hardware_raytracing |= info.use_hardware_raytracing;
  }
+  device.use_hardware_raytracing = accumulated_use_hardware_raytracing;

  if (preview) {
    /* Disable specialization for preview renders. */
--- a/intern/cycles/blender/python.cpp
+++ b/intern/cycles/blender/python.cpp
@ -1034,6 +1034,14 @@ void *CCL_python_module_init()
  Py_INCREF(Py_False);
 #endif /* WITH_EMBREE */

+#ifdef WITH_EMBREE_GPU
+  PyModule_AddObject(mod, "with_embree_gpu", Py_True);
+  Py_INCREF(Py_True);
+#else  /* WITH_EMBREE_GPU */
+  PyModule_AddObject(mod, "with_embree_gpu", Py_False);
+  Py_INCREF(Py_False);
+#endif /* WITH_EMBREE_GPU */
+
  if (ccl::openimagedenoise_supported()) {
    PyModule_AddObject(mod, "with_openimagedenoise", Py_True);
    Py_INCREF(Py_True);
--- a/intern/cycles/blender/session.cpp
+++ b/intern/cycles/blender/session.cpp
@ -1061,7 +1061,7 @@ void BlenderSession::ensure_display_driver_if_needed()
  unique_ptr<BlenderDisplayDriver> display_driver = make_unique<BlenderDisplayDriver>(
      b_engine, b_scene, background);
  display_driver_ = display_driver.get();
-  session->set_display_driver(move(display_driver));
+  session->set_display_driver(std::move(display_driver));
 }

 CCL_NAMESPACE_END
--- a/intern/cycles/bvh/bvh2.cpp
+++ b/intern/cycles/bvh/bvh2.cpp
@ -606,7 +606,7 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
      int4 *bvh_nodes = &bvh->pack.nodes[0];
      size_t bvh_nodes_size = bvh->pack.nodes.size();

-      for (size_t i = 0, j = 0; i < bvh_nodes_size; j++) {
+      for (size_t i = 0; i < bvh_nodes_size;) {
        size_t nsize, nsize_bbox;
        if (bvh_nodes[i].x & PATH_RAY_NODE_UNALIGNED) {
          nsize = BVH_UNALIGNED_NODE_SIZE;
--- a/intern/cycles/device/cpu/device_impl.cpp
+++ b/intern/cycles/device/cpu/device_impl.cpp
@ -84,7 +84,7 @@ CPUDevice::~CPUDevice()
  texture_info.free();
 }

-BVHLayoutMask CPUDevice::get_bvh_layout_mask() const
+BVHLayoutMask CPUDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
 {
  BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2;
 #ifdef WITH_EMBREE
--- a/intern/cycles/device/cpu/device_impl.h
+++ b/intern/cycles/device/cpu/device_impl.h
@ -56,7 +56,7 @@ class CPUDevice : public Device {
  CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_);
  ~CPUDevice();

-  virtual BVHLayoutMask get_bvh_layout_mask() const override;
+  virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;

  /* Returns true if the texture info was copied to the device (meaning, some more
   * re-initialization might be needed). */
--- a/intern/cycles/device/cuda/device_impl.cpp
+++ b/intern/cycles/device/cuda/device_impl.cpp
@ -35,7 +35,7 @@ bool CUDADevice::have_precompiled_kernels()
  return path_exists(cubins_path);
 }

-BVHLayoutMask CUDADevice::get_bvh_layout_mask() const
+BVHLayoutMask CUDADevice::get_bvh_layout_mask(uint /*kernel_features*/) const
 {
  return BVH_LAYOUT_BVH2;
 }
--- a/intern/cycles/device/cuda/device_impl.h
+++ b/intern/cycles/device/cuda/device_impl.h
@ -38,7 +38,7 @@ class CUDADevice : public GPUDevice {

  static bool have_precompiled_kernels();

-  virtual BVHLayoutMask get_bvh_layout_mask() const override;
+  virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;

  void set_error(const string &error) override;

--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@ -354,7 +354,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
  info.has_guiding = true;
  info.has_profiling = true;
  info.has_peer_memory = false;
-  info.use_metalrt = false;
+  info.use_hardware_raytracing = false;
  info.denoisers = DENOISER_ALL;

  foreach (const DeviceInfo &device, subdevices) {
@ -403,7 +403,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
    info.has_guiding &= device.has_guiding;
    info.has_profiling &= device.has_profiling;
    info.has_peer_memory |= device.has_peer_memory;
-    info.use_metalrt |= device.use_metalrt;
+    info.use_hardware_raytracing |= device.use_hardware_raytracing;
    info.denoisers &= device.denoisers;
  }

--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@ -71,15 +71,16 @@ class DeviceInfo {
  string description;
  string id; /* used for user preferences, should stay fixed with changing hardware config */
  int num;
-  bool display_device;  /* GPU is used as a display device. */
-  bool has_nanovdb;     /* Support NanoVDB volumes. */
-  bool has_light_tree;  /* Support light tree. */
-  bool has_osl;         /* Support Open Shading Language. */
-  bool has_guiding;     /* Support path guiding. */
-  bool has_profiling;   /* Supports runtime collection of profiling info. */
-  bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */
-  bool has_gpu_queue;   /* Device supports GPU queue. */
-  bool use_metalrt;     /* Use MetalRT to accelerate ray queries (Metal only). */
+  bool display_device;          /* GPU is used as a display device. */
+  bool has_nanovdb;             /* Support NanoVDB volumes. */
+  bool has_light_tree;          /* Support light tree. */
+  bool has_osl;                 /* Support Open Shading Language. */
+  bool has_guiding;             /* Support path guiding. */
+  bool has_profiling;           /* Supports runtime collection of profiling info. */
+  bool has_peer_memory;         /* GPU has P2P access to memory of another GPU. */
+  bool has_gpu_queue;           /* Device supports GPU queue. */
+  bool use_hardware_raytracing; /* Use hardware ray tracing to accelerate ray queries in a backend.
+                                 */
  KernelOptimizationLevel kernel_optimization_level; /* Optimization level applied to path tracing
                                                      * kernels (Metal only). */
  DenoiserTypeMask denoisers;                        /* Supported denoiser types. */
@ -101,7 +102,7 @@ class DeviceInfo {
    has_profiling = false;
    has_peer_memory = false;
    has_gpu_queue = false;
-    use_metalrt = false;
+    use_hardware_raytracing = false;
    denoisers = DENOISER_NONE;
  }

@ -157,7 +158,7 @@ class Device {
    fprintf(stderr, "%s\n", error.c_str());
    fflush(stderr);
  }
-  virtual BVHLayoutMask get_bvh_layout_mask() const = 0;
+  virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const = 0;

  /* statistics */
  Stats &stats;
--- a/intern/cycles/device/dummy/device.cpp
+++ b/intern/cycles/device/dummy/device.cpp
@ -20,7 +20,7 @@ class DummyDevice : public Device {

  ~DummyDevice() {}

-  virtual BVHLayoutMask get_bvh_layout_mask() const override
+  virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override
  {
    return 0;
  }
--- a/intern/cycles/device/hip/device_impl.cpp
+++ b/intern/cycles/device/hip/device_impl.cpp
@ -35,7 +35,7 @@ bool HIPDevice::have_precompiled_kernels()
  return path_exists(fatbins_path);
 }

-BVHLayoutMask HIPDevice::get_bvh_layout_mask() const
+BVHLayoutMask HIPDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
 {
  return BVH_LAYOUT_BVH2;
 }
--- a/intern/cycles/device/hip/device_impl.h
+++ b/intern/cycles/device/hip/device_impl.h
@ -35,7 +35,7 @@ class HIPDevice : public GPUDevice {

  static bool have_precompiled_kernels();

-  virtual BVHLayoutMask get_bvh_layout_mask() const override;
+  virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;

  void set_error(const string &error) override;

--- a/intern/cycles/device/kernel.cpp
+++ b/intern/cycles/device/kernel.cpp
@ -3,7 +3,9 @@

 #include "device/kernel.h"

-#include "util/log.h"
+#ifndef __KERNEL_ONEAPI__
+#  include "util/log.h"
+#endif

 CCL_NAMESPACE_BEGIN

@ -153,10 +155,13 @@ const char *device_kernel_as_string(DeviceKernel kernel)
    case DEVICE_KERNEL_NUM:
      break;
  };
+#ifndef __KERNEL_ONEAPI__
  LOG(FATAL) << "Unhandled kernel " << static_cast<int>(kernel) << ", should never happen.";
+#endif
  return "UNKNOWN";
 }

+#ifndef __KERNEL_ONEAPI__
 std::ostream &operator<<(std::ostream &os, DeviceKernel kernel)
 {
  os << device_kernel_as_string(kernel);
@ -178,5 +183,6 @@ string device_kernel_mask_as_string(DeviceKernelMask mask)

  return str;
 }
+#endif

 CCL_NAMESPACE_END
--- a/intern/cycles/device/kernel.h
+++ b/intern/cycles/device/kernel.h
@ -3,11 +3,13 @@

 #pragma once

-#include "kernel/types.h"
+#ifndef __KERNEL_ONEAPI__
+#  include "kernel/types.h"

-#include "util/string.h"
+#  include "util/string.h"

-#include <ostream>  // NOLINT
+#  include <ostream>  // NOLINT
+#endif

 CCL_NAMESPACE_BEGIN

@ -15,9 +17,12 @@ bool device_kernel_has_shading(DeviceKernel kernel);
 bool device_kernel_has_intersection(DeviceKernel kernel);

 const char *device_kernel_as_string(DeviceKernel kernel);
+
+#ifndef __KERNEL_ONEAPI__
 std::ostream &operator<<(std::ostream &os, DeviceKernel kernel);

 typedef uint64_t DeviceKernelMask;
 string device_kernel_mask_as_string(DeviceKernelMask mask);
+#endif

 CCL_NAMESPACE_END
--- a/intern/cycles/device/metal/device_impl.h
+++ b/intern/cycles/device/metal/device_impl.h
@ -100,7 +100,7 @@ class MetalDevice : public Device {

  virtual void cancel() override;

-  virtual BVHLayoutMask get_bvh_layout_mask() const override;
+  virtual BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;

  void set_error(const string &error) override;

--- a/intern/cycles/device/metal/device_impl.mm
+++ b/intern/cycles/device/metal/device_impl.mm
@ -39,7 +39,7 @@ bool MetalDevice::is_device_cancelled(int ID)
  return get_device_by_ID(ID, lock) == nullptr;
 }

-BVHLayoutMask MetalDevice::get_bvh_layout_mask() const
+BVHLayoutMask MetalDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
 {
  return use_metalrt ? BVH_LAYOUT_METAL : BVH_LAYOUT_BVH2;
 }
@ -100,12 +100,12 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
    }
    case METAL_GPU_AMD: {
      max_threads_per_threadgroup = 128;
-      use_metalrt = info.use_metalrt;
+      use_metalrt = info.use_hardware_raytracing;
      break;
    }
    case METAL_GPU_APPLE: {
      max_threads_per_threadgroup = 512;
-      use_metalrt = info.use_metalrt;
+      use_metalrt = info.use_hardware_raytracing;
      break;
    }
  }
--- a/intern/cycles/device/multi/device.cpp
+++ b/intern/cycles/device/multi/device.cpp
@ -96,12 +96,13 @@ class MultiDevice : public Device {
    return error_msg;
  }

-  virtual BVHLayoutMask get_bvh_layout_mask() const override
+  virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const override
  {
    BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL;
    BVHLayoutMask bvh_layout_mask_all = BVH_LAYOUT_NONE;
    foreach (const SubDevice &sub_device, devices) {
-      BVHLayoutMask device_bvh_layout_mask = sub_device.device->get_bvh_layout_mask();
+      BVHLayoutMask device_bvh_layout_mask = sub_device.device->get_bvh_layout_mask(
+          kernel_features);
      bvh_layout_mask &= device_bvh_layout_mask;
      bvh_layout_mask_all |= device_bvh_layout_mask;
    }
--- a/intern/cycles/device/oneapi/device.cpp
+++ b/intern/cycles/device/oneapi/device.cpp
@ -40,12 +40,12 @@ bool device_oneapi_init()
  if (getenv("SYCL_CACHE_TRESHOLD") == nullptr) {
    _putenv_s("SYCL_CACHE_THRESHOLD", "0");
  }
-  if (getenv("SYCL_DEVICE_FILTER") == nullptr) {
+  if (getenv("ONEAPI_DEVICE_SELECTOR") == nullptr) {
    if (getenv("CYCLES_ONEAPI_ALL_DEVICES") == nullptr) {
-      _putenv_s("SYCL_DEVICE_FILTER", "level_zero");
+      _putenv_s("ONEAPI_DEVICE_SELECTOR", "level_zero:*");
    }
    else {
-      _putenv_s("SYCL_DEVICE_FILTER", "level_zero,cuda,hip");
+      _putenv_s("ONEAPI_DEVICE_SELECTOR", "!opencl:*");
    }
  }
  if (getenv("SYCL_ENABLE_PCI") == nullptr) {
@ -58,10 +58,10 @@ bool device_oneapi_init()
  setenv("SYCL_CACHE_PERSISTENT", "1", false);
  setenv("SYCL_CACHE_THRESHOLD", "0", false);
  if (getenv("CYCLES_ONEAPI_ALL_DEVICES") == nullptr) {
-    setenv("SYCL_DEVICE_FILTER", "level_zero", false);
+    setenv("ONEAPI_DEVICE_SELECTOR", "level_zero:*", false);
  }
  else {
-    setenv("SYCL_DEVICE_FILTER", "level_zero,cuda,hip", false);
+    setenv("ONEAPI_DEVICE_SELECTOR", "!opencl:*", false);
  }
  setenv("SYCL_ENABLE_PCI", "1", false);
  setenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE", "0", false);
@ -87,7 +87,8 @@ Device *device_oneapi_create(const DeviceInfo &info, Stats &stats, Profiler &pro
 }

 #ifdef WITH_ONEAPI
-static void device_iterator_cb(const char *id, const char *name, int num, void *user_ptr)
+static void device_iterator_cb(
+    const char *id, const char *name, int num, bool hwrt_support, void *user_ptr)
 {
  vector<DeviceInfo> *devices = (vector<DeviceInfo> *)user_ptr;

@ -112,6 +113,13 @@ static void device_iterator_cb(const char *id, const char *name, int num, void *
  /* NOTE(@nsirgien): Seems not possible to know from SYCL/oneAPI or Level0. */
  info.display_device = false;

+#  ifdef WITH_EMBREE_GPU
+  info.use_hardware_raytracing = hwrt_support;
+#  else
+  info.use_hardware_raytracing = false;
+  (void)hwrt_support;
+#  endif
+
  devices->push_back(info);
  VLOG_INFO << "Added device \"" << name << "\" with id \"" << info.id << "\".";
 }
--- a/intern/cycles/device/oneapi/device_impl.cpp
+++ b/intern/cycles/device/oneapi/device_impl.cpp
@ -8,7 +8,19 @@
 #  include "util/debug.h"
 #  include "util/log.h"

+#  ifdef WITH_EMBREE_GPU
+#    include "bvh/embree.h"
+#  endif
+
 #  include "kernel/device/oneapi/globals.h"
+#  include "kernel/device/oneapi/kernel.h"
+
+#  if defined(WITH_EMBREE_GPU) && defined(EMBREE_SYCL_SUPPORT) && !defined(SYCL_LANGUAGE_VERSION)
+/* These declarations are missing from embree headers when compiling from a compiler that doesn't
+ * support SYCL. */
+extern "C" RTCDevice rtcNewSYCLDevice(sycl::context context, const char *config);
+extern "C" bool rtcIsSYCLDeviceSupported(const sycl::device sycl_device);
+#  endif

 CCL_NAMESPACE_BEGIN

@ -22,16 +34,29 @@ static void queue_error_cb(const char *message, void *user_ptr)
 OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
    : Device(info, stats, profiler),
      device_queue_(nullptr),
+#  ifdef WITH_EMBREE_GPU
+      embree_device(nullptr),
+      embree_scene(nullptr),
+#  endif
      texture_info_(this, "texture_info", MEM_GLOBAL),
      kg_memory_(nullptr),
      kg_memory_device_(nullptr),
      kg_memory_size_(0)
 {
  need_texture_info_ = false;
+  use_hardware_raytracing = info.use_hardware_raytracing;

  oneapi_set_error_cb(queue_error_cb, &oneapi_error_string_);

-  bool is_finished_ok = create_queue(device_queue_, info.num);
+  bool is_finished_ok = create_queue(device_queue_,
+                                     info.num,
+#  ifdef WITH_EMBREE_GPU
+                                     use_hardware_raytracing ? &embree_device : nullptr
+#  else
+                                     nullptr
+#  endif
+  );
+
  if (is_finished_ok == false) {
    set_error("oneAPI queue initialization error: got runtime exception \"" +
              oneapi_error_string_ + "\"");
@ -42,6 +67,16 @@ OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profi
    assert(device_queue_);
  }

+#  ifdef WITH_EMBREE_GPU
+  use_hardware_raytracing = use_hardware_raytracing && (embree_device != nullptr);
+#  else
+  use_hardware_raytracing = false;
+#  endif
+
+  if (use_hardware_raytracing) {
+    VLOG_INFO << "oneAPI will use hardware ray tracing for intersection acceleration.";
+  }
+
  size_t globals_segment_size;
  is_finished_ok = kernel_globals_size(globals_segment_size);
  if (is_finished_ok == false) {
@ -64,6 +99,11 @@ OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profi

 OneapiDevice::~OneapiDevice()
 {
+#  ifdef WITH_EMBREE_GPU
+  if (embree_device)
+    rtcReleaseDevice(embree_device);
+#  endif
+
  texture_info_.free();
  usm_free(device_queue_, kg_memory_);
  usm_free(device_queue_, kg_memory_device_);
@ -80,15 +120,47 @@ bool OneapiDevice::check_peer_access(Device * /*peer_device*/)
  return false;
 }

-BVHLayoutMask OneapiDevice::get_bvh_layout_mask() const
+bool OneapiDevice::can_use_hardware_raytracing_for_features(uint requested_features) const
 {
-  return BVH_LAYOUT_BVH2;
+  /* MNEE and Ray-trace kernels currently don't work correctly with HWRT. */
+  return !(requested_features & (KERNEL_FEATURE_MNEE | KERNEL_FEATURE_NODE_RAYTRACE));
 }

+BVHLayoutMask OneapiDevice::get_bvh_layout_mask(uint requested_features) const
+{
+  return (use_hardware_raytracing &&
+          can_use_hardware_raytracing_for_features(requested_features)) ?
+             BVH_LAYOUT_EMBREE :
+             BVH_LAYOUT_BVH2;
+}
+
+#  ifdef WITH_EMBREE_GPU
+void OneapiDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
+{
+  if (embree_device && bvh->params.bvh_layout == BVH_LAYOUT_EMBREE) {
+    BVHEmbree *const bvh_embree = static_cast<BVHEmbree *>(bvh);
+    if (refit) {
+      bvh_embree->refit(progress);
+    }
+    else {
+      bvh_embree->build(progress, &stats, embree_device);
+    }
+    if (bvh->params.top_level) {
+      embree_scene = bvh_embree->scene;
+    }
+  }
+  else {
+    Device::build_bvh(bvh, progress, refit);
+  }
+}
+#  endif
+
 bool OneapiDevice::load_kernels(const uint requested_features)
 {
  assert(device_queue_);

+  kernel_features = requested_features;
+
  bool is_finished_ok = oneapi_run_test_kernel(device_queue_);
  if (is_finished_ok == false) {
    set_error("oneAPI test kernel execution: got a runtime exception \"" + oneapi_error_string_ +
@ -100,7 +172,14 @@ bool OneapiDevice::load_kernels(const uint requested_features)
    assert(device_queue_);
  }

-  is_finished_ok = oneapi_load_kernels(device_queue_, (const unsigned int)requested_features);
+  if (use_hardware_raytracing && !can_use_hardware_raytracing_for_features(requested_features)) {
+    VLOG_INFO
+        << "Hardware ray tracing disabled, not supported yet by oneAPI for requested features.";
+    use_hardware_raytracing = false;
+  }
+
+  is_finished_ok = oneapi_load_kernels(
+      device_queue_, (const unsigned int)requested_features, use_hardware_raytracing);
  if (is_finished_ok == false) {
    set_error("oneAPI kernels loading: got a runtime exception \"" + oneapi_error_string_ + "\"");
  }
@ -327,6 +406,16 @@ void OneapiDevice::const_copy_to(const char *name, void *host, size_t size)
             << string_human_readable_number(size) << " bytes. ("
             << string_human_readable_size(size) << ")";

+#  ifdef WITH_EMBREE_GPU
+  if (strcmp(name, "data") == 0) {
+    assert(size <= sizeof(KernelData));
+
+    /* Update scene handle(since it is different for each device on multi devices) */
+    KernelData *const data = (KernelData *)host;
+    data->device_bvh = embree_scene;
+  }
+#  endif
+
  ConstMemMap::iterator i = const_mem_map_.find(name);
  device_vector<uchar> *data;

@ -446,7 +535,9 @@ void OneapiDevice::check_usm(SyclQueue *queue_, const void *usm_ptr, bool allow_
 #  endif
 }

-bool OneapiDevice::create_queue(SyclQueue *&external_queue, int device_index)
+bool OneapiDevice::create_queue(SyclQueue *&external_queue,
+                                int device_index,
+                                void *embree_device_pointer)
 {
  bool finished_correct = true;
  try {
@ -457,6 +548,13 @@ bool OneapiDevice::create_queue(SyclQueue *&external_queue, int device_index)
    sycl::queue *created_queue = new sycl::queue(devices[device_index],
                                                 sycl::property::queue::in_order());
    external_queue = reinterpret_cast<SyclQueue *>(created_queue);
+#  ifdef WITH_EMBREE_GPU
+    if (embree_device_pointer) {
+      *((RTCDevice *)embree_device_pointer) = rtcNewSYCLDevice(created_queue->get_context(), "");
+    }
+#  else
+    (void)embree_device_pointer;
+#  endif
  }
  catch (sycl::exception const &e) {
    finished_correct = false;
@ -625,7 +723,8 @@ bool OneapiDevice::enqueue_kernel(KernelContext *kernel_context,
                                  size_t global_size,
                                  void **args)
 {
-  return oneapi_enqueue_kernel(kernel_context, kernel, global_size, args);
+  return oneapi_enqueue_kernel(
+      kernel_context, kernel, global_size, kernel_features, use_hardware_raytracing, args);
 }

 /* Compute-runtime (ie. NEO) version is what gets returned by sycl/L0 on Windows
@ -830,12 +929,17 @@ void OneapiDevice::iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_p
    std::string name = device.get_info<sycl::info::device::name>();
 #  else
    std::string name = "SYCL Host Task (Debug)";
+#  endif
+#  ifdef WITH_EMBREE_GPU
+    bool hwrt_support = rtcIsSYCLDeviceSupported(device);
+#  else
+    bool hwrt_support = false;
 #  endif
    std::string id = "ONEAPI_" + platform_name + "_" + name;
    if (device.has(sycl::aspect::ext_intel_pci_address)) {
      id.append("_" + device.get_info<sycl::ext::intel::info::device::pci_address>());
    }
-    (cb)(id.c_str(), name.c_str(), num, user_ptr);
+    (cb)(id.c_str(), name.c_str(), num, hwrt_support, user_ptr);
    num++;
  }
 }
--- a/intern/cycles/device/oneapi/device_impl.h
+++ b/intern/cycles/device/oneapi/device_impl.h
@ -16,15 +16,16 @@ CCL_NAMESPACE_BEGIN

 class DeviceQueue;

-typedef void (*OneAPIDeviceIteratorCallback)(const char *id,
-                                             const char *name,
-                                             int num,
-                                             void *user_ptr);
+typedef void (*OneAPIDeviceIteratorCallback)(
+    const char *id, const char *name, int num, bool hwrt_support, void *user_ptr);

 class OneapiDevice : public Device {
 private:
  SyclQueue *device_queue_;
-
+#  ifdef WITH_EMBREE_GPU
+  RTCDevice embree_device;
+  RTCScene embree_scene;
+#  endif
  using ConstMemMap = map<string, device_vector<uchar> *>;
  ConstMemMap const_mem_map_;
  device_vector<TextureInfo> texture_info_;
@ -34,17 +35,21 @@ class OneapiDevice : public Device {
  size_t kg_memory_size_ = (size_t)0;
  size_t max_memory_on_device_ = (size_t)0;
  std::string oneapi_error_string_;
+  bool use_hardware_raytracing = false;
+  unsigned int kernel_features = 0;

 public:
-  virtual BVHLayoutMask get_bvh_layout_mask() const override;
+  virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const override;

  OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);

  virtual ~OneapiDevice();
-
+#  ifdef WITH_EMBREE_GPU
+  void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
+#  endif
  bool check_peer_access(Device *peer_device) override;

-  bool load_kernels(const uint requested_features) override;
+  bool load_kernels(const uint kernel_features) override;

  void load_texture_info();

@ -113,8 +118,9 @@ class OneapiDevice : public Device {
  SyclQueue *sycl_queue();

 protected:
+  bool can_use_hardware_raytracing_for_features(uint kernel_features) const;
  void check_usm(SyclQueue *queue, const void *usm_ptr, bool allow_host);
-  bool create_queue(SyclQueue *&external_queue, int device_index);
+  bool create_queue(SyclQueue *&external_queue, int device_index, void *embree_device);
  void free_queue(SyclQueue *queue);
  void *usm_aligned_alloc_host(SyclQueue *queue, size_t memory_size, size_t alignment);
  void *usm_alloc_device(SyclQueue *queue, size_t memory_size);
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@ -151,7 +151,7 @@ unique_ptr<DeviceQueue> OptiXDevice::gpu_queue_create()
  return make_unique<OptiXDeviceQueue>(this);
 }

-BVHLayoutMask OptiXDevice::get_bvh_layout_mask() const
+BVHLayoutMask OptiXDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
 {
  /* OptiX has its own internal acceleration structure format. */
  return BVH_LAYOUT_OPTIX;
--- a/intern/cycles/device/optix/device_impl.h
+++ b/intern/cycles/device/optix/device_impl.h
@ -88,7 +88,7 @@ class OptiXDevice : public CUDADevice {
  OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
  ~OptiXDevice();

-  BVHLayoutMask get_bvh_layout_mask() const override;
+  BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;

  string compile_kernel_get_common_cflags(const uint kernel_features);

--- a/intern/cycles/integrator/path_trace.cpp
+++ b/intern/cycles/integrator/path_trace.cpp
@ -574,7 +574,7 @@ void PathTrace::denoise(const RenderWork &render_work)

 void PathTrace::set_output_driver(unique_ptr<OutputDriver> driver)
 {
-  output_driver_ = move(driver);
+  output_driver_ = std::move(driver);
 }

 void PathTrace::set_display_driver(unique_ptr<DisplayDriver> driver)
@ -585,7 +585,7 @@ void PathTrace::set_display_driver(unique_ptr<DisplayDriver> driver)
  destroy_gpu_resources();

  if (driver) {
-    display_ = make_unique<PathTraceDisplay>(move(driver));
+    display_ = make_unique<PathTraceDisplay>(std::move(driver));
  }
  else {
    display_ = nullptr;
--- a/intern/cycles/integrator/path_trace_display.cpp
+++ b/intern/cycles/integrator/path_trace_display.cpp
@ -9,7 +9,9 @@

 CCL_NAMESPACE_BEGIN

-PathTraceDisplay::PathTraceDisplay(unique_ptr<DisplayDriver> driver) : driver_(move(driver)) {}
+PathTraceDisplay::PathTraceDisplay(unique_ptr<DisplayDriver> driver) : driver_(std::move(driver))
+{
+}

 void PathTraceDisplay::reset(const BufferParams &buffer_params, const bool reset_rendering)
 {
--- a/intern/cycles/integrator/path_trace_work_gpu.cpp
+++ b/intern/cycles/integrator/path_trace_work_gpu.cpp
@ -28,6 +28,7 @@ static size_t estimate_single_state_size(const uint kernel_features)
 #define KERNEL_STRUCT_ARRAY_MEMBER(parent_struct, type, name, feature) \
  state_size += (kernel_features & (feature)) ? sizeof(type) : 0;
 #define KERNEL_STRUCT_END(name) \
+  (void)array_index; \
  break; \
  }
 #define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \
@ -139,6 +140,7 @@ void PathTraceWorkGPU::alloc_integrator_soa()
    integrator_state_gpu_.parent_struct[array_index].name = (type *)array->device_pointer; \
  }
 #define KERNEL_STRUCT_END(name) \
+  (void)array_index; \
  break; \
  }
 #define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \
@ -299,8 +301,8 @@ void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
   * become busy after adding new tiles). This is especially important for the shadow catcher which
   * schedules work in halves of available number of paths. */
  work_tile_scheduler_.set_max_num_path_states(max_num_paths_ / 8);
-  work_tile_scheduler_.set_accelerated_rt((device_->get_bvh_layout_mask() & BVH_LAYOUT_OPTIX) !=
-                                          0);
+  work_tile_scheduler_.set_accelerated_rt(
+      (device_->get_bvh_layout_mask(device_scene_->data.kernel_features) & BVH_LAYOUT_OPTIX) != 0);
  work_tile_scheduler_.reset(effective_buffer_params_,
                             start_sample,
                             samples_num,
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@ -96,10 +96,13 @@ set(SRC_KERNEL_DEVICE_ONEAPI_HEADERS
  device/oneapi/compat.h
  device/oneapi/context_begin.h
  device/oneapi/context_end.h
+  device/oneapi/context_intersect_begin.h
+  device/oneapi/context_intersect_end.h
  device/oneapi/globals.h
  device/oneapi/image.h
  device/oneapi/kernel.h
  device/oneapi/kernel_templates.h
+  device/cpu/bvh.h
 )

 set(SRC_KERNEL_CLOSURE_HEADERS
@ -764,7 +767,7 @@ if(WITH_CYCLES_DEVICE_ONEAPI)

  # Set defaults for spir64 and spir64_gen options
  if(NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64)
-    set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-large-register-file -ze-opt-regular-grf-kernel integrator_intersect'")
+    set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-regular-grf-kernel integrator_intersect -ze-opt-large-grf-kernel shade -ze-opt-no-local-to-generic'")
  endif()
  if(NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen)
    set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "${CYCLES_ONEAPI_SYCL_OPTIONS_spir64}" CACHE STRING "Extra build options for spir64_gen target")
@ -776,7 +779,7 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
  # Host execution won't use GPU binaries, no need to compile them.
  if(WITH_CYCLES_ONEAPI_BINARIES AND NOT WITH_CYCLES_ONEAPI_HOST_TASK_EXECUTION)
    # AoT binaries aren't currently reused when calling sycl::build.
-    list(APPEND sycl_compiler_flags -DSYCL_SKIP_KERNELS_PRELOAD)
+    list(APPEND sycl_compiler_flags -DWITH_CYCLES_ONEAPI_BINARIES)
    # Iterate over all targest and their options
    list(JOIN CYCLES_ONEAPI_SYCL_TARGETS "," targets_string)
    list(APPEND sycl_compiler_flags -fsycl-targets=${targets_string})
@ -798,6 +801,59 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
      -I"${NANOVDB_INCLUDE_DIR}")
  endif()

+  if(WITH_CYCLES_EMBREE AND EMBREE_SYCL_SUPPORT)
+    list(APPEND sycl_compiler_flags
+      -DWITH_EMBREE
+      -DWITH_EMBREE_GPU
+      -DEMBREE_MAJOR_VERSION=${EMBREE_MAJOR_VERSION}
+      -I"${EMBREE_INCLUDE_DIRS}")
+
+    if(WIN32)
+      list(APPEND sycl_compiler_flags
+        -ladvapi32.lib
+      )
+    endif()
+
+    set(next_library_mode "")
+    foreach(library ${EMBREE_LIBRARIES})
+      string(TOLOWER "${library}" library_lower)
+      if(("${library_lower}" STREQUAL "optimized") OR
+         ("${library_lower}" STREQUAL "debug"))
+        set(next_library_mode "${library_lower}")
+      else()
+        if(next_library_mode STREQUAL "")
+          list(APPEND EMBREE_TBB_LIBRARIES_optimized ${library})
+          list(APPEND EMBREE_TBB_LIBRARIES_debug ${library})
+        else()
+          list(APPEND EMBREE_TBB_LIBRARIES_${next_library_mode} ${library})
+        endif()
+        set(next_library_mode "")
+      endif()
+    endforeach()
+
+    foreach(library ${TBB_LIBRARIES})
+      string(TOLOWER "${library}" library_lower)
+      if(("${library_lower}" STREQUAL "optimized") OR
+         ("${library_lower}" STREQUAL "debug"))
+        set(next_library_mode "${library_lower}")
+      else()
+        if(next_library_mode STREQUAL "")
+          list(APPEND EMBREE_TBB_LIBRARIES_optimized ${library})
+          list(APPEND EMBREE_TBB_LIBRARIES_debug ${library})
+        else()
+          list(APPEND EMBREE_TBB_LIBRARIES_${next_library_mode} ${library})
+        endif()
+        set(next_library_mode "")
+      endif()
+    endforeach()
+      list(APPEND sycl_compiler_flags
+        "$<$<CONFIG:Release>:${EMBREE_TBB_LIBRARIES_optimized}>"
+        "$<$<CONFIG:RelWithDebInfo>:${EMBREE_TBB_LIBRARIES_optimized}>"
+        "$<$<CONFIG:MinSizeRel>:${EMBREE_TBB_LIBRARIES_optimized}>"
+        "$<$<CONFIG:Debug>:${EMBREE_TBB_LIBRARIES_debug}>"
+      )
+  endif()
+
  if(WITH_CYCLES_DEBUG)
    list(APPEND sycl_compiler_flags -DWITH_CYCLES_DEBUG)
  endif()
@ -907,13 +963,22 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
      DEPENDS ${cycles_oneapi_kernel_sources})
  endif()

-  # For the Cycles standalone put libraries next to the Cycles application.
  if(NOT WITH_BLENDER)
-    if(WIN32)
-      delayed_install("" "${cycles_kernel_oneapi_lib}" ${CYCLES_INSTALL_PATH})
-    else()
-      delayed_install("" "${cycles_kernel_oneapi_lib}" ${CYCLES_INSTALL_PATH}/lib)
-    endif()
+    # For the Cycles standalone put libraries next to the Cycles application.
+    set(cycles_oneapi_target_path ${CYCLES_INSTALL_PATH})
+  else()
+    # For Blender put the libraries next to the Blender executable.
+    #
+    # Note that the installation path in the delayed_install is relative to the versioned folder,
+    # which means we need to go one level up.
+    set(cycles_oneapi_target_path "../")
+  endif()
+
+  # install dynamic libraries required at runtime
+  if(WIN32)
+    delayed_install("" "${cycles_kernel_oneapi_lib}" ${cycles_oneapi_target_path})
+  elseif(UNIX AND NOT APPLE)
+    delayed_install("" "${cycles_kernel_oneapi_lib}" ${cycles_oneapi_target_path}/lib)
  endif()

  add_custom_target(cycles_kernel_oneapi ALL DEPENDS ${cycles_kernel_oneapi_lib})
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@ -21,6 +21,28 @@
 #  define __BVH2__
 #endif

+#if defined(__KERNEL_ONEAPI__) && defined(WITH_EMBREE_GPU)
+/* bool is apparently not tested for specialization constants:
+ * https://github.com/intel/llvm/blob/39d1c65272a786b2b13a6f094facfddf9408406d/sycl/test/basic_tests/SYCL-2020-spec-constants.cpp#L25-L27
+ * Instead of adding one more bool specialization constant, we reuse existing embree_features one
+ * and use RTC_FEATURE_FLAG_NONE as value to test for avoiding to call Embree on GPU.
+ */
+/* We set it to RTC_FEATURE_FLAG_NONE by default so AoT binaries contain MNE and ray-trace kernels
+ * pre-compiled without Embree.
+ * Changing this default value would require updating the logic in oneapi_load_kernels(). */
+static constexpr sycl::specialization_id<RTCFeatureFlags> oneapi_embree_features{
+    RTC_FEATURE_FLAG_NONE};
+#  define IF_USING_EMBREE \
+    if (kernel_handler.get_specialization_constant<oneapi_embree_features>() != \
+        RTC_FEATURE_FLAG_NONE)
+#  define IF_NOT_USING_EMBREE \
+    if (kernel_handler.get_specialization_constant<oneapi_embree_features>() == \
+        RTC_FEATURE_FLAG_NONE)
+#else
+#  define IF_USING_EMBREE
+#  define IF_NOT_USING_EMBREE
+#endif
+
 CCL_NAMESPACE_BEGIN

 #ifdef __BVH2__
@ -74,30 +96,39 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
  }

 #  ifdef __EMBREE__
-  if (kernel_data.device_bvh) {
-    return kernel_embree_intersect(kg, ray, visibility, isect);
+  IF_USING_EMBREE
+  {
+    if (kernel_data.device_bvh) {
+      return kernel_embree_intersect(kg, ray, visibility, isect);
+    }
  }
 #  endif

+  IF_NOT_USING_EMBREE
+  {
 #  ifdef __OBJECT_MOTION__
-  if (kernel_data.bvh.have_motion) {
+    if (kernel_data.bvh.have_motion) {
 #    ifdef __HAIR__
-    if (kernel_data.bvh.have_curves) {
-      return bvh_intersect_hair_motion(kg, ray, isect, visibility);
-    }
+      if (kernel_data.bvh.have_curves) {
+        return bvh_intersect_hair_motion(kg, ray, isect, visibility);
+      }
 #    endif /* __HAIR__ */

-    return bvh_intersect_motion(kg, ray, isect, visibility);
-  }
+      return bvh_intersect_motion(kg, ray, isect, visibility);
+    }
 #  endif /* __OBJECT_MOTION__ */

 #  ifdef __HAIR__
-  if (kernel_data.bvh.have_curves) {
-    return bvh_intersect_hair(kg, ray, isect, visibility);
-  }
+    if (kernel_data.bvh.have_curves) {
+      return bvh_intersect_hair(kg, ray, isect, visibility);
+    }
 #  endif /* __HAIR__ */

-  return bvh_intersect(kg, ray, isect, visibility);
+    return bvh_intersect(kg, ray, isect, visibility);
+  }
+
+  kernel_assert(false);
+  return false;
 }

 /* Single object BVH traversal, for SSS/AO/bevel. */
@ -129,17 +160,27 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
  }

 #    ifdef __EMBREE__
-  if (kernel_data.device_bvh) {
-    return kernel_embree_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
+  IF_USING_EMBREE
+  {
+    if (kernel_data.device_bvh) {
+      return kernel_embree_intersect_local(
+          kg, ray, local_isect, local_object, lcg_state, max_hits);
+    }
  }
 #    endif

+  IF_NOT_USING_EMBREE
+  {
 #    ifdef __OBJECT_MOTION__
-  if (kernel_data.bvh.have_motion) {
-    return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
-  }
+    if (kernel_data.bvh.have_motion) {
+      return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
+    }
 #    endif /* __OBJECT_MOTION__ */
-  return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
+    return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
+  }
+
+  kernel_assert(false);
+  return false;
 }
 #  endif

@ -184,35 +225,44 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
  }

 #    ifdef __EMBREE__
-  if (kernel_data.device_bvh) {
-    return kernel_embree_intersect_shadow_all(
-        kg, state, ray, visibility, max_hits, num_recorded_hits, throughput);
+  IF_USING_EMBREE
+  {
+    if (kernel_data.device_bvh) {
+      return kernel_embree_intersect_shadow_all(
+          kg, state, ray, visibility, max_hits, num_recorded_hits, throughput);
+    }
  }
 #    endif

+  IF_NOT_USING_EMBREE
+  {
 #    ifdef __OBJECT_MOTION__
-  if (kernel_data.bvh.have_motion) {
+    if (kernel_data.bvh.have_motion) {
 #      ifdef __HAIR__
-    if (kernel_data.bvh.have_curves) {
-      return bvh_intersect_shadow_all_hair_motion(
-          kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
-    }
+      if (kernel_data.bvh.have_curves) {
+        return bvh_intersect_shadow_all_hair_motion(
+            kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
+      }
 #      endif /* __HAIR__ */

-    return bvh_intersect_shadow_all_motion(
-        kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
-  }
+      return bvh_intersect_shadow_all_motion(
+          kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
+    }
 #    endif /* __OBJECT_MOTION__ */

 #    ifdef __HAIR__
-  if (kernel_data.bvh.have_curves) {
-    return bvh_intersect_shadow_all_hair(
-        kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
-  }
+    if (kernel_data.bvh.have_curves) {
+      return bvh_intersect_shadow_all_hair(
+          kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
+    }
 #    endif /* __HAIR__ */

-  return bvh_intersect_shadow_all(
-      kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
+    return bvh_intersect_shadow_all(
+        kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
+  }
+
+  kernel_assert(false);
+  return false;
 }
 #  endif /* __SHADOW_RECORD_ALL__ */

@ -239,13 +289,19 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
    return false;
  }

+  IF_NOT_USING_EMBREE
+  {
 #    ifdef __OBJECT_MOTION__
-  if (kernel_data.bvh.have_motion) {
-    return bvh_intersect_volume_motion(kg, ray, isect, visibility);
-  }
+    if (kernel_data.bvh.have_motion) {
+      return bvh_intersect_volume_motion(kg, ray, isect, visibility);
+    }
 #    endif /* __OBJECT_MOTION__ */

-  return bvh_intersect_volume(kg, ray, isect, visibility);
+    return bvh_intersect_volume(kg, ray, isect, visibility);
+  }
+
+  kernel_assert(false);
+  return false;
 }
 #  endif /* defined(__VOLUME__) && !defined(__VOLUME_RECORD_ALL__) */

@ -275,18 +331,27 @@ ccl_device_intersect uint scene_intersect_volume(KernelGlobals kg,
  }

 #    ifdef __EMBREE__
-  if (kernel_data.device_bvh) {
-    return kernel_embree_intersect_volume(kg, ray, isect, max_hits, visibility);
+  IF_USING_EMBREE
+  {
+    if (kernel_data.device_bvh) {
+      return kernel_embree_intersect_volume(kg, ray, isect, max_hits, visibility);
+    }
  }
 #    endif

+  IF_NOT_USING_EMBREE
+  {
 #    ifdef __OBJECT_MOTION__
-  if (kernel_data.bvh.have_motion) {
-    return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
-  }
+    if (kernel_data.bvh.have_motion) {
+      return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
+    }
 #    endif /* __OBJECT_MOTION__ */

-  return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
+    return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
+  }
+
+  kernel_assert(false);
+  return false;
 }

 #  endif /* defined(__VOLUME__) && defined(__VOLUME_RECORD_ALL__) */
--- a/intern/cycles/kernel/bvh/volume_all.h
+++ b/intern/cycles/kernel/bvh/volume_all.h
@ -51,8 +51,6 @@ ccl_device_inline
  int object = OBJECT_NONE;
  float isect_t = ray->tmax;

-  int num_hits_in_instance = 0;
-
  uint num_hits = 0;
  isect_array->t = ray->tmax;

@ -152,7 +150,6 @@ ccl_device_inline
                  /* Move on to next entry in intersections array. */
                  isect_array++;
                  num_hits++;
-                  num_hits_in_instance++;
                  isect_array->t = isect_t;
                  if (num_hits == max_hits) {
                    return num_hits;
@ -193,7 +190,6 @@ ccl_device_inline
                  /* Move on to next entry in intersections array. */
                  isect_array++;
                  num_hits++;
-                  num_hits_in_instance++;
                  isect_array->t = isect_t;
                  if (num_hits == max_hits) {
                    return num_hits;
@ -219,7 +215,6 @@ ccl_device_inline
            bvh_instance_push(kg, object, ray, &P, &dir, &idir);
 #endif

-            num_hits_in_instance = 0;
            isect_array->t = isect_t;

            ++stack_ptr;
--- a/intern/cycles/kernel/device/cpu/bvh.h
+++ b/intern/cycles/kernel/device/cpu/bvh.h
@ -13,8 +13,13 @@
 #  include <embree3/rtcore_scene.h>
 #endif

-#include "kernel/device/cpu/compat.h"
-#include "kernel/device/cpu/globals.h"
+#ifdef __KERNEL_ONEAPI__
+#  include "kernel/device/oneapi/compat.h"
+#  include "kernel/device/oneapi/globals.h"
+#else
+#  include "kernel/device/cpu/compat.h"
+#  include "kernel/device/cpu/globals.h"
+#endif

 #include "kernel/bvh/types.h"
 #include "kernel/bvh/util.h"
@ -33,11 +38,16 @@ using numhit_t = uint8_t;
 using numhit_t = uint32_t;
 #endif

-#define CYCLES_EMBREE_USED_FEATURES \
-  (RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE | \
-                    RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_FEATURE_FLAG_POINT | \
-                    RTC_FEATURE_FLAG_MOTION_BLUR | RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE | \
-                    RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE)
+#ifdef __KERNEL_ONEAPI__
+#  define CYCLES_EMBREE_USED_FEATURES \
+    (kernel_handler.get_specialization_constant<oneapi_embree_features>())
+#else
+#  define CYCLES_EMBREE_USED_FEATURES \
+    (RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE | \
+                      RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_FEATURE_FLAG_POINT | \
+                      RTC_FEATURE_FLAG_MOTION_BLUR | RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE | \
+                      RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE)
+#endif

 #define EMBREE_IS_HAIR(x) (x & 1)

@ -252,7 +262,8 @@ ccl_device_inline void kernel_embree_convert_sss_hit(KernelGlobals kg,
 * Things like recording subsurface or shadow hits for later evaluation
 * as well as filtering for volume objects happen here.
 * Cycles' own BVH does that directly inside the traversal calls. */
-ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNArguments *args)
+ccl_device_forceinline void kernel_embree_filter_intersection_func_impl(
+    const RTCFilterFunctionNArguments *args)
 {
  /* Current implementation in Cycles assumes only single-ray intersection queries. */
  assert(args->N == 1);
@ -263,7 +274,11 @@ ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNA
 #else
  CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
 #endif
+#ifdef __KERNEL_ONEAPI__
+  KernelGlobalsGPU *kg = nullptr;
+#else
  const KernelGlobalsCPU *kg = ctx->kg;
+#endif
  const Ray *cray = ctx->ray;

  if (kernel_embree_is_self_intersection(
@ -277,7 +292,7 @@ ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNA
 * as well as filtering for volume objects happen here.
 * Cycles' own BVH does that directly inside the traversal calls.
 */
-ccl_device void kernel_embree_filter_occluded_shadow_all_func(
+ccl_device_forceinline void kernel_embree_filter_occluded_shadow_all_func_impl(
    const RTCFilterFunctionNArguments *args)
 {
  /* Current implementation in Cycles assumes only single-ray intersection queries. */
@ -290,7 +305,11 @@ ccl_device void kernel_embree_filter_occluded_shadow_all_func(
 #else
  CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
 #endif
+#ifdef __KERNEL_ONEAPI__
+  KernelGlobalsGPU *kg = nullptr;
+#else
  const KernelGlobalsCPU *kg = ctx->kg;
+#endif
  const Ray *cray = ctx->ray;

  Intersection current_isect;
@ -326,7 +345,7 @@ ccl_device void kernel_embree_filter_occluded_shadow_all_func(
  }

  /* Test if we need to record this transparent intersection. */
-  const numhit_t max_record_hits = min(ctx->max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
+  const numhit_t max_record_hits = min(ctx->max_hits, numhit_t(INTEGRATOR_SHADOW_ISECT_SIZE));
  if (ctx->num_recorded_hits < max_record_hits) {
    /* If maximum number of hits was reached, replace the intersection with the
     * highest distance. We want to find the N closest intersections. */
@ -363,7 +382,7 @@ ccl_device void kernel_embree_filter_occluded_shadow_all_func(
  *args->valid = 0;
 }

-ccl_device_forceinline void kernel_embree_filter_occluded_local_func(
+ccl_device_forceinline void kernel_embree_filter_occluded_local_func_impl(
    const RTCFilterFunctionNArguments *args)
 {
  /* Current implementation in Cycles assumes only single-ray intersection queries. */
@ -376,7 +395,11 @@ ccl_device_forceinline void kernel_embree_filter_occluded_local_func(
 #else
  CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
 #endif
+#ifdef __KERNEL_ONEAPI__
+  KernelGlobalsGPU *kg = nullptr;
+#else
  const KernelGlobalsCPU *kg = ctx->kg;
+#endif
  const Ray *cray = ctx->ray;

  /* Check if it's hitting the correct object. */
@ -462,7 +485,7 @@ ccl_device_forceinline void kernel_embree_filter_occluded_local_func(
  *args->valid = 0;
 }

-ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func(
+ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func_impl(
    const RTCFilterFunctionNArguments *args)
 {
  /* Current implementation in Cycles assumes only single-ray intersection queries. */
@ -475,7 +498,11 @@ ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func(
 #else
  CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
 #endif
+#ifdef __KERNEL_ONEAPI__
+  KernelGlobalsGPU *kg = nullptr;
+#else
  const KernelGlobalsCPU *kg = ctx->kg;
+#endif
  const Ray *cray = ctx->ray;

  /* Append the intersection to the end of the array. */
@ -513,14 +540,14 @@ ccl_device_forceinline void kernel_embree_filter_occluded_func(

  switch (ctx->type) {
    case CCLIntersectContext::RAY_SHADOW_ALL:
-      kernel_embree_filter_occluded_shadow_all_func(args);
+      kernel_embree_filter_occluded_shadow_all_func_impl(args);
      break;
    case CCLIntersectContext::RAY_LOCAL:
    case CCLIntersectContext::RAY_SSS:
-      kernel_embree_filter_occluded_local_func(args);
+      kernel_embree_filter_occluded_local_func_impl(args);
      break;
    case CCLIntersectContext::RAY_VOLUME_ALL:
-      kernel_embree_filter_occluded_volume_all_func(args);
+      kernel_embree_filter_occluded_volume_all_func_impl(args);
      break;

    case CCLIntersectContext::RAY_REGULAR:
@ -569,7 +596,63 @@ ccl_device void kernel_embree_filter_occluded_func_backface_cull(

  kernel_embree_filter_occluded_func(args);
 }
+#endif

+#ifdef __KERNEL_ONEAPI__
+/* Static wrappers so we can call the callbacks from out side the ONEAPIKernelContext class */
+RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
+kernel_embree_filter_intersection_func_static(const RTCFilterFunctionNArguments *args)
+{
+  RTCHit *hit = (RTCHit *)args->hit;
+  CCLFirstHitContext *ctx = (CCLFirstHitContext *)(args->context);
+  ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
+  context->kernel_embree_filter_intersection_func_impl(args);
+}
+
+RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
+kernel_embree_filter_occluded_shadow_all_func_static(const RTCFilterFunctionNArguments *args)
+{
+  RTCHit *hit = (RTCHit *)args->hit;
+  CCLShadowContext *ctx = (CCLShadowContext *)(args->context);
+  ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
+  context->kernel_embree_filter_occluded_shadow_all_func_impl(args);
+}
+
+RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
+kernel_embree_filter_occluded_local_func_static(const RTCFilterFunctionNArguments *args)
+{
+  RTCHit *hit = (RTCHit *)args->hit;
+  CCLLocalContext *ctx = (CCLLocalContext *)(args->context);
+  ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
+  context->kernel_embree_filter_occluded_local_func_impl(args);
+}
+
+RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
+kernel_embree_filter_occluded_volume_all_func_static(const RTCFilterFunctionNArguments *args)
+{
+  RTCHit *hit = (RTCHit *)args->hit;
+  CCLVolumeContext *ctx = (CCLVolumeContext *)(args->context);
+  ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
+  context->kernel_embree_filter_occluded_volume_all_func_impl(args);
+}
+
+#  define kernel_embree_filter_intersection_func \
+    ONEAPIKernelContext::kernel_embree_filter_intersection_func_static
+#  define kernel_embree_filter_occluded_shadow_all_func \
+    ONEAPIKernelContext::kernel_embree_filter_occluded_shadow_all_func_static
+#  define kernel_embree_filter_occluded_local_func \
+    ONEAPIKernelContext::kernel_embree_filter_occluded_local_func_static
+#  define kernel_embree_filter_occluded_volume_all_func \
+    ONEAPIKernelContext::kernel_embree_filter_occluded_volume_all_func_static
+#else
+#  define kernel_embree_filter_intersection_func kernel_embree_filter_intersection_func_impl
+#  if EMBREE_MAJOR_VERSION >= 4
+#    define kernel_embree_filter_occluded_shadow_all_func \
+      kernel_embree_filter_occluded_shadow_all_func_impl
+#    define kernel_embree_filter_occluded_local_func kernel_embree_filter_occluded_local_func_impl
+#    define kernel_embree_filter_occluded_volume_all_func \
+      kernel_embree_filter_occluded_volume_all_func_impl
+#  endif
 #endif

 /* Scene intersection. */
@ -583,7 +666,15 @@ ccl_device_intersect bool kernel_embree_intersect(KernelGlobals kg,
 #if EMBREE_MAJOR_VERSION >= 4
  CCLFirstHitContext ctx;
  rtcInitRayQueryContext(&ctx);
+#  ifdef __KERNEL_ONEAPI__
+  /* NOTE(sirgienko): Cycles GPU back-ends passes NULL to KernelGlobals and
+   * uses global device allocation (CUDA, Optix, HIP) or passes all needed data
+   * as a class context (Metal, oneAPI). So we need to pass this context here
+   * in order to have an access to it later in Embree filter functions on GPU. */
+  ctx.kg = (KernelGlobals)this;
+#  else
  ctx.kg = kg;
+#  endif
 #else
  CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
  rtcInitIntersectContext(&ctx);
@ -596,7 +687,7 @@ ccl_device_intersect bool kernel_embree_intersect(KernelGlobals kg,
 #if EMBREE_MAJOR_VERSION >= 4
  RTCIntersectArguments args;
  rtcInitIntersectArguments(&args);
-  args.filter = (RTCFilterFunctionN)kernel_embree_filter_intersection_func;
+  args.filter = reinterpret_cast<RTCFilterFunctionN>(kernel_embree_filter_intersection_func);
  args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
  args.context = &ctx;
  rtcIntersect1(kernel_data.device_bvh, &ray_hit, &args);
@ -625,7 +716,15 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
 #  if EMBREE_MAJOR_VERSION >= 4
  CCLLocalContext ctx;
  rtcInitRayQueryContext(&ctx);
+#    ifdef __KERNEL_ONEAPI__
+  /* NOTE(sirgienko): Cycles GPU back-ends passes NULL to KernelGlobals and
+   * uses global device allocation (CUDA, Optix, HIP) or passes all needed data
+   * as a class context (Metal, oneAPI). So we need to pass this context here
+   * in order to have an access to it later in Embree filter functions on GPU. */
+  ctx.kg = (KernelGlobals)this;
+#    else
  ctx.kg = kg;
+#    endif
 #  else
  CCLIntersectContext ctx(kg,
                          has_bvh ? CCLIntersectContext::RAY_SSS : CCLIntersectContext::RAY_LOCAL);
@ -646,7 +745,7 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
 #  if EMBREE_MAJOR_VERSION >= 4
  RTCOccludedArguments args;
  rtcInitOccludedArguments(&args);
-  args.filter = (RTCFilterFunctionN)(kernel_embree_filter_occluded_local_func);
+  args.filter = reinterpret_cast<RTCFilterFunctionN>(kernel_embree_filter_occluded_local_func);
  args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
  args.context = &ctx;
 #  endif
@ -692,7 +791,7 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,

 #ifdef __SHADOW_RECORD_ALL__
 ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
-                                                             IntegratorShadowStateCPU *state,
+                                                             IntegratorShadowState state,
                                                             ccl_private const Ray *ray,
                                                             uint visibility,
                                                             uint max_hits,
@ -702,7 +801,15 @@ ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
 #  if EMBREE_MAJOR_VERSION >= 4
  CCLShadowContext ctx;
  rtcInitRayQueryContext(&ctx);
+#    ifdef __KERNEL_ONEAPI__
+  /* NOTE(sirgienko): Cycles GPU back-ends passes NULL to KernelGlobals and
+   * uses global device allocation (CUDA, Optix, HIP) or passes all needed data
+   * as a class context (Metal, oneAPI). So we need to pass this context here
+   * in order to have an access to it later in Embree filter functions on GPU. */
+  ctx.kg = (KernelGlobals)this;
+#    else
  ctx.kg = kg;
+#    endif
 #  else
  CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
  rtcInitIntersectContext(&ctx);
@ -718,7 +825,8 @@ ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
 #  if EMBREE_MAJOR_VERSION >= 4
  RTCOccludedArguments args;
  rtcInitOccludedArguments(&args);
-  args.filter = (RTCFilterFunctionN)kernel_embree_filter_occluded_shadow_all_func;
+  args.filter = reinterpret_cast<RTCFilterFunctionN>(
+      kernel_embree_filter_occluded_shadow_all_func);
  args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
  args.context = &ctx;
  rtcOccluded1(kernel_data.device_bvh, &rtc_ray, &args);
@ -742,7 +850,15 @@ ccl_device_intersect uint kernel_embree_intersect_volume(KernelGlobals kg,
 #  if EMBREE_MAJOR_VERSION >= 4
  CCLVolumeContext ctx;
  rtcInitRayQueryContext(&ctx);
+#    ifdef __KERNEL_ONEAPI__
+  /* NOTE(sirgienko) Cycles GPU back-ends passes NULL to KernelGlobals and
+   * uses global device allocation (CUDA, Optix, HIP) or passes all needed data
+   * as a class context (Metal, oneAPI). So we need to pass this context here
+   * in order to have an access to it later in Embree filter functions on GPU. */
+  ctx.kg = (KernelGlobals)this;
+#    else
  ctx.kg = kg;
+#    endif
 #  else
  CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
  rtcInitIntersectContext(&ctx);
@ -756,7 +872,8 @@ ccl_device_intersect uint kernel_embree_intersect_volume(KernelGlobals kg,
 #  if EMBREE_MAJOR_VERSION >= 4
  RTCOccludedArguments args;
  rtcInitOccludedArguments(&args);
-  args.filter = (RTCFilterFunctionN)kernel_embree_filter_occluded_volume_all_func;
+  args.filter = reinterpret_cast<RTCFilterFunctionN>(
+      kernel_embree_filter_occluded_volume_all_func);
  args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
  args.context = &ctx;
  rtcOccluded1(kernel_data.device_bvh, &rtc_ray, &args);
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@ -128,6 +128,12 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
 }
 ccl_gpu_kernel_postfix

+/* Intersection kernels need access to the kernel handler for specialization constants to work
+ * properly. */
+#ifdef __KERNEL_ONEAPI__
+#  include "kernel/device/oneapi/context_intersect_begin.h"
+#endif
+
 ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
    ccl_gpu_kernel_signature(integrator_intersect_closest,
                             ccl_global const int *path_index_array,
@ -185,6 +191,10 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
 }
 ccl_gpu_kernel_postfix

+#ifdef __KERNEL_ONEAPI__
+#  include "kernel/device/oneapi/context_intersect_end.h"
+#endif
+
 ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
    ccl_gpu_kernel_signature(integrator_shade_background,
                             ccl_global const int *path_index_array,
@ -249,6 +259,12 @@ ccl_gpu_kernel_postfix
 constant int __dummy_constant [[function_constant(Kernel_DummyConstant)]];
 #endif

+/* Kernels using intersections need access to the kernel handler for specialization constants to
+ * work properly. */
+#ifdef __KERNEL_ONEAPI__
+#  include "kernel/device/oneapi/context_intersect_begin.h"
+#endif
+
 ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
    ccl_gpu_kernel_signature(integrator_shade_surface_raytrace,
                             ccl_global const int *path_index_array,
@ -287,6 +303,9 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
  }
 }
 ccl_gpu_kernel_postfix
+#ifdef __KERNEL_ONEAPI__
+#  include "kernel/device/oneapi/context_intersect_end.h"
+#endif

 ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
    ccl_gpu_kernel_signature(integrator_shade_volume,
--- a/intern/cycles/kernel/device/oneapi/compat.h
+++ b/intern/cycles/kernel/device/oneapi/compat.h
@ -5,6 +5,11 @@

 #define __KERNEL_GPU__
 #define __KERNEL_ONEAPI__
+#define __KERNEL_64_BIT__
+
+#ifdef WITH_EMBREE_GPU
+#  define __KERNEL_GPU_RAYTRACING__
+#endif

 #define CCL_NAMESPACE_BEGIN
 #define CCL_NAMESPACE_END
@ -57,17 +62,19 @@
 #define ccl_gpu_kernel_threads(block_num_threads)

 #ifndef WITH_ONEAPI_SYCL_HOST_TASK
-#  define ccl_gpu_kernel_signature(name, ...) \
+#  define __ccl_gpu_kernel_signature(name, ...) \
 void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
                          size_t kernel_global_size, \
                          size_t kernel_local_size, \
                          sycl::handler &cgh, \
                          __VA_ARGS__) { \
      (kg); \
-      cgh.parallel_for<class kernel_##name>( \
+      cgh.parallel_for( \
          sycl::nd_range<1>(kernel_global_size, kernel_local_size), \
          [=](sycl::nd_item<1> item) {

+#  define ccl_gpu_kernel_signature __ccl_gpu_kernel_signature
+
 #  define ccl_gpu_kernel_postfix \
          }); \
    }
--- a/intern/cycles/kernel/device/oneapi/context_intersect_begin.h
+++ b/intern/cycles/kernel/device/oneapi/context_intersect_begin.h
@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2023 Intel Corporation */
+
+#if !defined(WITH_ONEAPI_SYCL_HOST_TASK) && defined(WITH_EMBREE_GPU)
+#  undef ccl_gpu_kernel_signature
+#  define ccl_gpu_kernel_signature(name, ...) \
+    void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
+                              size_t kernel_global_size, \
+                              size_t kernel_local_size, \
+                              sycl::handler &cgh, \
+                              __VA_ARGS__) \
+    { \
+      (kg); \
+      cgh.parallel_for( \
+          sycl::nd_range<1>(kernel_global_size, kernel_local_size), \
+          [=](sycl::nd_item<1> item, sycl::kernel_handler oneapi_kernel_handler) { \
+            ((ONEAPIKernelContext*)kg)->kernel_handler = oneapi_kernel_handler;
+#endif
--- a/intern/cycles/kernel/device/oneapi/context_intersect_end.h
+++ b/intern/cycles/kernel/device/oneapi/context_intersect_end.h
@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2023 Intel Corporation */
+
+#if !defined(WITH_ONEAPI_SYCL_HOST_TASK) && defined(WITH_EMBREE_GPU)
+#  undef ccl_gpu_kernel_signature
+#  define ccl_gpu_kernel_signature __ccl_gpu_kernel_signature
+#endif
--- a/intern/cycles/kernel/device/oneapi/globals.h
+++ b/intern/cycles/kernel/device/oneapi/globals.h
@ -31,6 +31,8 @@ typedef struct KernelGlobalsGPU {
  size_t nd_item_group_range_0;
  size_t nd_item_global_id_0;
  size_t nd_item_global_range_0;
+#else
+  sycl::kernel_handler kernel_handler;
 #endif
 } KernelGlobalsGPU;

--- a/intern/cycles/kernel/device/oneapi/kernel.cpp
+++ b/intern/cycles/kernel/device/oneapi/kernel.cpp
@ -16,9 +16,22 @@

 #  include "kernel/device/gpu/kernel.h"

+#  include "device/kernel.cpp"
+
 static OneAPIErrorCallback s_error_cb = nullptr;
 static void *s_error_user_ptr = nullptr;

+#  ifdef WITH_EMBREE_GPU
+static const RTCFeatureFlags CYCLES_ONEAPI_EMBREE_BASIC_FEATURES =
+    (const RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE |
+                            RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS |
+                            RTC_FEATURE_FLAG_POINT | RTC_FEATURE_FLAG_MOTION_BLUR);
+static const RTCFeatureFlags CYCLES_ONEAPI_EMBREE_ALL_FEATURES =
+    (const RTCFeatureFlags)(CYCLES_ONEAPI_EMBREE_BASIC_FEATURES |
+                            RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE |
+                            RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE);
+#  endif
+
 void oneapi_set_error_cb(OneAPIErrorCallback cb, void *user_ptr)
 {
  s_error_cb = cb;
@ -142,15 +155,96 @@ size_t oneapi_kernel_preferred_local_size(SyclQueue *queue,
  return std::min(limit_work_group_size, preferred_work_group_size);
 }

-bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
+bool oneapi_kernel_is_required_for_features(const std::string &kernel_name,
+                                            const uint kernel_features)
+{
+  if ((kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0 &&
+      kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE)) !=
+          std::string::npos)
+    return false;
+  if ((kernel_features & KERNEL_FEATURE_MNEE) == 0 &&
+      kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE)) !=
+          std::string::npos)
+    return false;
+  if ((kernel_features & KERNEL_FEATURE_VOLUME) == 0 &&
+      kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK)) !=
+          std::string::npos)
+    return false;
+
+  return true;
+}
+
+bool oneapi_kernel_is_using_embree(const std::string &kernel_name)
+{
+#  ifdef WITH_EMBREE_GPU
+  /* MNEE and Ray-trace kernels aren't yet enabled to use Embree. */
+  for (int i = 0; i < (int)DEVICE_KERNEL_NUM; i++) {
+    DeviceKernel kernel = (DeviceKernel)i;
+    if (device_kernel_has_intersection(kernel)) {
+      if (kernel_name.find(device_kernel_as_string(kernel)) != std::string::npos) {
+        return !(kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
+                 kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE);
+      }
+    }
+  }
+#  endif
+  return false;
+}
+
+bool oneapi_load_kernels(SyclQueue *queue_,
+                         const uint kernel_features,
+                         bool use_hardware_raytracing)
 {
-#  ifdef SYCL_SKIP_KERNELS_PRELOAD
-  (void)queue_;
-  (void)requested_features;
-#  else
  assert(queue_);
  sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);

+#  ifdef WITH_EMBREE_GPU
+  /* For best performance, we always JIT compile the kernels that are using Embree. */
+  if (use_hardware_raytracing) {
+    try {
+      sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
+          sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
+                                                             {queue->get_device()});
+
+      for (const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
+        const std::string &kernel_name = kernel_id.get_name();
+
+        if (!oneapi_kernel_is_required_for_features(kernel_name, kernel_features) ||
+            !oneapi_kernel_is_using_embree(kernel_name)) {
+          continue;
+        }
+
+        sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
+            sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
+
+        /* Hair requires embree curves support. */
+        if (kernel_features & KERNEL_FEATURE_HAIR) {
+          one_kernel_bundle_input
+              .set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
+                  CYCLES_ONEAPI_EMBREE_ALL_FEATURES);
+          sycl::build(one_kernel_bundle_input);
+        }
+        else {
+          one_kernel_bundle_input
+              .set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
+                  CYCLES_ONEAPI_EMBREE_BASIC_FEATURES);
+          sycl::build(one_kernel_bundle_input);
+        }
+      }
+    }
+    catch (sycl::exception const &e) {
+      if (s_error_cb) {
+        s_error_cb(e.what(), s_error_user_ptr);
+      }
+      return false;
+    }
+  }
+#  endif
+
+#  ifdef WITH_CYCLES_ONEAPI_BINARIES
+  (void)queue_;
+  (void)kernel_features;
+#  else
  try {
    sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
        sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
@ -159,27 +253,25 @@ bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
    for (const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
      const std::string &kernel_name = kernel_id.get_name();

-      /* NOTE(@nsirgien): Names in this conditions below should match names from
-       * oneapi_call macro in oneapi_enqueue_kernel below */
-      if (((requested_features & KERNEL_FEATURE_VOLUME) == 0) &&
-          kernel_name.find("oneapi_kernel_integrator_shade_volume") != std::string::npos) {
+      /* In case HWRT is on, compilation of kernels using Embree is already handled in previous
+       * block. */
+      if (!oneapi_kernel_is_required_for_features(kernel_name, kernel_features) ||
+          (use_hardware_raytracing && oneapi_kernel_is_using_embree(kernel_name))) {
        continue;
      }

-      if (((requested_features & KERNEL_FEATURE_MNEE) == 0) &&
-          kernel_name.find("oneapi_kernel_integrator_shade_surface_mnee") != std::string::npos) {
-        continue;
-      }
-
-      if (((requested_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0) &&
-          kernel_name.find("oneapi_kernel_integrator_shade_surface_raytrace") !=
-              std::string::npos) {
-        continue;
-      }
-
-      sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle =
+      sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
          sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
-      sycl::build(one_kernel_bundle);
+#    ifdef WITH_EMBREE_GPU
+      /* This is expected to be the default, we set it again to be sure. */
+      if (one_kernel_bundle_input
+              .has_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>()) {
+        one_kernel_bundle_input
+            .set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
+                RTC_FEATURE_FLAG_NONE);
+      }
+#    endif
+      sycl::build(one_kernel_bundle_input);
    }
  }
  catch (sycl::exception const &e) {
@ -195,6 +287,8 @@ bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
 bool oneapi_enqueue_kernel(KernelContext *kernel_context,
                           int kernel,
                           size_t global_size,
+                           const uint kernel_features,
+                           bool use_hardware_raytracing,
                           void **args)
 {
  bool success = true;
@ -248,6 +342,21 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,

  try {
    queue->submit([&](sycl::handler &cgh) {
+#  ifdef WITH_EMBREE_GPU
+      /* Spec says it has no effect if the called kernel doesn't support the below specialization
+       * constant but it can still trigger a recompilation, so we set it only if needed. */
+      if (device_kernel_has_intersection(device_kernel)) {
+        const RTCFeatureFlags used_embree_features = !use_hardware_raytracing ?
+                                                         RTC_FEATURE_FLAG_NONE :
+                                                     !(kernel_features & KERNEL_FEATURE_HAIR) ?
+                                                         CYCLES_ONEAPI_EMBREE_BASIC_FEATURES :
+                                                         CYCLES_ONEAPI_EMBREE_ALL_FEATURES;
+        cgh.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
+            used_embree_features);
+      }
+#  else
+      (void)kernel_features;
+#  endif
      switch (device_kernel) {
        case DEVICE_KERNEL_INTEGRATOR_RESET: {
          oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_reset);
@ -549,4 +658,5 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
 #  endif
  return success;
 }
+
 #endif /* WITH_ONEAPI */
--- a/intern/cycles/kernel/device/oneapi/kernel.h
+++ b/intern/cycles/kernel/device/oneapi/kernel.h
@ -47,10 +47,14 @@ CYCLES_KERNEL_ONEAPI_EXPORT size_t oneapi_kernel_preferred_local_size(
 CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_enqueue_kernel(KernelContext *context,
                                                       int kernel,
                                                       size_t global_size,
+                                                       const unsigned int kernel_features,
+                                                       bool use_hardware_raytracing,
                                                       void **args);
 CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_load_kernels(SyclQueue *queue,
-                                                     const unsigned int requested_features);
+                                                     const unsigned int kernel_features,
+                                                     bool use_hardware_raytracing);
 #  ifdef __cplusplus
 }
+
 #  endif
 #endif /* WITH_ONEAPI */
--- a/intern/cycles/kernel/types.h
+++ b/intern/cycles/kernel/types.h
@ -3,8 +3,9 @@

 #pragma once

-#if !defined(__KERNEL_GPU__) && defined(WITH_EMBREE)
-#  if EMBREE_MAJOR_VERSION >= 4
+#if (!defined(__KERNEL_GPU__) || (defined(__KERNEL_ONEAPI__) && defined(WITH_EMBREE_GPU))) && \
+    defined(WITH_EMBREE)
+#  if EMBREE_MAJOR_VERSION == 4
 #    include <embree4/rtcore.h>
 #    include <embree4/rtcore_scene.h>
 #  else
--- a/intern/cycles/scene/geometry.cpp
+++ b/intern/cycles/scene/geometry.cpp
@ -194,8 +194,8 @@ void Geometry::compute_bvh(Device *device,

  compute_bounds();

-  const BVHLayout bvh_layout = BVHParams::best_bvh_layout(params->bvh_layout,
-                                                          device->get_bvh_layout_mask());
+  const BVHLayout bvh_layout = BVHParams::best_bvh_layout(
+      params->bvh_layout, device->get_bvh_layout_mask(dscene->data.kernel_features));
  if (need_build_bvh(bvh_layout)) {
    string msg = "Updating Geometry BVH ";
    if (name.empty())
@ -1235,8 +1235,8 @@ void GeometryManager::device_update_bvh(Device *device,

  BVHParams bparams;
  bparams.top_level = true;
-  bparams.bvh_layout = BVHParams::best_bvh_layout(scene->params.bvh_layout,
-                                                  device->get_bvh_layout_mask());
+  bparams.bvh_layout = BVHParams::best_bvh_layout(
+      scene->params.bvh_layout, device->get_bvh_layout_mask(dscene->data.kernel_features));
  bparams.use_spatial_split = scene->params.use_bvh_spatial_split;
  bparams.use_unaligned_nodes = dscene->data.bvh.have_curves &&
                                scene->params.use_bvh_unaligned_nodes;
@ -1889,8 +1889,8 @@ void GeometryManager::device_update(Device *device,
  /* Device update. */
  device_free(device, dscene, false);

-  const BVHLayout bvh_layout = BVHParams::best_bvh_layout(scene->params.bvh_layout,
-                                                          device->get_bvh_layout_mask());
+  const BVHLayout bvh_layout = BVHParams::best_bvh_layout(
+      scene->params.bvh_layout, device->get_bvh_layout_mask(dscene->data.kernel_features));
  geom_calc_offset(scene, bvh_layout);
  if (true_displacement_used || curve_shadow_transparency_used) {
    scoped_callback_timer timer([scene](double time) {
@ -2051,8 +2051,8 @@ void GeometryManager::device_update(Device *device,

  /* Always set BVH layout again after displacement where it was set to none,
   * to avoid ray-tracing at that stage. */
-  dscene->data.bvh.bvh_layout = BVHParams::best_bvh_layout(scene->params.bvh_layout,
-                                                           device->get_bvh_layout_mask());
+  dscene->data.bvh.bvh_layout = BVHParams::best_bvh_layout(
+      scene->params.bvh_layout, device->get_bvh_layout_mask(dscene->data.kernel_features));

  {
    scoped_callback_timer timer([scene](double time) {
--- a/intern/cycles/scene/object.cpp
+++ b/intern/cycles/scene/object.cpp
@ -595,7 +595,7 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
 void ObjectManager::device_update_prim_offsets(Device *device, DeviceScene *dscene, Scene *scene)
 {
  if (!scene->integrator->get_use_light_tree()) {
-    BVHLayoutMask layout_mask = device->get_bvh_layout_mask();
+    BVHLayoutMask layout_mask = device->get_bvh_layout_mask(dscene->data.kernel_features);
    if (layout_mask != BVH_LAYOUT_METAL && layout_mask != BVH_LAYOUT_MULTI_METAL &&
        layout_mask != BVH_LAYOUT_MULTI_METAL_EMBREE) {
      return;
--- a/intern/cycles/session/session.cpp
+++ b/intern/cycles/session/session.cpp
@ -621,12 +621,12 @@ void Session::set_pause(bool pause)

 void Session::set_output_driver(unique_ptr<OutputDriver> driver)
 {
-  path_trace_->set_output_driver(move(driver));
+  path_trace_->set_output_driver(std::move(driver));
 }

 void Session::set_display_driver(unique_ptr<DisplayDriver> driver)
 {
-  path_trace_->set_display_driver(move(driver));
+  path_trace_->set_display_driver(std::move(driver));
 }

 double Session::get_estimated_remaining_time() const
--- a/intern/cycles/session/tile.cpp
+++ b/intern/cycles/session/tile.cpp
@ -285,7 +285,7 @@ static bool configure_image_spec_from_buffer(ImageSpec *image_spec,
  *image_spec = ImageSpec(
      buffer_params.width, buffer_params.height, num_channels, TypeDesc::FLOAT);

-  image_spec->channelnames = move(channel_names);
+  image_spec->channelnames = std::move(channel_names);

  if (!buffer_params_to_image_spec_atttributes(image_spec, buffer_params)) {
    return false;
--- a/intern/cycles/util/path.cpp
+++ b/intern/cycles/util/path.cpp
@ -809,7 +809,7 @@ static string path_source_replace_includes_recursive(const string &_source,
  const size_t source_length = source.length();
  size_t index = 0;
  /* Information about where we are in the source. */
-  size_t line_number = 0, column_number = 1;
+  size_t column_number = 1;
  /* Currently gathered non-preprocessor token.
   * Store as start/length rather than token itself to avoid overhead of
   * memory re-allocations on each character concatenation.
@ -842,7 +842,6 @@ static string path_source_replace_includes_recursive(const string &_source,
        preprocessor_line = "";
      }
      column_number = 0;
-      ++line_number;
    }
    else if (ch == '#' && column_number == 1 && !inside_preprocessor) {
      /* Append all possible non-preprocessor token to the result. */
--- a/intern/cycles/util/vector.h
+++ b/intern/cycles/util/vector.h
@ -4,7 +4,6 @@
 #ifndef __UTIL_VECTOR_H__
 #define __UTIL_VECTOR_H__

-#include <cassert>
 #include <cstring>
 #include <vector>

--- a/intern/dualcon/intern/octree.cpp
+++ b/intern/dualcon/intern/octree.cpp
@ -897,12 +897,10 @@ void Octree::printPath(PathElement *path)
 void Octree::printPaths(PathList *path)
 {
  PathList *iter = path;
-  int i = 0;
  while (iter != NULL) {
    dc_printf("Path %d:\n", i);
    printPath(iter);
    iter = iter->next;
-    i++;
  }
 }

@ -1256,7 +1254,6 @@ Node *Octree::connectFace(

    updateParent(&newnode->internal, len, st);

-    int flag = 0;
    // Add the cells to the rings and fill in the patch
    PathElement *newEleN;
    if (curEleN->pos[0] != stN[0] || curEleN->pos[1] != stN[1] || curEleN->pos[2] != stN[2]) {
@ -1286,7 +1283,6 @@ Node *Octree::connectFace(
                           alpha);

      curEleN = newEleN;
-      flag++;
    }

    PathElement *newEleP;
@ -1316,7 +1312,6 @@ Node *Octree::connectFace(
                           alpha);

      curEleP = newEleP;
-      flag++;
    }

    /*
@ -1543,6 +1538,8 @@ void Octree::getFacePoint(PathElement *leaf, int dir, int &x, int &y, float &p,
  float off[3];
  int num = 0, num2 = 0;

+  (void)num2;  // Unused in release builds.
+
  LeafNode *leafnode = locateLeaf(leaf->pos);
  for (int i = 0; i < 4; i++) {
    int edgeind = faceMap[dir * 2][i];
--- a/intern/ghost/intern/GHOST_SystemWayland.cc
+++ b/intern/ghost/intern/GHOST_SystemWayland.cc
@ -193,6 +193,8 @@ static bool use_gnome_confine_hack = false;
 #  define USE_GNOME_NEEDS_LIBDECOR_HACK
 #endif

+/** \} */
+
 /* -------------------------------------------------------------------- */
 /** \name Local Defines
 *
@ -6307,6 +6309,7 @@ GHOST_IContext *GHOST_SystemWayland::createOffscreenContext(GHOST_GLSettings glS
      delete context;
      return nullptr;
    }
+    context->setUserData(wl_surface);
    return context;
  }
 #else
@ -6345,7 +6348,9 @@ GHOST_TSuccess GHOST_SystemWayland::disposeContext(GHOST_IContext *context)
  delete context;

  wl_egl_window *egl_window = (wl_egl_window *)wl_surface_get_user_data(wl_surface);
-  wl_egl_window_destroy(egl_window);
+  if (egl_window != nullptr) {
+    wl_egl_window_destroy(egl_window);
+  }
  wl_surface_destroy(wl_surface);

  return GHOST_kSuccess;
--- a/intern/iksolver/intern/IK_QJacobianSolver.cpp
+++ b/intern/iksolver/intern/IK_QJacobianSolver.cpp
@ -74,7 +74,7 @@ bool IK_QJacobianSolver::Setup(IK_QSegment *root, std::list<IK_QTask *> &tasks)
    return false;

  // compute task ids and assign weights to task
-  int primary_size = 0, primary = 0;
+  int primary_size = 0;
  int secondary_size = 0, secondary = 0;
  double primary_weight = 0.0, secondary_weight = 0.0;
  std::list<IK_QTask *>::iterator task;
@ -86,7 +86,6 @@ bool IK_QJacobianSolver::Setup(IK_QSegment *root, std::list<IK_QTask *> &tasks)
      qtask->SetId(primary_size);
      primary_size += qtask->Size();
      primary_weight += qtask->Weight();
-      primary++;
    }
    else {
      qtask->SetId(secondary_size);
--- a/intern/libmv/libmv/autotrack/autotrack.cc
+++ b/intern/libmv/libmv/autotrack/autotrack.cc
@ -283,15 +283,13 @@ void AutoTrack::DetectAndTrack(const DetectAndTrackOptions& options) {

      // Find tracks in the previous frame that are not in this one.
      vector<Marker*> previous_frame_markers_to_track;
-      int num_skipped = 0;
      for (int i = 0; i < previous_frame_markers.size(); ++i) {
        if (std::binary_search(tracks_in_this_frame.begin(),
                               tracks_in_this_frame.end(),
                               previous_frame_markers[i].track)) {
-          num_skipped++;
-        } else {
-          previous_frame_markers_to_track.push_back(&previous_frame_markers[i]);
+          continue;
        }
+        previous_frame_markers_to_track.push_back(&previous_frame_markers[i]);
      }

      // Finally track the markers from the last frame into this one.
--- a/intern/libmv/libmv/simple_pipeline/keyframe_selection.cc
+++ b/intern/libmv/libmv/simple_pipeline/keyframe_selection.cc
@ -140,7 +140,6 @@ void SelectKeyframesBasedOnGRICAndVariance(const Tracks& _tracks,

  int max_image = filtered_tracks.MaxImage();
  int next_keyframe = 1;
-  int number_keyframes = 0;

  // Limit correspondence ratio from both sides.
  // On the one hand if number of correspondent features is too low,
@ -162,7 +161,6 @@ void SelectKeyframesBasedOnGRICAndVariance(const Tracks& _tracks,

    LG << "Found keyframe " << next_keyframe;

-    number_keyframes++;
    next_keyframe = -1;

    for (int candidate_image = current_keyframe + 1;
@ -406,7 +404,6 @@ void SelectKeyframesBasedOnGRICAndVariance(const Tracks& _tracks,
    // However, it's just quick hack and smarter way to do this would be nice
    if (next_keyframe == -1) {
      next_keyframe = current_keyframe + 10;
-      number_keyframes = 0;

      if (next_keyframe >= max_image)
        break;
--- a/intern/opensubdiv/internal/topology/topology_refiner_factory.cc
+++ b/intern/opensubdiv/internal/topology/topology_refiner_factory.cc
@ -380,7 +380,7 @@ TopologyRefinerImpl *TopologyRefinerImpl::createFromConverter(
  TopologyRefinerImpl *topology_refiner_impl = new TopologyRefinerImpl();
  topology_refiner_impl->topology_refiner = topology_refiner;
  topology_refiner_impl->settings = settings;
-  topology_refiner_impl->base_mesh_topology = move(base_mesh_topology);
+  topology_refiner_impl->base_mesh_topology = std::move(base_mesh_topology);

  return topology_refiner_impl;
 }
--- a/intern/renderdoc_dynload/CMakeLists.txt
+++ b/intern/renderdoc_dynload/CMakeLists.txt
@ -14,4 +14,4 @@ set(SRC
    include/renderdoc_api.hh
 )

-blender_add_lib(bf_intern_renderdoc_dynload "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
+blender_add_lib(bf_intern_renderdoc_dynload "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
--- a/scripts/presets/interface_theme/Blender_Light.xml
+++ b/scripts/presets/interface_theme/Blender_Light.xml
@ -869,10 +869,10 @@
        </space>
        <space_list>
          <ThemeSpaceListGeneric
-            list="#181818"
-            list_title="#ffffff"
-            list_text="#ffffff"
-            list_text_hi="#ffffff"
+            list="#b3b3b3"
+            list_title="#747474"
+            list_text="#333333"
+            list_text_hi="#747474"
            >
          </ThemeSpaceListGeneric>
        </space_list>
@ -1240,7 +1240,7 @@
            title="#000000"
            text="#000000"
            text_hi="#ffffff"
-            header="#727272ff"
+            header="#b3b3b3ff"
            header_text="#000000"
            header_text_hi="#ffffff"
            button="#7272727f"
@ -1317,7 +1317,7 @@
            title="#ffffff"
            text="#ffffff"
            text_hi="#ffffff"
-            header="#999999ff"
+            header="#b3b3b3ff"
            header_text="#1a1a1a"
            header_text_hi="#ffffff"
            button="#2f303500"
--- a/scripts/presets/keyconfig/keymap_data/blender_default.py
+++ b/scripts/presets/keyconfig/keymap_data/blender_default.py
@ -5639,6 +5639,10 @@ def km_font(params):
         {"properties": [("type", 'PREVIOUS_PAGE')]}),
        ("font.move", {"type": 'PAGE_DOWN', "value": 'PRESS', "repeat": True},
         {"properties": [("type", 'NEXT_PAGE')]}),
+        ("font.move", {"type": 'HOME', "value": 'PRESS', "ctrl": True, "repeat": True},
+         {"properties": [("type", 'TEXT_BEGIN')]}),
+        ("font.move", {"type": 'END', "value": 'PRESS', "ctrl": True, "repeat": True},
+         {"properties": [("type", 'TEXT_END')]}),
        ("font.move_select", {"type": 'HOME', "value": 'PRESS', "shift": True},
         {"properties": [("type", 'LINE_BEGIN')]}),
        ("font.move_select", {"type": 'END', "value": 'PRESS', "shift": True},
@ -5659,6 +5663,10 @@ def km_font(params):
         {"properties": [("type", 'PREVIOUS_PAGE')]}),
        ("font.move_select", {"type": 'PAGE_DOWN', "value": 'PRESS', "shift": True, "repeat": True},
         {"properties": [("type", 'NEXT_PAGE')]}),
+        ("font.move_select", {"type": 'HOME', "value": 'PRESS', "shift": True, "ctrl": True, "repeat": True},
+         {"properties": [("type", 'TEXT_BEGIN')]}),
+        ("font.move_select", {"type": 'END', "value": 'PRESS', "shift": True, "ctrl": True, "repeat": True},
+         {"properties": [("type", 'TEXT_END')]}),
        ("font.change_spacing", {"type": 'LEFT_ARROW', "value": 'PRESS', "alt": True, "repeat": True},
         {"properties": [("delta", -1.0)]}),
        ("font.change_spacing", {"type": 'RIGHT_ARROW', "value": 'PRESS', "alt": True, "repeat": True},
--- a/scripts/presets/keyconfig/keymap_data/industry_compatible_data.py
+++ b/scripts/presets/keyconfig/keymap_data/industry_compatible_data.py
@ -3825,6 +3825,10 @@ def km_font(params):
         {"properties": [("type", 'PREVIOUS_PAGE')]}),
        ("font.move", {"type": 'PAGE_DOWN', "value": 'PRESS', "repeat": True},
         {"properties": [("type", 'NEXT_PAGE')]}),
+        ("font.move", {"type": 'HOME', "value": 'PRESS', "ctrl": True, "repeat": True},
+         {"properties": [("type", 'TEXT_BEGIN')]}),
+        ("font.move", {"type": 'END', "value": 'PRESS', "ctrl": True, "repeat": True},
+         {"properties": [("type", 'TEXT_END')]}),
        ("font.move_select", {"type": 'HOME', "value": 'PRESS', "shift": True},
         {"properties": [("type", 'LINE_BEGIN')]}),
        ("font.move_select", {"type": 'END', "value": 'PRESS', "shift": True},
@ -3845,6 +3849,10 @@ def km_font(params):
         {"properties": [("type", 'PREVIOUS_PAGE')]}),
        ("font.move_select", {"type": 'PAGE_DOWN', "value": 'PRESS', "shift": True, "repeat": True},
         {"properties": [("type", 'NEXT_PAGE')]}),
+        ("font.move_select", {"type": 'HOME', "value": 'PRESS', "shift": True, "ctrl": True, "repeat": True},
+         {"properties": [("type", 'TEXT_BEGIN')]}),
+        ("font.move_select", {"type": 'END', "value": 'PRESS', "shift": True, "ctrl": True, "repeat": True},
+         {"properties": [("type", 'TEXT_END')]}),
        ("font.change_spacing", {"type": 'LEFT_ARROW', "value": 'PRESS', "alt": True, "repeat": True},
         {"properties": [("delta", -1)]}),
        ("font.change_spacing", {"type": 'RIGHT_ARROW', "value": 'PRESS', "alt": True, "repeat": True},
--- a/scripts/startup/bl_ui/properties_data_curve.py
+++ b/scripts/startup/bl_ui/properties_data_curve.py
@ -363,11 +363,18 @@ class DATA_PT_font(CurveButtonsPanelText, Panel):
        if mode == 'EDIT_TEXT':
            layout.separator()

-            row = layout.row(align=True)
-            row.prop(char, "use_bold", toggle=True)
-            row.prop(char, "use_italic", toggle=True)
-            row.prop(char, "use_underline", toggle=True)
-            row.prop(char, "use_small_caps", toggle=True)
+            if not text.has_selection:
+                row = layout.row(align=True)
+                row.prop(char, "use_bold", toggle=True)
+                row.prop(char, "use_italic", toggle=True)
+                row.prop(char, "use_underline", toggle=True)
+                row.prop(char, "use_small_caps", toggle=True)
+            else:
+                row = layout.row(align=True)
+                row.operator("font.style_toggle", text="Bold", icon='BOLD' , depress = text.is_select_bold).style = 'BOLD'
+                row.operator("font.style_toggle", text="Italic", icon='ITALIC' , depress = text.is_select_italic).style = 'ITALIC'
+                row.operator("font.style_toggle", text="Underline", icon='UNDERLINE' , depress = text.is_select_underline).style = 'UNDERLINE'
+                row.operator("font.style_toggle", text="Small Caps", icon='SMALL_CAPS' , depress = text.is_select_smallcaps).style = 'SMALL_CAPS'


 class DATA_PT_font_transform(CurveButtonsPanelText, Panel):
--- a/scripts/startup/bl_ui/space_filebrowser.py
+++ b/scripts/startup/bl_ui/space_filebrowser.py
@ -538,6 +538,7 @@ class FILEBROWSER_MT_context_menu(FileBrowserMenu, Menu):
        layout.operator("file.next", text="Forward")
        layout.operator("file.parent", text="Go to Parent")
        layout.operator("file.refresh", text="Refresh")
+        layout.menu("FILEBROWSER_MT_operations_menu")

        layout.separator()

--- a/scripts/startup/bl_ui/space_view3d.py
+++ b/scripts/startup/bl_ui/space_view3d.py
@ -1806,6 +1806,11 @@ class VIEW3D_MT_select_edit_text(Menu):

        layout.separator()

+        layout.operator("font.move_select", text="Top").type = 'TEXT_BEGIN'
+        layout.operator("font.move_select", text="Bottom").type = 'TEXT_END'
+
+        layout.separator()
+
        layout.operator("font.move_select", text="Previous Block").type = 'PREVIOUS_PAGE'
        layout.operator("font.move_select", text="Next Block").type = 'NEXT_PAGE'

--- a/source/blender/blenfont/intern/blf_font.c
+++ b/source/blender/blenfont/intern/blf_font.c
@ -129,6 +129,8 @@ static void blf_size_finalizer(void *object)
  font->ft_size = NULL;
 }

+/** \} */
+
 /* -------------------------------------------------------------------- */
 /** \name FreeType Utilities (Internal)
 * \{ */
--- a/source/blender/blenfont/intern/blf_glyph.c
+++ b/source/blender/blenfont/intern/blf_glyph.c
@ -286,7 +286,7 @@ static GlyphBLF *blf_glyph_cache_add_glyph(
 *
 * This table can be used to find a coverage bit based on a charcode.
 * Later we can get default language and script from `codepoint`.
- */
+ * \{ */

 struct UnicodeBlock {
  uint first;
--- a/source/blender/blenkernel/BKE_attribute.hh
+++ b/source/blender/blenkernel/BKE_attribute.hh
@ -86,6 +86,8 @@ struct AttributeInit {
    VArray,
    /** #AttributeInitMoveArray. */
    MoveArray,
+    /** #AttributeInitShared. */
+    Shared,
  };
  Type type;
  AttributeInit(const Type type) : type(type) {}
@ -121,9 +123,6 @@ struct AttributeInitVArray : public AttributeInit {
 * Sometimes data is created before a geometry component is available. In that case, it's
 * preferable to move data directly to the created attribute to avoid a new allocation and a copy.
 *
- * Note that this will only have a benefit for attributes that are stored directly as contiguous
- * arrays, so not for some built-in attributes.
- *
 * The array must be allocated with MEM_*, since `attribute_try_create` will free the array if it
 * can't be used directly, and that is generally how Blender expects custom data to be allocated.
 */
@ -133,6 +132,20 @@ struct AttributeInitMoveArray : public AttributeInit {
  AttributeInitMoveArray(void *data) : AttributeInit(Type::MoveArray), data(data) {}
 };

+/**
+ * Create a shared attribute by adding a user to a shared data array.
+ * The sharing info has ownership of the provided contiguous array.
+ */
+struct AttributeInitShared : public AttributeInit {
+  const void *data = nullptr;
+  const ImplicitSharingInfo *sharing_info = nullptr;
+
+  AttributeInitShared(const void *data, const ImplicitSharingInfo &sharing_info)
+      : AttributeInit(Type::Shared), data(data), sharing_info(&sharing_info)
+  {
+  }
+};
+
 /* Returns false when the iteration should be stopped. */
 using AttributeForeachCallback =
    FunctionRef<bool(const AttributeIDRef &attribute_id, const AttributeMetaData &meta_data)>;
@ -151,6 +164,21 @@ template<typename T> struct AttributeReader {
   */
  eAttrDomain domain;

+  /**
+   * Information about shared ownership of the attribute array. This will only be provided
+   * if the virtual array directly references the contiguous original attribute array.
+   */
+  const ImplicitSharingInfo *sharing_info;
+
+  const VArray<T> &operator*() const
+  {
+    return this->varray;
+  }
+  VArray<T> &operator*()
+  {
+    return this->varray;
+  }
+
  operator bool() const
  {
    return this->varray;
@ -270,15 +298,25 @@ template<typename T> struct SpanAttributeWriter {
 struct GAttributeReader {
  GVArray varray;
  eAttrDomain domain;
+  const ImplicitSharingInfo *sharing_info;

  operator bool() const
  {
    return this->varray;
  }

+  const GVArray &operator*() const
+  {
+    return this->varray;
+  }
+  GVArray &operator*()
+  {
+    return this->varray;
+  }
+
  template<typename T> AttributeReader<T> typed() const
  {
-    return {varray.typed<T>(), domain};
+    return {varray.typed<T>(), domain, sharing_info};
  }
 };

@ -457,15 +495,15 @@ class AttributeAccessor {
   * Get read-only access to the attribute. If necessary, the attribute is interpolated to the
   * given domain, and converted to the given type, in that order.  The result may be empty.
   */
-  GVArray lookup(const AttributeIDRef &attribute_id,
-                 const std::optional<eAttrDomain> domain,
-                 const std::optional<eCustomDataType> data_type) const;
+  GAttributeReader lookup(const AttributeIDRef &attribute_id,
+                          const std::optional<eAttrDomain> domain,
+                          const std::optional<eCustomDataType> data_type) const;

  /**
   * Get read-only access to the attribute whereby the attribute is interpolated to the given
   * domain. The result may be empty.
   */
-  GVArray lookup(const AttributeIDRef &attribute_id, const eAttrDomain domain) const
+  GAttributeReader lookup(const AttributeIDRef &attribute_id, const eAttrDomain domain) const
  {
    return this->lookup(attribute_id, domain, std::nullopt);
  }
@ -474,7 +512,8 @@ class AttributeAccessor {
   * Get read-only access to the attribute whereby the attribute is converted to the given type.
   * The result may be empty.
   */
-  GVArray lookup(const AttributeIDRef &attribute_id, const eCustomDataType data_type) const
+  GAttributeReader lookup(const AttributeIDRef &attribute_id,
+                          const eCustomDataType data_type) const
  {
    return this->lookup(attribute_id, std::nullopt, data_type);
  }
@ -484,8 +523,8 @@ class AttributeAccessor {
   * given domain and then converted to the given type, in that order. The result may be empty.
   */
  template<typename T>
-  VArray<T> lookup(const AttributeIDRef &attribute_id,
-                   const std::optional<eAttrDomain> domain = std::nullopt) const
+  AttributeReader<T> lookup(const AttributeIDRef &attribute_id,
+                            const std::optional<eAttrDomain> domain = std::nullopt) const
  {
    const CPPType &cpp_type = CPPType::get<T>();
    const eCustomDataType data_type = cpp_type_to_custom_data_type(cpp_type);
@ -498,23 +537,23 @@ class AttributeAccessor {
   * If the attribute does not exist, a virtual array with the given default value is returned.
   * If the passed in default value is null, the default value of the type is used (generally 0).
   */
-  GVArray lookup_or_default(const AttributeIDRef &attribute_id,
-                            const eAttrDomain domain,
-                            const eCustomDataType data_type,
-                            const void *default_value = nullptr) const;
+  GAttributeReader lookup_or_default(const AttributeIDRef &attribute_id,
+                                     const eAttrDomain domain,
+                                     const eCustomDataType data_type,
+                                     const void *default_value = nullptr) const;

  /**
   * Same as the generic version above, but should be used when the type is known at compile time.
   */
  template<typename T>
-  VArray<T> lookup_or_default(const AttributeIDRef &attribute_id,
-                              const eAttrDomain domain,
-                              const T &default_value) const
+  AttributeReader<T> lookup_or_default(const AttributeIDRef &attribute_id,
+                                       const eAttrDomain domain,
+                                       const T &default_value) const
  {
-    if (VArray<T> varray = this->lookup<T>(attribute_id, domain)) {
+    if (AttributeReader<T> varray = this->lookup<T>(attribute_id, domain)) {
      return varray;
    }
-    return VArray<T>::ForSingle(default_value, this->domain_size(domain));
+    return {VArray<T>::ForSingle(default_value, this->domain_size(domain)), domain};
  }

  /**
@ -626,6 +665,15 @@ class MutableAttributeAccessor : public AttributeAccessor {
  {
    return fn_->add(owner_, attribute_id, domain, data_type, initializer);
  }
+  template<typename T>
+  bool add(const AttributeIDRef &attribute_id,
+           const eAttrDomain domain,
+           const AttributeInit &initializer)
+  {
+    const CPPType &cpp_type = CPPType::get<T>();
+    const eCustomDataType data_type = cpp_type_to_custom_data_type(cpp_type);
+    return this->add(attribute_id, domain, data_type, initializer);
+  }

  /**
   * Find an attribute with the given id, domain and data type. If it does not exist, create a new
--- a/source/blender/blenkernel/BKE_lib_id.h
+++ b/source/blender/blenkernel/BKE_lib_id.h
@ -465,7 +465,7 @@ struct ID *BKE_id_copy_for_use_in_bmain(struct Main *bmain, const struct ID *id)
 * \note Most internal ID data itself is not swapped (only IDProperties are).
 *
 * \param bmain: May be NULL, in which case there is no guarantee that internal remapping of ID
- * pointers to themselves will be complete (reguarding depsgraph and/or runtime data updates).
+ * pointers to themselves will be complete (regarding depsgraph and/or runtime data updates).
 * \param do_self_remap: Whether to remap internal pointers to itself or not.
 * \param self_remap_flags: Flags controlling self remapping, see BKE_lib_remap.h.
 */
--- a/source/blender/blenkernel/BKE_mesh.hh
+++ b/source/blender/blenkernel/BKE_mesh.hh
@ -210,12 +210,12 @@ inline int edge_other_vert(const int2 &edge, const int vert)

 /** \} */

+}  // namespace blender::bke::mesh
+
 /* -------------------------------------------------------------------- */
 /** \name Inline Mesh Data Access
 * \{ */

-}  // namespace blender::bke::mesh
-
 inline blender::Span<blender::float3> Mesh::vert_positions() const
 {
  return {reinterpret_cast<const blender::float3 *>(BKE_mesh_vert_positions(this)), this->totvert};
--- a/source/blender/blenkernel/BKE_vfont.h
+++ b/source/blender/blenkernel/BKE_vfont.h
@ -28,20 +28,36 @@ typedef struct EditFontSelBox {
  float rot;
 } EditFontSelBox;

+/**
+ * Edit data for #Curve (a text curve, with an #Object::type of `OB_FONT`).
+ * */
 typedef struct EditFont {
+  /** Array of UTF32 code-points. */
  char32_t *textbuf;
+  /** Text style info (aligned with `textbuf`). */
  struct CharInfo *textbufinfo;

-  /* array of rectangles & rotation */
+  /** Array of rectangles & rotation. */
  float textcurs[4][2];
  EditFontSelBox *selboxes;
  int selboxes_len;

-  /* positional vars relative to the textbuf, textbufinfo (not utf8 bytes)
-   * a copy of these is kept in Curve, but use these in editmode */
-  int len, pos;
+  /* Positional vars relative to the `textbuf` (not utf8 bytes)
+   * a copy of these is kept in Curve, but use these in edit-mode. */
+
+  /** Length of `textbuf`. */
+  int len;
+  /** Cursor position of (aligned with `textbuf`). */
+  int pos;
+  /** Text selection start/end, see #BKE_vfont_select_get. */
  int selstart, selend;

+  /**
+   * Combined styles (#CharInfo.flag) for selected string. A flag will be
+   * set only if ALL characters in the selected string have it.
+   */
+  int select_char_info_flag;
+
  /**
   * ID data is older than edit-mode data.
   * Set #Main.is_memfile_undo_flush_needed when enabling.
--- a/source/blender/blenkernel/intern/DerivedMesh.cc
+++ b/source/blender/blenkernel/intern/DerivedMesh.cc
@ -658,11 +658,18 @@ static void mesh_calc_modifiers(struct Depsgraph *depsgraph,
      ASSERT_IS_VALID_MESH(mesh_final);
    }
    MutableAttributeAccessor attributes = mesh_final->attributes_for_write();
-    SpanAttributeWriter<float3> rest_positions =
-        attributes.lookup_or_add_for_write_only_span<float3>("rest_position", ATTR_DOMAIN_POINT);
-    if (rest_positions && attributes.domain_size(ATTR_DOMAIN_POINT) > 0) {
-      attributes.lookup<float3>("position").materialize(rest_positions.span);
-      rest_positions.finish();
+    const AttributeReader positions = attributes.lookup<float3>("position");
+    if (positions) {
+      if (positions.sharing_info && positions.varray.is_span()) {
+        attributes.add<float3>("rest_position",
+                               ATTR_DOMAIN_POINT,
+                               AttributeInitShared(positions.varray.get_internal_span().data(),
+                                                   *positions.sharing_info));
+      }
+      else {
+        attributes.add<float3>(
+            "rest_position", ATTR_DOMAIN_POINT, AttributeInitVArray(positions.varray));
+      }
    }
  }

--- a/source/blender/blenkernel/intern/attribute_access.cc
+++ b/source/blender/blenkernel/intern/attribute_access.cc
@ -218,6 +218,12 @@ static bool add_builtin_type_custom_data_layer_from_init(CustomData &custom_data
      }
      return true;
    }
+    case AttributeInit::Type::Shared: {
+      const AttributeInitShared &init = static_cast<const AttributeInitShared &>(initializer);
+      const void *stored_data = CustomData_add_layer_with_data(
+          &custom_data, data_type, const_cast<void *>(init.data), domain_num, init.sharing_info);
+      return stored_data != nullptr;
+    }
  }

  BLI_assert_unreachable();
@ -293,6 +299,16 @@ static bool add_custom_data_layer_from_attribute_init(const AttributeIDRef &attr
          custom_data, data_type, attribute_id, domain_num, data, nullptr);
      break;
    }
+    case AttributeInit::Type::Shared: {
+      const AttributeInitShared &init = static_cast<const AttributeInitShared &>(initializer);
+      add_generic_custom_data_layer_with_existing_data(custom_data,
+                                                       data_type,
+                                                       attribute_id,
+                                                       domain_num,
+                                                       const_cast<void *>(init.data),
+                                                       init.sharing_info);
+      break;
+    }
  }
  return old_layer_num < custom_data.totlayer;
 }
@ -314,7 +330,7 @@ bool BuiltinCustomDataLayerProvider::layer_exists(const CustomData &custom_data)
  return CustomData_has_layer(&custom_data, stored_type_);
 }

-GVArray BuiltinCustomDataLayerProvider::try_get_for_read(const void *owner) const
+GAttributeReader BuiltinCustomDataLayerProvider::try_get_for_read(const void *owner) const
 {
  const CustomData *custom_data = custom_data_access_.get_const_custom_data(owner);
  if (custom_data == nullptr) {
@ -326,22 +342,23 @@ GVArray BuiltinCustomDataLayerProvider::try_get_for_read(const void *owner) cons
  const int element_num = custom_data_access_.get_element_num(owner);
  if (element_num == 0) {
    if (this->layer_exists(*custom_data)) {
-      return GVArray::ForSpan({type, nullptr, 0});
+      return {GVArray::ForSpan({type, nullptr, 0}), domain_, nullptr};
    }
    return {};
  }

-  const void *data = nullptr;
+  int index;
  if (stored_as_named_attribute_) {
-    data = CustomData_get_layer_named(custom_data, stored_type_, name_.c_str());
+    index = CustomData_get_named_layer_index(custom_data, stored_type_, name_.c_str());
  }
  else {
-    data = CustomData_get_layer(custom_data, stored_type_);
+    index = CustomData_get_layer_index(custom_data, stored_type_);
  }
-  if (data == nullptr) {
+  if (index == -1) {
    return {};
  }
-  return GVArray::ForSpan({type, data, element_num});
+  const CustomDataLayer &layer = custom_data->layers[index];
+  return {GVArray::ForSpan({type, layer.data, element_num}), domain_, layer.sharing_info};
 }

 GAttributeWriter BuiltinCustomDataLayerProvider::try_get_for_write(void *owner) const
@ -472,7 +489,7 @@ GAttributeReader CustomDataAttributeProvider::try_get_for_read(
      continue;
    }
    GSpan data{*type, layer.data, element_num};
-    return {GVArray::ForSpan(data), domain_};
+    return {GVArray::ForSpan(data), domain_, layer.sharing_info};
  }
  return {};
 }
@ -729,50 +746,52 @@ static blender::GVArray try_adapt_data_type(blender::GVArray varray,
  return conversions.try_convert(std::move(varray), to_type);
 }

-GVArray AttributeAccessor::lookup(const AttributeIDRef &attribute_id,
-                                  const std::optional<eAttrDomain> domain,
-                                  const std::optional<eCustomDataType> data_type) const
+GAttributeReader AttributeAccessor::lookup(const AttributeIDRef &attribute_id,
+                                           const std::optional<eAttrDomain> domain,
+                                           const std::optional<eCustomDataType> data_type) const
 {
  GAttributeReader attribute = this->lookup(attribute_id);
  if (!attribute) {
    return {};
  }
-  GVArray varray = std::move(attribute.varray);
  if (domain.has_value()) {
    if (attribute.domain != domain) {
-      varray = this->adapt_domain(varray, attribute.domain, *domain);
-      if (!varray) {
+      attribute.varray = this->adapt_domain(attribute.varray, attribute.domain, *domain);
+      attribute.domain = *domain;
+      attribute.sharing_info = nullptr;
+      if (!attribute.varray) {
        return {};
      }
    }
  }
  if (data_type.has_value()) {
    const CPPType &type = *custom_data_type_to_cpp_type(*data_type);
-    if (varray.type() != type) {
-      varray = try_adapt_data_type(std::move(varray), type);
-      if (!varray) {
+    if (attribute.varray.type() != type) {
+      attribute.varray = try_adapt_data_type(std::move(attribute.varray), type);
+      attribute.sharing_info = nullptr;
+      if (!attribute.varray) {
        return {};
      }
    }
  }
-  return varray;
+  return attribute;
 }

-GVArray AttributeAccessor::lookup_or_default(const AttributeIDRef &attribute_id,
-                                             const eAttrDomain domain,
-                                             const eCustomDataType data_type,
-                                             const void *default_value) const
+GAttributeReader AttributeAccessor::lookup_or_default(const AttributeIDRef &attribute_id,
+                                                      const eAttrDomain domain,
+                                                      const eCustomDataType data_type,
+                                                      const void *default_value) const
 {
-  GVArray varray = this->lookup(attribute_id, domain, data_type);
-  if (varray) {
-    return varray;
+  GAttributeReader attribute = this->lookup(attribute_id, domain, data_type);
+  if (attribute) {
+    return attribute;
  }
  const CPPType &type = *custom_data_type_to_cpp_type(data_type);
  const int64_t domain_size = this->domain_size(domain);
  if (default_value == nullptr) {
-    return GVArray::ForSingleRef(type, domain_size, type.default_value());
+    return {GVArray::ForSingleRef(type, domain_size, type.default_value()), domain, nullptr};
  }
-  return GVArray::ForSingle(type, domain_size, default_value);
+  return {GVArray::ForSingle(type, domain_size, default_value), domain, nullptr};
 }

 Set<AttributeIDRef> AttributeAccessor::all_ids() const
@ -921,7 +940,7 @@ Vector<AttributeTransferData> retrieve_attributes_for_transfer(
          return true;
        }

-        GVArray src = src_attributes.lookup(id, meta_data.domain);
+        GVArray src = *src_attributes.lookup(id, meta_data.domain);
        BLI_assert(src);
        bke::GSpanAttributeWriter dst = dst_attributes.lookup_or_add_for_write_only_span(
            id, meta_data.domain, meta_data.data_type);
@ -933,6 +952,6 @@ Vector<AttributeTransferData> retrieve_attributes_for_transfer(
  return attributes;
 }

-}  // namespace blender::bke
-
 /** \} */
+
+}  // namespace blender::bke
--- a/source/blender/blenkernel/intern/attribute_access_intern.hh
+++ b/source/blender/blenkernel/intern/attribute_access_intern.hh
@ -66,7 +66,7 @@ class BuiltinAttributeProvider {
  {
  }

-  virtual GVArray try_get_for_read(const void *owner) const = 0;
+  virtual GAttributeReader try_get_for_read(const void *owner) const = 0;
  virtual GAttributeWriter try_get_for_write(void *owner) const = 0;
  virtual bool try_delete(void *owner) const = 0;
  virtual bool try_create(void *onwer, const AttributeInit &initializer) const = 0;
@ -171,7 +171,6 @@ class CustomDataAttributeProvider final : public DynamicAttributesProvider {
 * if the stored type is the same as the attribute type.
 */
 class BuiltinCustomDataLayerProvider final : public BuiltinAttributeProvider {
-  using UpdateOnRead = void (*)(const void *owner);
  using UpdateOnChange = void (*)(void *owner);
  const eCustomDataType stored_type_;
  const CustomDataAccessInfo custom_data_access_;
@ -197,7 +196,7 @@ class BuiltinCustomDataLayerProvider final : public BuiltinAttributeProvider {
  {
  }

-  GVArray try_get_for_read(const void *owner) const final;
+  GAttributeReader try_get_for_read(const void *owner) const final;
  GAttributeWriter try_get_for_write(void *owner) const final;
  bool try_delete(void *owner) const final;
  bool try_create(void *owner, const AttributeInit &initializer) const final;
@ -280,7 +279,7 @@ inline GAttributeReader lookup(const void *owner, const AttributeIDRef &attribut
    const StringRef name = attribute_id.name();
    if (const BuiltinAttributeProvider *provider =
            providers.builtin_attribute_providers().lookup_default_as(name, nullptr)) {
-      return {provider->try_get_for_read(owner), provider->domain()};
+      return provider->try_get_for_read(owner);
    }
  }
  for (const DynamicAttributesProvider *provider : providers.dynamic_attribute_providers()) {
--- a/source/blender/blenkernel/intern/bvhutils.cc
+++ b/source/blender/blenkernel/intern/bvhutils.cc
@ -1280,7 +1280,7 @@ BVHTree *BKE_bvhtree_from_mesh_get(struct BVHTreeFromMesh *data,
      blender::bke::AttributeAccessor attributes = mesh->attributes();
      mask = looptri_no_hidden_map_get(
          mesh->polys(),
-          attributes.lookup_or_default(".hide_poly", ATTR_DOMAIN_FACE, false),
+          *attributes.lookup_or_default(".hide_poly", ATTR_DOMAIN_FACE, false),
          looptris.size(),
          &mask_bits_act_len);
      ATTR_FALLTHROUGH;
--- a/source/blender/blenkernel/intern/curve_to_mesh_convert.cc
+++ b/source/blender/blenkernel/intern/curve_to_mesh_convert.cc
@ -747,7 +747,7 @@ Mesh *curve_to_mesh_sweep(const CurvesGeometry &main,
  Span<float> radii = {};
  if (main_attributes.contains("radius")) {
    radii = evaluated_attribute_if_necessary(
-                main_attributes.lookup_or_default<float>("radius", ATTR_DOMAIN_POINT, 1.0f),
+                *main_attributes.lookup_or_default<float>("radius", ATTR_DOMAIN_POINT, 1.0f),
                main,
                main.curve_type_counts(),
                eval_buffer)
@ -805,7 +805,7 @@ Mesh *curve_to_mesh_sweep(const CurvesGeometry &main,

    const eAttrDomain src_domain = meta_data.domain;
    const eCustomDataType type = meta_data.data_type;
-    GVArray src = main_attributes.lookup(id, src_domain, type);
+    const GVArray src = *main_attributes.lookup(id, src_domain, type);

    const eAttrDomain dst_domain = get_attribute_domain_for_mesh(mesh_attributes, id);
    GSpanAttributeWriter dst = mesh_attributes.lookup_or_add_for_write_only_span(
@ -841,7 +841,7 @@ Mesh *curve_to_mesh_sweep(const CurvesGeometry &main,
    }
    const eAttrDomain src_domain = meta_data.domain;
    const eCustomDataType type = meta_data.data_type;
-    GVArray src = profile_attributes.lookup(id, src_domain, type);
+    const GVArray src = *profile_attributes.lookup(id, src_domain, type);

    const eAttrDomain dst_domain = get_attribute_domain_for_mesh(mesh_attributes, id);
    GSpanAttributeWriter dst = mesh_attributes.lookup_or_add_for_write_only_span(
--- a/source/blender/blenkernel/intern/fluid.cc
+++ b/source/blender/blenkernel/intern/fluid.cc
@ -3237,8 +3237,9 @@ static Mesh *create_liquid_geometry(FluidDomainSettings *fds,
  blender::MutableSpan<int> poly_offsets = me->poly_offsets_for_write();
  blender::MutableSpan<int> corner_verts = me->corner_verts_for_write();

-  const bool is_sharp = orgmesh->attributes().lookup_or_default<bool>(
-      "sharp_face", ATTR_DOMAIN_FACE, false)[0];
+  const bool is_sharp = orgmesh->attributes()
+                            .lookup_or_default<bool>("sharp_face", ATTR_DOMAIN_FACE, false)
+                            .varray[0];
  BKE_mesh_smooth_flag_set(me, !is_sharp);

  /* Get size (dimension) but considering scaling. */
--- a/source/blender/blenkernel/intern/geometry_component_instances.cc
+++ b/source/blender/blenkernel/intern/geometry_component_instances.cc
@ -131,14 +131,16 @@ class InstancePositionAttributeProvider final : public BuiltinAttributeProvider
  {
  }

-  GVArray try_get_for_read(const void *owner) const final
+  GAttributeReader try_get_for_read(const void *owner) const final
  {
    const Instances *instances = static_cast<const Instances *>(owner);
    if (instances == nullptr) {
      return {};
    }
    Span<float4x4> transforms = instances->transforms();
-    return VArray<float3>::ForDerivedSpan<float4x4, get_transform_position>(transforms);
+    return {VArray<float3>::ForDerivedSpan<float4x4, get_transform_position>(transforms),
+            domain_,
+            nullptr};
  }

  GAttributeWriter try_get_for_write(void *owner) const final
--- a/source/blender/blenkernel/intern/geometry_component_mesh.cc
+++ b/source/blender/blenkernel/intern/geometry_component_mesh.cc
@ -1114,7 +1114,7 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
                                                 ATTR_DOMAIN_POINT,
                                                 CD_PROP_FLOAT3,
                                                 CD_PROP_FLOAT3,
-                                                 BuiltinAttributeProvider::NonCreatable,
+                                                 BuiltinAttributeProvider::Creatable,
                                                 BuiltinAttributeProvider::NonDeletable,
                                                 point_access,
                                                 tag_component_positions_changed);
--- a/source/blender/blenkernel/intern/geometry_component_pointcloud.cc
+++ b/source/blender/blenkernel/intern/geometry_component_pointcloud.cc
@ -139,7 +139,7 @@ static ComponentAttributeProviders create_attribute_providers_for_point_cloud()
                                                 ATTR_DOMAIN_POINT,
                                                 CD_PROP_FLOAT3,
                                                 CD_PROP_FLOAT3,
-                                                 BuiltinAttributeProvider::NonCreatable,
+                                                 BuiltinAttributeProvider::Creatable,
                                                 BuiltinAttributeProvider::NonDeletable,
                                                 point_access,
                                                 tag_component_positions_changed);
--- a/source/blender/blenkernel/intern/geometry_fields.cc
+++ b/source/blender/blenkernel/intern/geometry_fields.cc
@ -248,7 +248,7 @@ GVArray AttributeFieldInput::get_varray_for_context(const GeometryFieldContext &
 {
  const eCustomDataType data_type = cpp_type_to_custom_data_type(*type_);
  if (auto attributes = context.attributes()) {
-    return attributes->lookup(name_, context.domain(), data_type);
+    return *attributes->lookup(name_, context.domain(), data_type);
  }
  return {};
 }
@ -304,7 +304,7 @@ GVArray IDAttributeFieldInput::get_varray_for_context(const GeometryFieldContext

  const StringRef name = get_random_id_attribute_name(context.domain());
  if (auto attributes = context.attributes()) {
-    if (GVArray attribute = attributes->lookup(name, context.domain(), CD_PROP_INT32)) {
+    if (GVArray attribute = *attributes->lookup(name, context.domain(), CD_PROP_INT32)) {
      return attribute;
    }
  }
@ -334,7 +334,7 @@ GVArray AnonymousAttributeFieldInput::get_varray_for_context(const GeometryField
                                                             const IndexMask /*mask*/) const
 {
  const eCustomDataType data_type = cpp_type_to_custom_data_type(*type_);
-  return context.attributes()->lookup(*anonymous_id_, context.domain(), data_type);
+  return *context.attributes()->lookup(*anonymous_id_, context.domain(), data_type);
 }

 std::string AnonymousAttributeFieldInput::socket_inspection_name() const
@ -474,7 +474,7 @@ bool try_capture_field_on_geometry(GeometryComponent &component,
  }

  attributes.remove(attribute_id);
-  if (attributes.add(attribute_id, domain, data_type, bke::AttributeInitMoveArray{buffer})) {
+  if (attributes.add(attribute_id, domain, data_type, bke::AttributeInitMoveArray(buffer))) {
    return true;
  }

--- a/source/blender/blenkernel/intern/gpencil_geom_legacy.cc
+++ b/source/blender/blenkernel/intern/gpencil_geom_legacy.cc
@ -2507,7 +2507,7 @@ static void gpencil_generate_edgeloops(Object *ob,
  const Span<MDeformVert> dverts = me->deform_verts();
  const blender::Span<blender::float3> vert_normals = me->vert_normals();
  const bke::AttributeAccessor attributes = me->attributes();
-  const VArray<bool> uv_seams = attributes.lookup_or_default<bool>(
+  const VArray<bool> uv_seams = *attributes.lookup_or_default<bool>(
      ".uv_seam", ATTR_DOMAIN_EDGE, false);

  /* Arrays for all edge vertices (forward and backward) that form a edge loop.
@ -2753,7 +2753,7 @@ bool BKE_gpencil_convert_mesh(Main *bmain,
        gpl_fill, scene->r.cfra + frame_offset, GP_GETFRAME_ADD_NEW);
    int i;

-    const VArray<int> mesh_material_indices = me_eval->attributes().lookup_or_default<int>(
+    const VArray<int> mesh_material_indices = *me_eval->attributes().lookup_or_default<int>(
        "material_index", ATTR_DOMAIN_FACE, 0);
    for (i = 0; i < polys_len; i++) {
      const IndexRange poly = polys[i];
--- a/source/blender/blenkernel/intern/lib_id.c
+++ b/source/blender/blenkernel/intern/lib_id.c
@ -845,7 +845,7 @@ static void id_swap(Main *bmain,
    BKE_id_remapper_add(remapper_id_b, id_a, id_b);
  }

-  /* Finalize remapping of internal referrences to self broken by swapping, if requested. */
+  /* Finalize remapping of internal references to self broken by swapping, if requested. */
  if (do_self_remap) {
    LinkNode ids = {.next = NULL, .link = id_a};
    BKE_libblock_relink_multiple(
@ -864,7 +864,7 @@ static void id_swap(Main *bmain,
 }

 /* Conceptually, embedded IDs are part of their owner's data. However, some parts of the code
- * (like e.g. the depsgraph) may treat them as independant IDs, so swapping them here and
+ * (like e.g. the depsgraph) may treat them as independent IDs, so swapping them here and
 * switching their pointers in the owner IDs allows to help not break cached relationships and
 * such (by preserving the pointer values). */
 static void id_embedded_swap(ID **embedded_id_a,
--- a/source/blender/blenkernel/intern/mesh.cc
+++ b/source/blender/blenkernel/intern/mesh.cc
@ -1486,7 +1486,7 @@ bool BKE_mesh_material_index_used(Mesh *me, short index)
  using namespace blender;
  using namespace blender::bke;
  const AttributeAccessor attributes = me->attributes();
-  const VArray<int> material_indices = attributes.lookup_or_default<int>(
+  const VArray<int> material_indices = *attributes.lookup_or_default<int>(
      "material_index", ATTR_DOMAIN_FACE, 0);
  if (material_indices.is_single()) {
    return material_indices.get_internal_single() == index;
@ -1718,11 +1718,11 @@ void BKE_mesh_mselect_validate(Mesh *me)
      (me->totselect), sizeof(MSelect), "Mesh selection history");

  const AttributeAccessor attributes = me->attributes();
-  const VArray<bool> select_vert = attributes.lookup_or_default<bool>(
+  const VArray<bool> select_vert = *attributes.lookup_or_default<bool>(
      ".select_vert", ATTR_DOMAIN_POINT, false);
-  const VArray<bool> select_edge = attributes.lookup_or_default<bool>(
+  const VArray<bool> select_edge = *attributes.lookup_or_default<bool>(
      ".select_edge", ATTR_DOMAIN_EDGE, false);
-  const VArray<bool> select_poly = attributes.lookup_or_default<bool>(
+  const VArray<bool> select_poly = *attributes.lookup_or_default<bool>(
      ".select_poly", ATTR_DOMAIN_FACE, false);

  for (i_src = 0, i_dst = 0; i_src < me->totselect; i_src++) {
@ -1830,7 +1830,7 @@ void BKE_mesh_count_selected_items(const Mesh *mesh, int r_count[3])
 void BKE_mesh_vert_coords_get(const Mesh *mesh, float (*vert_coords)[3])
 {
  blender::bke::AttributeAccessor attributes = mesh->attributes();
-  VArray<float3> positions = attributes.lookup_or_default(
+  VArray<float3> positions = *attributes.lookup_or_default(
      "position", ATTR_DOMAIN_POINT, float3(0));
  positions.materialize({(float3 *)vert_coords, mesh->totvert});
 }
--- a/source/blender/blenkernel/intern/mesh_boolean_convert.cc
+++ b/source/blender/blenkernel/intern/mesh_boolean_convert.cc
@ -411,7 +411,7 @@ static void copy_poly_attributes(Mesh *dest_mesh,
  }

  /* Fix material indices after they have been transferred as a generic attribute. */
-  const VArray<int> src_material_indices = orig_me->attributes().lookup_or_default<int>(
+  const VArray<int> src_material_indices = *orig_me->attributes().lookup_or_default<int>(
      "material_index", ATTR_DOMAIN_FACE, 0);
  const int src_index = src_material_indices[index_in_orig_me];
  if (material_remap.index_range().contains(src_index)) {
--- a/source/blender/blenkernel/intern/mesh_convert.cc
+++ b/source/blender/blenkernel/intern/mesh_convert.cc
@ -1087,7 +1087,7 @@ static void move_shapekey_layers_to_keyblocks(const Mesh &mesh,
    if (kb->uid == actshape_uid) {
      kb->data = MEM_malloc_arrayN(kb->totelem, sizeof(float3), __func__);
      MutableSpan<float3> kb_coords(static_cast<float3 *>(kb->data), kb->totelem);
-      mesh.attributes().lookup<float3>("position").materialize(kb_coords);
+      mesh.attributes().lookup<float3>("position").varray.materialize(kb_coords);
    }
    else {
      kb->data = layer.data;
--- a/source/blender/blenkernel/intern/mesh_evaluate.cc
+++ b/source/blender/blenkernel/intern/mesh_evaluate.cc
@ -623,7 +623,7 @@ void BKE_mesh_flush_hidden_from_verts(Mesh *me)
  using namespace blender::bke;
  MutableAttributeAccessor attributes = me->attributes_for_write();

-  const VArray<bool> hide_vert = attributes.lookup_or_default<bool>(
+  const VArray<bool> hide_vert = *attributes.lookup_or_default<bool>(
      ".hide_vert", ATTR_DOMAIN_POINT, false);
  if (hide_vert.is_single() && !hide_vert.get_internal_single()) {
    attributes.remove(".hide_edge");
@ -662,7 +662,7 @@ void BKE_mesh_flush_hidden_from_polys(Mesh *me)
  using namespace blender::bke;
  MutableAttributeAccessor attributes = me->attributes_for_write();

-  const VArray<bool> hide_poly = attributes.lookup_or_default<bool>(
+  const VArray<bool> hide_poly = *attributes.lookup_or_default<bool>(
      ".hide_poly", ATTR_DOMAIN_FACE, false);
  if (hide_poly.is_single() && !hide_poly.get_internal_single()) {
    attributes.remove(".hide_vert");
@ -705,7 +705,7 @@ void BKE_mesh_flush_select_from_polys(Mesh *me)
 {
  using namespace blender::bke;
  MutableAttributeAccessor attributes = me->attributes_for_write();
-  const VArray<bool> select_poly = attributes.lookup_or_default<bool>(
+  const VArray<bool> select_poly = *attributes.lookup_or_default<bool>(
      ".select_poly", ATTR_DOMAIN_FACE, false);
  if (select_poly.is_single() && !select_poly.get_internal_single()) {
    attributes.remove(".select_vert");
@ -720,9 +720,9 @@ void BKE_mesh_flush_select_from_polys(Mesh *me)
  /* Use generic domain interpolation to read the polygon attribute on the other domains.
   * Assume selected faces are not hidden and none of their vertices/edges are hidden. */
  attributes.lookup_or_default<bool>(".select_poly", ATTR_DOMAIN_POINT, false)
-      .materialize(select_vert.span);
+      .varray.materialize(select_vert.span);
  attributes.lookup_or_default<bool>(".select_poly", ATTR_DOMAIN_EDGE, false)
-      .materialize(select_edge.span);
+      .varray.materialize(select_edge.span);

  select_vert.finish();
  select_edge.finish();
@ -759,7 +759,7 @@ void BKE_mesh_flush_select_from_verts(Mesh *me)
 {
  using namespace blender::bke;
  MutableAttributeAccessor attributes = me->attributes_for_write();
-  const VArray<bool> select_vert = attributes.lookup_or_default<bool>(
+  const VArray<bool> select_vert = *attributes.lookup_or_default<bool>(
      ".select_vert", ATTR_DOMAIN_POINT, false);
  if (select_vert.is_single() && !select_vert.get_internal_single()) {
    attributes.remove(".select_edge");
@ -774,8 +774,8 @@ void BKE_mesh_flush_select_from_verts(Mesh *me)
      me->edges(),
      me->polys(),
      me->corner_verts(),
-      attributes.lookup_or_default<bool>(".hide_edge", ATTR_DOMAIN_EDGE, false),
-      attributes.lookup_or_default<bool>(".hide_poly", ATTR_DOMAIN_FACE, false),
+      *attributes.lookup_or_default<bool>(".hide_edge", ATTR_DOMAIN_EDGE, false),
+      *attributes.lookup_or_default<bool>(".hide_poly", ATTR_DOMAIN_FACE, false),
      select_vert,
      select_edge.span,
      select_poly.span);
--- a/source/blender/blenkernel/intern/mesh_legacy_convert.cc
+++ b/source/blender/blenkernel/intern/mesh_legacy_convert.cc
@ -1436,6 +1436,8 @@ void BKE_mesh_legacy_edge_crease_to_layers(Mesh *mesh)
  }
 }

+/** \} */
+
 /* -------------------------------------------------------------------- */
 /** \name Sharp Edge Conversion
 * \{ */
@ -1550,7 +1552,7 @@ void BKE_mesh_legacy_convert_hide_layers_to_flags(Mesh *mesh,
  const AttributeAccessor attributes = mesh->attributes();

  MutableSpan<MVert> verts(mesh->mvert, mesh->totvert);
-  const VArray<bool> hide_vert = attributes.lookup_or_default<bool>(
+  const VArray<bool> hide_vert = *attributes.lookup_or_default<bool>(
      ".hide_vert", ATTR_DOMAIN_POINT, false);
  threading::parallel_for(verts.index_range(), 4096, [&](IndexRange range) {
    for (const int i : range) {
@ -1559,7 +1561,7 @@ void BKE_mesh_legacy_convert_hide_layers_to_flags(Mesh *mesh,
  });

  MutableSpan<MEdge> edges(mesh->medge, mesh->totedge);
-  const VArray<bool> hide_edge = attributes.lookup_or_default<bool>(
+  const VArray<bool> hide_edge = *attributes.lookup_or_default<bool>(
      ".hide_edge", ATTR_DOMAIN_EDGE, false);
  threading::parallel_for(edges.index_range(), 4096, [&](IndexRange range) {
    for (const int i : range) {
@ -1567,7 +1569,7 @@ void BKE_mesh_legacy_convert_hide_layers_to_flags(Mesh *mesh,
    }
  });

-  const VArray<bool> hide_poly = attributes.lookup_or_default<bool>(
+  const VArray<bool> hide_poly = *attributes.lookup_or_default<bool>(
      ".hide_poly", ATTR_DOMAIN_FACE, false);
  threading::parallel_for(legacy_polys.index_range(), 4096, [&](IndexRange range) {
    for (const int i : range) {
@ -1643,7 +1645,7 @@ void BKE_mesh_legacy_convert_material_indices_to_mpoly(Mesh *mesh,
  using namespace blender;
  using namespace blender::bke;
  const AttributeAccessor attributes = mesh->attributes();
-  const VArray<int> material_indices = attributes.lookup_or_default<int>(
+  const VArray<int> material_indices = *attributes.lookup_or_default<int>(
      "material_index", ATTR_DOMAIN_FACE, 0);
  threading::parallel_for(legacy_polys.index_range(), 4096, [&](IndexRange range) {
    for (const int i : range) {
@ -1886,6 +1888,8 @@ void BKE_mesh_legacy_convert_uvs_to_generic(Mesh *mesh)
  }
 }

+/** \} */
+
 /** \name Selection Attribute and Legacy Flag Conversion
 * \{ */

@ -1897,7 +1901,7 @@ void BKE_mesh_legacy_convert_selection_layers_to_flags(Mesh *mesh,
  const AttributeAccessor attributes = mesh->attributes();

  MutableSpan<MVert> verts(mesh->mvert, mesh->totvert);
-  const VArray<bool> select_vert = attributes.lookup_or_default<bool>(
+  const VArray<bool> select_vert = *attributes.lookup_or_default<bool>(
      ".select_vert", ATTR_DOMAIN_POINT, false);
  threading::parallel_for(verts.index_range(), 4096, [&](IndexRange range) {
    for (const int i : range) {
@ -1906,7 +1910,7 @@ void BKE_mesh_legacy_convert_selection_layers_to_flags(Mesh *mesh,
  });

  MutableSpan<MEdge> edges(mesh->medge, mesh->totedge);
-  const VArray<bool> select_edge = attributes.lookup_or_default<bool>(
+  const VArray<bool> select_edge = *attributes.lookup_or_default<bool>(
      ".select_edge", ATTR_DOMAIN_EDGE, false);
  threading::parallel_for(edges.index_range(), 4096, [&](IndexRange range) {
    for (const int i : range) {
@ -1914,7 +1918,7 @@ void BKE_mesh_legacy_convert_selection_layers_to_flags(Mesh *mesh,
    }
  });

-  const VArray<bool> select_poly = attributes.lookup_or_default<bool>(
+  const VArray<bool> select_poly = *attributes.lookup_or_default<bool>(
      ".select_poly", ATTR_DOMAIN_FACE, false);
  threading::parallel_for(legacy_polys.index_range(), 4096, [&](IndexRange range) {
    for (const int i : range) {
--- a/source/blender/blenkernel/intern/mesh_normals.cc
+++ b/source/blender/blenkernel/intern/mesh_normals.cc
@ -193,8 +193,17 @@ void BKE_mesh_calc_poly_normal(const int *poly_verts,
                 {poly_verts, poly_size}));
 }

+/** \} */
+
 namespace blender::bke::mesh {

+/* -------------------------------------------------------------------- */
+/** \name Mesh Normal Calculation (Polygons & Vertices)
+ *
+ * Take care making optimizations to this function as improvements to low-poly
+ * meshes can slow down high-poly meshes. For details on performance, see D11993.
+ * \{ */
+
 void normals_calc_polys(const Span<float3> positions,
                        const OffsetIndices<int> polys,
                        const Span<int> corner_verts,
@ -208,15 +217,6 @@ void normals_calc_polys(const Span<float3> positions,
  });
 }

-/** \} */
-
-/* -------------------------------------------------------------------- */
-/** \name Mesh Normal Calculation (Polygons & Vertices)
- *
- * Take care making optimizations to this function as improvements to low-poly
- * meshes can slow down high-poly meshes. For details on performance, see D11993.
- * \{ */
-
 void normals_calc_poly_vert(const Span<float3> positions,
                            const OffsetIndices<int> polys,
                            const Span<int> corner_verts,
@ -305,10 +305,10 @@ void normals_calc_poly_vert(const Span<float3> positions,
  }
 }

-}  // namespace blender::bke::mesh
-
 /** \} */

+}  // namespace blender::bke::mesh
+
 /* -------------------------------------------------------------------- */
 /** \name Mesh Normal Calculation
 * \{ */
--- a/source/blender/blenkernel/intern/mesh_remesh_voxel.cc
+++ b/source/blender/blenkernel/intern/mesh_remesh_voxel.cc
@ -317,8 +317,8 @@ void BKE_remesh_reproject_sculpt_face_sets(Mesh *target, const Mesh *source)
  const OffsetIndices target_polys = target->polys();
  const Span<int> target_corner_verts = target->corner_verts();

-  const VArray<int> src_face_sets = src_attributes.lookup<int>(".sculpt_face_set",
-                                                               ATTR_DOMAIN_FACE);
+  const VArray src_face_sets =
+    *  src_attributes.lookup<int>(".sculpt_face_set", ATTR_DOMAIN_FACE);
  if (!src_face_sets) {
    return;
  }
--- a/source/blender/blenkernel/intern/mesh_tangent.cc
+++ b/source/blender/blenkernel/intern/mesh_tangent.cc
@ -126,7 +126,7 @@ void BKE_mesh_calc_loop_tangent_single(Mesh *mesh,
  }

  const AttributeAccessor attributes = mesh->attributes();
-  const VArraySpan<float2> uv_map = attributes.lookup<float2>(uvmap, ATTR_DOMAIN_CORNER);
+  const VArraySpan uv_map = *attributes.lookup<float2>(uvmap, ATTR_DOMAIN_CORNER);
  if (uv_map.is_empty()) {
    BKE_reportf(reports,
                RPT_ERROR,
--- a/Show More
+++ b/Show More