Initial Grease Pencil 3.0 stage #106848

Merged
Falk David merged 224 commits from filedescriptor/blender:grease-pencil-v3 into main 2023-05-30 11:14:22 +02:00
156 changed files with 4461 additions and 792 deletions
Showing only changes of commit 1e8fa34055

View File

@ -525,6 +525,12 @@ if(NOT APPLE)
set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
# HIPRT is only available on Windows for now.
if(WIN32)
option(WITH_CYCLES_DEVICE_HIPRT "Enable Cycles AMD HIPRT support" OFF)
mark_as_advanced(WITH_CYCLES_DEVICE_HIPRT)
endif()
endif()
# Apple Metal
@ -1981,10 +1987,13 @@ if(FIRST_RUN)
info_cfg_option(WITH_CYCLES_DEVICE_OPTIX)
info_cfg_option(WITH_CYCLES_DEVICE_CUDA)
info_cfg_option(WITH_CYCLES_CUDA_BINARIES)
info_cfg_option(WITH_CYCLES_DEVICE_HIP)
info_cfg_option(WITH_CYCLES_HIP_BINARIES)
info_cfg_option(WITH_CYCLES_DEVICE_ONEAPI)
info_cfg_option(WITH_CYCLES_ONEAPI_BINARIES)
info_cfg_option(WITH_CYCLES_DEVICE_HIP)
info_cfg_option(WITH_CYCLES_HIP_BINARIES)
endif()
if(WIN32)
info_cfg_option(WITH_CYCLES_DEVICE_HIPRT)
endif()
endif()

View File

@ -1,12 +1,9 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright 2021 Blender Foundation.
# - Find HIP compiler
#
# This module defines
# Find HIP compiler. This module defines
# HIP_HIPCC_EXECUTABLE, the full path to the hipcc executable
# HIP_VERSION, the HIP compiler version
#
# HIP_FOUND, if the HIP toolkit is found.
# If HIP_ROOT_DIR was defined in the environment, use it.
@ -27,12 +24,21 @@ find_program(HIP_HIPCC_EXECUTABLE
bin
)
if(HIP_HIPCC_EXECUTABLE AND NOT EXISTS ${HIP_HIPCC_EXECUTABLE})
message(WARNING "Cached or directly specified hipcc executable does not exist.")
set(HIP_FOUND FALSE)
elseif(HIP_HIPCC_EXECUTABLE)
set(HIP_FOUND TRUE)
if(WIN32)
# Needed for HIP-RT on Windows.
find_program(HIP_LINKER_EXECUTABLE
NAMES
clang++
HINTS
${_hip_SEARCH_DIRS}
PATH_SUFFIXES
bin
NO_DEFAULT_PATH
NO_CMAKE_PATH
)
endif()
if(HIP_HIPCC_EXECUTABLE)
set(HIP_VERSION_MAJOR 0)
set(HIP_VERSION_MINOR 0)
set(HIP_VERSION_PATCH 0)
@ -54,33 +60,31 @@ elseif(HIP_HIPCC_EXECUTABLE)
# Strip the HIP prefix and get list of individual version components.
string(REGEX REPLACE
".*HIP version: ([.0-9]+).*" "\\1"
HIP_SEMANTIC_VERSION "${_hip_version_raw}")
string(REPLACE "." ";" HIP_VERSION_PARTS "${HIP_SEMANTIC_VERSION}")
list(LENGTH HIP_VERSION_PARTS NUM_HIP_VERSION_PARTS)
_hip_semantic_version "${_hip_version_raw}")
string(REPLACE "." ";" _hip_version_parts "${_hip_semantic_version}")
list(LENGTH _hip_version_parts _num_hip_version_parts)
# Extract components into corresponding variables.
if(NUM_HIP_VERSION_PARTS GREATER 0)
list(GET HIP_VERSION_PARTS 0 HIP_VERSION_MAJOR)
if(_num_hip_version_parts GREATER 0)
list(GET _hip_version_parts 0 HIP_VERSION_MAJOR)
endif()
if(NUM_HIP_VERSION_PARTS GREATER 1)
list(GET HIP_VERSION_PARTS 1 HIP_VERSION_MINOR)
if(_num_hip_version_parts GREATER 1)
list(GET _hip_version_parts 1 HIP_VERSION_MINOR)
endif()
if(NUM_HIP_VERSION_PARTS GREATER 2)
list(GET HIP_VERSION_PARTS 2 HIP_VERSION_PATCH)
if(_num_hip_version_parts GREATER 2)
list(GET _hip_version_parts 2 HIP_VERSION_PATCH)
endif()
# Unset temp variables.
unset(NUM_HIP_VERSION_PARTS)
unset(HIP_SEMANTIC_VERSION)
unset(HIP_VERSION_PARTS)
unset(_num_hip_version_parts)
unset(_hip_semantic_version)
unset(_hip_version_parts)
endif()
# Construct full semantic version.
set(HIP_VERSION "${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_VERSION_PATCH}")
unset(_hip_version_raw)
unset(_hipcc_executable)
else()
set(HIP_FOUND FALSE)
endif()
include(FindPackageHandleStandardArgs)
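
A note for reference (not part of the patch): the version extraction above amounts to the following standalone C++ sketch, assuming `hipcc --version` prints the usual "HIP version: X.Y.Z" form; the sample string is an assumption.

#include <cstdio>
#include <regex>
#include <string>

int main()
{
  /* Example of what `hipcc --version` prints; the exact string is assumed. */
  std::string raw = "HIP version: 5.5.30571";
  std::smatch match;
  /* Same idea as the CMake regex ".*HIP version: ([.0-9]+).*". */
  if (std::regex_search(raw, match, std::regex("HIP version: ([.0-9]+)"))) {
    int major = 0, minor = 0, patch = 0; /* defaults mirror the module */
    std::sscanf(match[1].str().c_str(), "%d.%d.%d", &major, &minor, &patch);
    std::printf("%d.%d.%d\n", major, minor, patch); /* prints 5.5.30571 */
  }
  return 0;
}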

View File

@ -0,0 +1,47 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright 2021 Blender Foundation.
# Find HIPRT SDK. This module defines:
# HIPRT_INCLUDE_DIR, path to HIPRT include directory
# HIPRT_BITCODE, bitcode file with ray-tracing functionality
# HIPRT_FOUND, if SDK found
# If HIPRT_ROOT_DIR was defined in the environment, use it.
if(NOT HIPRT_ROOT_DIR AND NOT $ENV{HIPRT_ROOT_DIR} STREQUAL "")
set(HIPRT_ROOT_DIR $ENV{HIPRT_ROOT_DIR})
endif()
set(_hiprt_SEARCH_DIRS
${HIPRT_ROOT_DIR}
)
find_path(HIPRT_INCLUDE_DIR
NAMES
hiprt/hiprt.h
HINTS
${_hiprt_SEARCH_DIRS}
)
if(HIPRT_INCLUDE_DIR)
file(STRINGS "${HIPRT_INCLUDE_DIR}/hiprt/hiprt.h" _hiprt_version
REGEX "^#define HIPRT_VERSION_STR[ \t]\".*\"$")
string(REGEX MATCHALL "[0-9]+[.0-9]+" _hiprt_version ${_hiprt_version})
find_file(HIPRT_BITCODE
NAMES
hiprt${_hiprt_version}_amd_lib_win.bc
HINTS
${HIPRT_INCLUDE_DIR}/hiprt/win
NO_DEFAULT_PATH
)
unset(_hiprt_version)
endif()
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(HIPRT DEFAULT_MSG
HIPRT_INCLUDE_DIR HIPRT_BITCODE)
mark_as_advanced(
HIPRT_INCLUDE_DIR
)
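
Aside (illustrative, not from the patch): the find_file() call couples the bitcode filename to the version string parsed out of hiprt.h. In C++ terms the convention is roughly the following; the naming scheme is inferred from the pattern above.

#include <iostream>
#include <string>

int main()
{
  /* HIPRT_VERSION_STR as defined in hiprt.h, e.g. "02000" for HIP RT 2.0. */
  const std::string version = "02000";
  const std::string bitcode = "hiprt" + version + "_amd_lib_win.bc";
  std::cout << bitcode << "\n"; /* hiprt02000_amd_lib_win.bc */
  return 0;
}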

View File

@ -81,7 +81,6 @@ if(NOT APPLE)
set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_DEVICE_ONEAPI ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_ONEAPI_BINARIES ON CACHE BOOL "" FORCE)

View File

@ -10,6 +10,8 @@ buildbot:
version: '11.4.1'
hip:
version: '5.5.30571'
hiprt:
version: '2.0.0'
optix:
version: '7.3.0'
ocloc:

View File

@ -12,11 +12,21 @@ set(INC_SYS
set(SRC
src/hipew.c
include/hipew.h
)
set(LIB
)
if(HIPRT_INCLUDE_DIR)
list(APPEND INC_SYS
${HIPRT_INCLUDE_DIR}
)
list(APPEND SRC
src/hiprtew.cc
include/hiprtew.h
)
endif()
blender_add_lib(extern_hipew "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")

View File

@ -43,9 +43,9 @@ extern "C" {
#define HIP_TRSA_OVERRIDE_FORMAT 0x01
#define HIP_TRSF_READ_AS_INTEGER 0x01
#define HIP_TRSF_NORMALIZED_COORDINATES 0x02
#define HIP_LAUNCH_PARAM_END ((void*)0x00)
#define HIP_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01)
#define HIP_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02)
#define HIP_LAUNCH_PARAM_END ((void*)0x03)
/* Functions which changed 3.1 -> 3.2 for 64 bit stuff,
* the cuda library has both the old ones for compatibility and new
@ -55,6 +55,7 @@ extern "C" {
#define hipMemGetInfo hipMemGetInfo
#define hipMemAllocPitch hipMemAllocPitch
#define hipMemGetAddressRange hipMemGetAddressRange
#define hipMemcpy hipMemcpy
#define hipMemcpyHtoD hipMemcpyHtoD
#define hipMemcpyDtoH hipMemcpyDtoH
#define hipMemcpyDtoD hipMemcpyDtoD
@ -68,6 +69,7 @@ extern "C" {
#define hipMemsetD32 hipMemsetD32
#define hipArrayCreate hipArrayCreate
#define hipArray3DCreate hipArray3DCreate
#define hipPointerGetAttributes hipPointerGetAttributes
#define hipTexRefSetAddress hipTexRefSetAddress
#define hipTexRefGetAddress hipTexRefGetAddress
#define hipStreamDestroy hipStreamDestroy
@ -108,11 +110,20 @@ typedef struct hipMipmappedArray_st* hipMipmappedArray_t;
typedef struct ihipEvent_t* hipEvent_t;
typedef struct ihipStream_t* hipStream_t;
typedef unsigned long long hipTextureObject_t;
typedef void* hipExternalMemory_t;
typedef struct HIPuuid_st {
char bytes[16];
} HIPuuid;
typedef enum hipMemcpyKind {
hipMemcpyHostToHost = 0,
hipMemcpyHostToDevice = 1,
hipMemcpyDeviceToHost = 2,
hipMemcpyDeviceToDevice = 3,
hipMemcpyDefault = 4
} hipMemcpyKind;
typedef enum hipChannelFormatKind {
hipChannelFormatKindSigned = 0,
hipChannelFormatKindUnsigned = 1,
@ -1048,28 +1059,105 @@ typedef enum HIPGLmap_flags_enum {
HIP_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02,
} HIPGLmap_flags;
typedef enum hipExternalMemoryHandleType_enum {
hipExternalMemoryHandleTypeOpaqueFd = 1,
hipExternalMemoryHandleTypeOpaqueWin32 = 2,
hipExternalMemoryHandleTypeOpaqueWin32Kmt = 3,
hipExternalMemoryHandleTypeD3D12Heap = 4,
hipExternalMemoryHandleTypeD3D12Resource = 5,
hipExternalMemoryHandleTypeD3D11Resource = 6,
hipExternalMemoryHandleTypeD3D11ResourceKmt = 7,
} hipExternalMemoryHandleType;
typedef struct hipExternalMemoryHandleDesc_st {
hipExternalMemoryHandleType type;
union {
int fd;
struct {
void *handle;
const void *name;
} win32;
} handle;
unsigned long long size;
unsigned int flags;
} hipExternalMemoryHandleDesc;
typedef struct hipExternalMemoryBufferDesc_st {
unsigned long long offset;
unsigned long long size;
unsigned int flags;
} hipExternalMemoryBufferDesc;
/**
* hipRTC related
*/
typedef struct _hiprtcProgram* hiprtcProgram;
typedef enum hiprtcResult {
HIPRTC_SUCCESS = 0,
HIPRTC_ERROR_OUT_OF_MEMORY = 1,
HIPRTC_ERROR_PROGRAM_CREATION_FAILURE = 2,
HIPRTC_ERROR_INVALID_INPUT = 3,
HIPRTC_ERROR_INVALID_PROGRAM = 4,
HIPRTC_ERROR_INVALID_OPTION = 5,
HIPRTC_ERROR_COMPILATION = 6,
HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7,
HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8,
HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9,
HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10,
HIPRTC_ERROR_INTERNAL_ERROR = 11
HIPRTC_SUCCESS = 0,
HIPRTC_ERROR_OUT_OF_MEMORY = 1,
HIPRTC_ERROR_PROGRAM_CREATION_FAILURE = 2,
HIPRTC_ERROR_INVALID_INPUT = 3,
HIPRTC_ERROR_INVALID_PROGRAM = 4,
HIPRTC_ERROR_INVALID_OPTION = 5,
HIPRTC_ERROR_COMPILATION = 6,
HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7,
HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8,
HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9,
HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10,
HIPRTC_ERROR_INTERNAL_ERROR = 11,
HIPRTC_ERROR_LINKING = 100
} hiprtcResult;
typedef enum hiprtcJIT_option {
HIPRTC_JIT_MAX_REGISTERS = 0,
HIPRTC_JIT_THREADS_PER_BLOCK,
HIPRTC_JIT_WALL_TIME,
HIPRTC_JIT_INFO_LOG_BUFFER,
HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
HIPRTC_JIT_ERROR_LOG_BUFFER,
HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
HIPRTC_JIT_OPTIMIZATION_LEVEL,
HIPRTC_JIT_TARGET_FROM_HIPCONTEXT,
HIPRTC_JIT_TARGET,
HIPRTC_JIT_FALLBACK_STRATEGY,
HIPRTC_JIT_GENERATE_DEBUG_INFO,
HIPRTC_JIT_LOG_VERBOSE,
HIPRTC_JIT_GENERATE_LINE_INFO,
HIPRTC_JIT_CACHE_MODE,
HIPRTC_JIT_NEW_SM3X_OPT,
HIPRTC_JIT_FAST_COMPILE,
HIPRTC_JIT_GLOBAL_SYMBOL_NAMES,
HIPRTC_JIT_GLOBAL_SYMBOL_ADDRESS,
HIPRTC_JIT_GLOBAL_SYMBOL_COUNT,
HIPRTC_JIT_LTO,
HIPRTC_JIT_FTZ,
HIPRTC_JIT_PREC_DIV,
HIPRTC_JIT_PREC_SQRT,
HIPRTC_JIT_FMA,
HIPRTC_JIT_NUM_OPTIONS,
} hiprtcJIT_option;
typedef enum hiprtcJITInputType {
HIPRTC_JIT_INPUT_CUBIN = 0,
HIPRTC_JIT_INPUT_PTX,
HIPRTC_JIT_INPUT_FATBINARY,
HIPRTC_JIT_INPUT_OBJECT,
HIPRTC_JIT_INPUT_LIBRARY,
HIPRTC_JIT_INPUT_NVVM,
HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES,
HIPRTC_JIT_INPUT_LLVM_BITCODE = 100,
HIPRTC_JIT_INPUT_LLVM_BUNDLED_BITCODE = 101,
HIPRTC_JIT_INPUT_LLVM_ARCHIVES_OF_BUNDLED_BITCODE = 102,
HIPRTC_JIT_NUM_INPUT_TYPES = ( HIPRTC_JIT_NUM_LEGACY_INPUT_TYPES + 3 )
} hiprtcJITInputType;
typedef struct ihiprtcLinkState* hiprtcLinkState;
/* Function types. */
typedef hipError_t HIPAPI thipGetErrorName(hipError_t error, const char** pStr);
typedef const char* HIPAPI thipGetErrorString(hipError_t error);
typedef hipError_t HIPAPI thipGetLastError(hipError_t error);
typedef hipError_t HIPAPI thipInit(unsigned int Flags);
typedef hipError_t HIPAPI thipDriverGetVersion(int* driverVersion);
typedef hipError_t HIPAPI thipGetDevice(int* device);
@ -1078,6 +1166,8 @@ typedef hipError_t HIPAPI thipGetDeviceProperties(hipDeviceProp_t* props, int de
typedef hipError_t HIPAPI thipDeviceGet(hipDevice_t* device, int ordinal);
typedef hipError_t HIPAPI thipDeviceGetName(char* name, int len, hipDevice_t dev);
typedef hipError_t HIPAPI thipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attrib, hipDevice_t dev);
typedef hipError_t HIPAPI thipDeviceGetLimit(size_t* pValue, enum hipLimit_t limit);
typedef hipError_t HIPAPI thipDeviceSetLimit(enum hipLimit_t limit, size_t value);
typedef hipError_t HIPAPI thipDeviceComputeCapability(int* major, int* minor, hipDevice_t dev);
typedef hipError_t HIPAPI thipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev);
typedef hipError_t HIPAPI thipDevicePrimaryCtxRelease(hipDevice_t dev);
@ -1114,13 +1204,14 @@ typedef hipError_t HIPAPI thipMemGetAddressRange(hipDeviceptr_t* pbase, size_t*
typedef hipError_t HIPAPI thipHostMalloc(void** pp, size_t bytesize, unsigned int flags);
typedef hipError_t HIPAPI thipHostFree(void* p);
typedef hipError_t HIPAPI thipMemHostAlloc(void** pp, size_t bytesize, unsigned int Flags);
typedef hipError_t HIPAPI thipHostRegister(void* p, size_t bytesize, unsigned int Flags);
typedef hipError_t HIPAPI thipHostGetDevicePointer(hipDeviceptr_t* pdptr, void* p, unsigned int Flags);
typedef hipError_t HIPAPI thipHostGetFlags(unsigned int* pFlags, void* p);
typedef hipError_t HIPAPI thipMallocManaged(hipDeviceptr_t* dptr, size_t bytesize, unsigned int flags);
typedef hipError_t HIPAPI thipDeviceGetByPCIBusId(hipDevice_t* dev, const char* pciBusId);
typedef hipError_t HIPAPI thipDeviceGetPCIBusId(char* pciBusId, int len, hipDevice_t dev);
typedef hipError_t HIPAPI thipMemHostUnregister(void* p);
typedef hipError_t HIPAPI thipMemcpy(hipDeviceptr_t dst, hipDeviceptr_t src, size_t ByteCount);
typedef hipError_t HIPAPI thipHostUnregister(void* p);
typedef hipError_t HIPAPI thipMemcpy(void* dst, const void* src, size_t ByteCount, hipMemcpyKind kind);
typedef hipError_t HIPAPI thipMemcpyPeer(hipDeviceptr_t dstDevice, hipCtx_t dstContext, hipDeviceptr_t srcDevice, hipCtx_t srcContext, size_t ByteCount);
typedef hipError_t HIPAPI thipMemcpyHtoD(hipDeviceptr_t dstDevice, void* srcHost, size_t ByteCount);
typedef hipError_t HIPAPI thipMemcpyDtoH(void* dstHost, hipDeviceptr_t srcDevice, size_t ByteCount);
@ -1130,8 +1221,10 @@ typedef hipError_t HIPAPI thipMemcpyParam2D(const hip_Memcpy2D* pCopy);
typedef hipError_t HIPAPI thipDrvMemcpy3D(const HIP_MEMCPY3D* pCopy);
typedef hipError_t HIPAPI thipMemcpyHtoDAsync(hipDeviceptr_t dstDevice, const void* srcHost, size_t ByteCount, hipStream_t hStream);
typedef hipError_t HIPAPI thipMemcpyDtoHAsync(void* dstHost, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream);
typedef hipError_t HIPAPI thipMemcpyDtoDAsync(hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream);
typedef hipError_t HIPAPI thipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t hStream);
typedef hipError_t HIPAPI thipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t hStream);
typedef hipError_t HIPAPI thipMemset(void* dstDevice, int value, size_t sizeBytes);
typedef hipError_t HIPAPI thipMemsetD8(hipDeviceptr_t dstDevice, unsigned char uc, size_t N);
typedef hipError_t HIPAPI thipMemsetD16(hipDeviceptr_t dstDevice, unsigned short us, size_t N);
typedef hipError_t HIPAPI thipMemsetD32(hipDeviceptr_t dstDevice, unsigned int ui, size_t N);
@ -1144,7 +1237,8 @@ typedef hipError_t HIPAPI thipMemsetD2D32Async(hipDeviceptr_t dstDevice, size_t
typedef hipError_t HIPAPI thipArrayCreate(hArray ** pHandle, const HIP_ARRAY_DESCRIPTOR* pAllocateArray);
typedef hipError_t HIPAPI thipArrayDestroy(hArray hArray);
typedef hipError_t HIPAPI thipArray3DCreate(hArray * pHandle, const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray);
typedef hipError_t HIPAPI hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr);
typedef hipError_t HIPAPI thipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr);
typedef hipError_t HIPAPI thipStreamCreate(hipStream_t* phStream);
typedef hipError_t HIPAPI thipStreamCreateWithFlags(hipStream_t* phStream, unsigned int Flags);
typedef hipError_t HIPAPI thipStreamCreateWithPriority(hipStream_t* phStream, unsigned int flags, int priority);
typedef hipError_t HIPAPI thipStreamGetPriority(hipStream_t hStream, int* priority);
@ -1189,7 +1283,10 @@ typedef hipError_t HIPAPI thipGraphicsMapResources(unsigned int count, hipGraphi
typedef hipError_t HIPAPI thipGraphicsUnmapResources(unsigned int count, hipGraphicsResource* resources, hipStream_t hStream);
typedef hipError_t HIPAPI thipGraphicsGLRegisterBuffer(hipGraphicsResource* pCudaResource, GLuint buffer, unsigned int Flags);
typedef hipError_t HIPAPI thipGLGetDevices(unsigned int* pHipDeviceCount, int* pHipDevices, unsigned int hipDeviceCount, hipGLDeviceList deviceList);
typedef hiprtcResult HIPAPI thiprtcGetErrorString(hiprtcResult result);
typedef hipError_t HIPAPI thipImportExternalMemory(hipExternalMemory_t* extMem_out, const hipExternalMemoryHandleDesc* memHandleDesc);
typedef hipError_t HIPAPI thipExternalMemoryGetMappedBuffer(void **devPtr, hipExternalMemory_t extMem, const hipExternalMemoryBufferDesc *bufferDesc);
typedef hipError_t HIPAPI thipDestroyExternalMemory(hipExternalMemory_t extMem);
typedef const char* HIPAPI thiprtcGetErrorString(hiprtcResult result);
typedef hiprtcResult HIPAPI thiprtcAddNameExpression(hiprtcProgram prog, const char* name_expression);
typedef hiprtcResult HIPAPI thiprtcCompileProgram(hiprtcProgram prog, int numOptions, const char** options);
typedef hiprtcResult HIPAPI thiprtcCreateProgram(hiprtcProgram* prog, const char* src, const char* name, int numHeaders, const char** headers, const char** includeNames);
@ -1197,20 +1294,30 @@ typedef hiprtcResult HIPAPI thiprtcDestroyProgram(hiprtcProgram* prog);
typedef hiprtcResult HIPAPI thiprtcGetLoweredName(hiprtcProgram prog, const char* name_expression, const char** lowered_name);
typedef hiprtcResult HIPAPI thiprtcGetProgramLog(hiprtcProgram prog, char* log);
typedef hiprtcResult HIPAPI thiprtcGetProgramLogSize(hiprtcProgram prog, size_t* logSizeRet);
typedef hiprtcResult HIPAPI thiprtcGetBitcode( hiprtcProgram prog, char* bitcode );
typedef hiprtcResult HIPAPI thiprtcGetBitcodeSize( hiprtcProgram prog, size_t* bitcodeSizeRet );
typedef hiprtcResult HIPAPI thiprtcGetCode(hiprtcProgram prog, char* code);
typedef hiprtcResult HIPAPI thiprtcGetCodeSize(hiprtcProgram prog, size_t* codeSizeRet);
typedef hiprtcResult HIPAPI thiprtcLinkCreate( unsigned int num_options, hiprtcJIT_option* option_ptr, void** option_vals_pptr, hiprtcLinkState* hip_link_state_ptr );
typedef hiprtcResult HIPAPI thiprtcLinkAddFile( hiprtcLinkState hip_link_state, hiprtcJITInputType input_type, const char* file_path, unsigned int num_options, hiprtcJIT_option* options_ptr, void** option_values );
typedef hiprtcResult HIPAPI thiprtcLinkAddData( hiprtcLinkState hip_link_state, hiprtcJITInputType input_type, void* image, size_t image_size, const char* name, unsigned int num_options, hiprtcJIT_option* options_ptr, void** option_values );
typedef hiprtcResult HIPAPI thiprtcLinkComplete( hiprtcLinkState hip_link_state, void** bin_out, size_t* size_out );
typedef hiprtcResult HIPAPI thiprtcLinkDestroy( hiprtcLinkState hip_link_state );
/* Function declarations. */
extern thipGetErrorName *hipGetErrorName;
extern thipGetErrorString* hipGetErrorString;
extern thipGetLastError* hipGetLastError;
extern thipInit *hipInit;
extern thipDriverGetVersion *hipDriverGetVersion;
extern thipGetDevice *hipGetDevice;
extern thipGetDeviceCount *hipGetDeviceCount;
extern thipGetDeviceProperties *hipGetDeviceProperties;
extern thipDeviceGet* hipDeviceGet;
extern thipDeviceGet *hipDeviceGet;
extern thipDeviceGetName *hipDeviceGetName;
extern thipDeviceGetAttribute *hipDeviceGetAttribute;
extern thipDeviceGetLimit *hipDeviceGetLimit;
extern thipDeviceSetLimit *hipDeviceSetLimit;
extern thipDeviceComputeCapability *hipDeviceComputeCapability;
extern thipDevicePrimaryCtxRetain *hipDevicePrimaryCtxRetain;
extern thipDevicePrimaryCtxRelease *hipDevicePrimaryCtxRelease;
@ -1246,11 +1353,14 @@ extern thipFree *hipFree;
extern thipMemGetAddressRange *hipMemGetAddressRange;
extern thipHostMalloc *hipHostMalloc;
extern thipHostFree *hipHostFree;
extern thipHostRegister *hipHostRegister;
extern thipHostGetDevicePointer *hipHostGetDevicePointer;
extern thipHostGetFlags *hipHostGetFlags;
extern thipHostUnregister *hipHostUnregister;
extern thipMallocManaged *hipMallocManaged;
extern thipDeviceGetByPCIBusId *hipDeviceGetByPCIBusId;
extern thipDeviceGetPCIBusId *hipDeviceGetPCIBusId;
extern thipMemcpy *hipMemcpy;
extern thipMemcpyPeer *hipMemcpyPeer;
extern thipMemcpyHtoD *hipMemcpyHtoD;
extern thipMemcpyDtoH *hipMemcpyDtoH;
@ -1260,8 +1370,10 @@ extern thipMemcpyParam2D *hipMemcpyParam2D;
extern thipDrvMemcpy3D *hipDrvMemcpy3D;
extern thipMemcpyHtoDAsync *hipMemcpyHtoDAsync;
extern thipMemcpyDtoHAsync *hipMemcpyDtoHAsync;
extern thipMemcpyDtoDAsync *hipMemcpyDtoDAsync;
extern thipMemcpyParam2DAsync *hipMemcpyParam2DAsync;
extern thipDrvMemcpy3DAsync *hipDrvMemcpy3DAsync;
extern thipMemset *hipMemset;
extern thipMemsetD8 *hipMemsetD8;
extern thipMemsetD16 *hipMemsetD16;
extern thipMemsetD32 *hipMemsetD32;
@ -1271,6 +1383,8 @@ extern thipMemsetD32Async *hipMemsetD32Async;
extern thipArrayCreate *hipArrayCreate;
extern thipArrayDestroy *hipArrayDestroy;
extern thipArray3DCreate *hipArray3DCreate;
extern thipPointerGetAttributes *hipPointerGetAttributes;
extern thipStreamCreate* hipStreamCreate;
extern thipStreamCreateWithFlags *hipStreamCreateWithFlags;
extern thipStreamCreateWithPriority *hipStreamCreateWithPriority;
extern thipStreamGetPriority *hipStreamGetPriority;
@ -1316,6 +1430,9 @@ extern thipGraphicsUnmapResources *hipGraphicsUnmapResources;
extern thipGraphicsGLRegisterBuffer *hipGraphicsGLRegisterBuffer;
extern thipGLGetDevices *hipGLGetDevices;
extern thipImportExternalMemory *hipImportExternalMemory;
extern thipExternalMemoryGetMappedBuffer *hipExternalMemoryGetMappedBuffer;
extern thipDestroyExternalMemory *hipDestroyExternalMemory;
extern thiprtcGetErrorString* hiprtcGetErrorString;
extern thiprtcAddNameExpression* hiprtcAddNameExpression;
@ -1325,9 +1442,17 @@ extern thiprtcDestroyProgram* hiprtcDestroyProgram;
extern thiprtcGetLoweredName* hiprtcGetLoweredName;
extern thiprtcGetProgramLog* hiprtcGetProgramLog;
extern thiprtcGetProgramLogSize* hiprtcGetProgramLogSize;
extern thiprtcGetBitcode* hiprtcGetBitcode;
extern thiprtcGetBitcodeSize* hiprtcGetBitcodeSize;
extern thiprtcGetCode* hiprtcGetCode;
extern thiprtcGetCodeSize* hiprtcGetCodeSize;
extern thiprtcLinkCreate* hiprtcLinkCreate;
extern thiprtcLinkAddFile* hiprtcLinkAddFile;
extern thiprtcLinkAddData* hiprtcLinkAddData;
extern thiprtcLinkComplete* hiprtcLinkComplete;
extern thiprtcLinkDestroy* hiprtcLinkDestroy;
/* HIPEW API. */
enum {
HIPEW_SUCCESS = 0,
@ -1344,7 +1469,6 @@ int hipewInit(hipuint32_t flags);
const char *hipewErrorString(hipError_t result);
const char *hipewCompilerPath(void);
int hipewCompilerVersion(void);
int hipewNvrtcVersion(void);
#ifdef __cplusplus
}
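
Worth spelling out the pattern at work here: every t<name> typedef above is paired with an extern t<name> *<name> pointer that hipewInit() resolves at runtime. A minimal compilable sketch of that wrangler pattern, using the new runtime-style hipMemcpy signature introduced in this change; the types are stand-ins, not the real headers.

#include <cassert>
#include <cstddef>

typedef int hipError_t; /* stand-in for the real enum */
typedef enum hipMemcpyKind { hipMemcpyHostToDevice = 1 } hipMemcpyKind;

/* Mirrors the updated typedef: void pointers plus an explicit copy kind,
 * replacing the old driver-style (hipDeviceptr_t, hipDeviceptr_t) form. */
typedef hipError_t thipMemcpy(void *dst, const void *src, size_t n, hipMemcpyKind kind);
thipMemcpy *hipMemcpy = nullptr; /* filled in by the loader at runtime */

hipError_t copy_to_device(void *dst, const void *src, size_t n)
{
  assert(hipMemcpy); /* hipewInit() must have resolved the symbol first */
  return hipMemcpy(dst, src, n, hipMemcpyHostToDevice);
}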

extern/hipew/include/hiprtew.h vendored Normal file
View File

@ -0,0 +1,102 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*/
#ifndef __HIPRTEW_H__
#define __HIPRTEW_H__
#include <hiprt/hiprt_types.h>
#define HIPRT_MAJOR_VERSION 2
#define HIPRT_MINOR_VERSION 0
#define HIPRT_PATCH_VERSION 0xb68861
#define HIPRT_API_VERSION 2000
#define HIPRT_VERSION_STR "02000"
typedef unsigned int hiprtuint32_t;
/* Function types. */
typedef hiprtError(thiprtCreateContext)(hiprtuint32_t hiprtApiVersion,
hiprtContextCreationInput &input,
hiprtContext *outContext);
typedef hiprtError(thiprtDestroyContext)(hiprtContext context);
typedef hiprtError(thiprtCreateGeometry)(hiprtContext context,
const hiprtGeometryBuildInput *buildInput,
const hiprtBuildOptions *buildOptions,
hiprtGeometry *outGeometry);
typedef hiprtError(thiprtDestroyGeometry)(hiprtContext context,
hiprtGeometry outGeometry);
typedef hiprtError(thiprtBuildGeometry)(hiprtContext context,
hiprtBuildOperation buildOperation,
const hiprtGeometryBuildInput *buildInput,
const hiprtBuildOptions *buildOptions,
hiprtDevicePtr temporaryBuffer,
hiprtApiStream stream,
hiprtGeometry outGeometry);
typedef hiprtError(thiprtGetGeometryBuildTemporaryBufferSize)(
hiprtContext context,
const hiprtGeometryBuildInput *buildInput,
const hiprtBuildOptions *buildOptions,
size_t *outSize);
typedef hiprtError(thiprtCreateScene)(hiprtContext context,
const hiprtSceneBuildInput *buildInput,
const hiprtBuildOptions *buildOptions,
hiprtScene *outScene);
typedef hiprtError(thiprtDestroyScene)(hiprtContext context, hiprtScene outScene);
typedef hiprtError(thiprtBuildScene)(hiprtContext context,
hiprtBuildOperation buildOperation,
const hiprtSceneBuildInput *buildInput,
const hiprtBuildOptions *buildOptions,
hiprtDevicePtr temporaryBuffer,
hiprtApiStream stream,
hiprtScene outScene);
typedef hiprtError(thiprtGetSceneBuildTemporaryBufferSize)(
hiprtContext context,
const hiprtSceneBuildInput *buildInput,
const hiprtBuildOptions *buildOptions,
size_t *outSize);
typedef hiprtError(thiprtCreateFuncTable)(hiprtContext context,
hiprtuint32_t numGeomTypes,
hiprtuint32_t numRayTypes,
hiprtFuncTable *outFuncTable);
typedef hiprtError(thiprtSetFuncTable)(hiprtContext context,
hiprtFuncTable funcTable,
hiprtuint32_t geomType,
hiprtuint32_t rayType,
hiprtFuncDataSet set);
typedef hiprtError(thiprtDestroyFuncTable)(hiprtContext context,
hiprtFuncTable funcTable);
/* Function declarations. */
extern thiprtCreateContext *hiprtCreateContext;
extern thiprtDestroyContext *hiprtDestroyContext;
extern thiprtCreateGeometry *hiprtCreateGeometry;
extern thiprtDestroyGeometry *hiprtDestroyGeometry;
extern thiprtBuildGeometry *hiprtBuildGeometry;
extern thiprtGetGeometryBuildTemporaryBufferSize *hiprtGetGeometryBuildTemporaryBufferSize;
extern thiprtCreateScene *hiprtCreateScene;
extern thiprtDestroyScene *hiprtDestroyScene;
extern thiprtBuildScene *hiprtBuildScene;
extern thiprtGetSceneBuildTemporaryBufferSize *hiprtGetSceneBuildTemporaryBufferSize;
extern thiprtCreateFuncTable *hiprtCreateFuncTable;
extern thiprtSetFuncTable *hiprtSetFuncTable;
extern thiprtDestroyFuncTable *hiprtDestroyFuncTable;
/* HIPEW API. */
bool hiprtewInit();
#endif /* __HIPRTEW_H__ */

View File

@ -1,5 +1,5 @@
/*
* Copyright 2011-2021 Blender Foundation
* Copyright 2011-2023 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -13,14 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License
*/
#ifdef _MSC_VER
# if _MSC_VER < 1900
# define snprintf _snprintf
# endif
# define popen _popen
# define pclose _pclose
# define _CRT_SECURE_NO_WARNINGS
#endif
#include "util.h"
#include <hipew.h>
#include <assert.h>
@ -28,44 +21,18 @@
#include <string.h>
#include <sys/stat.h>
#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# define VC_EXTRALEAN
# include <windows.h>
/* Utility macros. */
typedef HMODULE DynamicLibrary;
# define dynamic_library_open(path) LoadLibraryA(path)
# define dynamic_library_close(lib) FreeLibrary(lib)
# define dynamic_library_find(lib, symbol) GetProcAddress(lib, symbol)
#else
# include <dlfcn.h>
typedef void* DynamicLibrary;
# define dynamic_library_open(path) dlopen(path, RTLD_NOW)
# define dynamic_library_close(lib) dlclose(lib)
# define dynamic_library_find(lib, symbol) dlsym(lib, symbol)
#endif
#define _LIBRARY_FIND_CHECKED(lib, name) \
name = (t##name *)dynamic_library_find(lib, #name); \
assert(name);
#define _LIBRARY_FIND(lib, name) \
name = (t##name *)dynamic_library_find(lib, #name);
static DynamicLibrary hip_lib;
#define HIP_LIBRARY_FIND_CHECKED(name) \
_LIBRARY_FIND_CHECKED(hip_lib, name)
#define HIP_LIBRARY_FIND(name) _LIBRARY_FIND(hip_lib, name)
static DynamicLibrary hip_lib;
#define HIP_LIBRARY_FIND_CHECKED(name) \
name = (t##name *)dynamic_library_find(hip_lib, #name); \
assert(name);
#define HIP_LIBRARY_FIND(name) \
name = (t##name *)dynamic_library_find(hip_lib, #name);
/* Function definitions. */
thipGetErrorName *hipGetErrorName;
thipGetErrorString *hipGetErrorString;
thipGetLastError *hipGetLastError;
thipInit *hipInit;
thipDriverGetVersion *hipDriverGetVersion;
thipGetDevice *hipGetDevice;
@ -74,6 +41,8 @@ thipGetDeviceProperties *hipGetDeviceProperties;
thipDeviceGet* hipDeviceGet;
thipDeviceGetName *hipDeviceGetName;
thipDeviceGetAttribute *hipDeviceGetAttribute;
thipDeviceGetLimit *hipDeviceGetLimit;
thipDeviceSetLimit *hipDeviceSetLimit;
thipDeviceComputeCapability *hipDeviceComputeCapability;
thipDevicePrimaryCtxRetain *hipDevicePrimaryCtxRetain;
thipDevicePrimaryCtxRelease *hipDevicePrimaryCtxRelease;
@ -109,11 +78,14 @@ thipFree *hipFree;
thipMemGetAddressRange *hipMemGetAddressRange;
thipHostMalloc *hipHostMalloc;
thipHostFree *hipHostFree;
thipHostRegister *hipHostRegister;
thipHostGetDevicePointer *hipHostGetDevicePointer;
thipHostGetFlags *hipHostGetFlags;
thipHostUnregister *hipHostUnregister;
thipMallocManaged *hipMallocManaged;
thipDeviceGetByPCIBusId *hipDeviceGetByPCIBusId;
thipDeviceGetPCIBusId *hipDeviceGetPCIBusId;
thipMemcpy *hipMemcpy;
thipMemcpyPeer *hipMemcpyPeer;
thipMemcpyHtoD *hipMemcpyHtoD;
thipMemcpyDtoH *hipMemcpyDtoH;
@ -123,8 +95,10 @@ thipMemcpyParam2D *hipMemcpyParam2D;
thipDrvMemcpy3D *hipDrvMemcpy3D;
thipMemcpyHtoDAsync *hipMemcpyHtoDAsync;
thipMemcpyDtoHAsync *hipMemcpyDtoHAsync;
thipMemcpyDtoDAsync *hipMemcpyDtoDAsync;
thipMemcpyParam2DAsync *hipMemcpyParam2DAsync;
thipDrvMemcpy3DAsync *hipDrvMemcpy3DAsync;
thipMemset *hipMemset;
thipMemsetD8 *hipMemsetD8;
thipMemsetD16 *hipMemsetD16;
thipMemsetD32 *hipMemsetD32;
@ -134,6 +108,8 @@ thipMemsetD32Async *hipMemsetD32Async;
thipArrayCreate *hipArrayCreate;
thipArrayDestroy *hipArrayDestroy;
thipArray3DCreate *hipArray3DCreate;
thipPointerGetAttributes* hipPointerGetAttributes;
thipStreamCreate* hipStreamCreate;
thipStreamCreateWithFlags *hipStreamCreateWithFlags;
thipStreamCreateWithPriority *hipStreamCreateWithPriority;
thipStreamGetPriority *hipStreamGetPriority;
@ -179,6 +155,9 @@ thipGraphicsResourceGetMappedPointer *hipGraphicsResourceGetMappedPointer;
thipGraphicsGLRegisterBuffer *hipGraphicsGLRegisterBuffer;
thipGLGetDevices *hipGLGetDevices;
thipImportExternalMemory *hipImportExternalMemory;
thipExternalMemoryGetMappedBuffer *hipExternalMemoryGetMappedBuffer;
thipDestroyExternalMemory *hipDestroyExternalMemory;
thiprtcGetErrorString* hiprtcGetErrorString;
thiprtcAddNameExpression* hiprtcAddNameExpression;
@ -188,10 +167,15 @@ thiprtcDestroyProgram* hiprtcDestroyProgram;
thiprtcGetLoweredName* hiprtcGetLoweredName;
thiprtcGetProgramLog* hiprtcGetProgramLog;
thiprtcGetProgramLogSize* hiprtcGetProgramLogSize;
thiprtcGetBitcode* hiprtcGetBitcode;
thiprtcGetBitcodeSize* hiprtcGetBitcodeSize;
thiprtcGetCode* hiprtcGetCode;
thiprtcGetCodeSize* hiprtcGetCodeSize;
thiprtcLinkCreate* hiprtcLinkCreate;
thiprtcLinkAddFile* hiprtcLinkAddFile;
thiprtcLinkAddData* hiprtcLinkAddData;
thiprtcLinkComplete* hiprtcLinkComplete;
thiprtcLinkDestroy* hiprtcLinkDestroy;
static DynamicLibrary dynamic_library_open_find(const char **paths) {
int i = 0;
@ -217,14 +201,14 @@ static void hipewHipExit(void) {
#ifdef _WIN32
static int hipewHasOldDriver(const char *hip_path) {
DWORD verHandle = 0;
DWORD verSize = GetFileVersionInfoSize(hip_path, &verHandle);
DWORD verSize = GetFileVersionInfoSizeA(hip_path, &verHandle);
int old_driver = 0;
if (verSize != 0) {
LPSTR verData = (LPSTR)malloc(verSize);
if (GetFileVersionInfo(hip_path, verHandle, verSize, verData)) {
if (GetFileVersionInfoA(hip_path, verHandle, verSize, verData)) {
LPBYTE lpBuffer = NULL;
UINT size = 0;
if (VerQueryValue(verData, "\\", (VOID FAR * FAR *)&lpBuffer, &size)) {
if (VerQueryValueA(verData, "\\", (VOID FAR * FAR *)&lpBuffer, &size)) {
if (size) {
VS_FIXEDFILEINFO *verInfo = (VS_FIXEDFILEINFO *)lpBuffer;
/* Magic value from
@ -247,8 +231,9 @@ static int hipewHasOldDriver(const char *hip_path) {
static int hipewHipInit(void) {
/* Library paths. */
#ifdef _WIN32
/* Expected in c:/windows/system or similar, no path needed. */
/* Expected in C:/Windows/System32 or similar, no path needed. */
const char *hip_paths[] = {"amdhip64.dll", NULL};
#elif defined(__APPLE__)
/* Default installation path. */
const char *hip_paths[] = {"", NULL};
@ -289,6 +274,8 @@ static int hipewHipInit(void) {
/* Fetch all function pointers. */
HIP_LIBRARY_FIND_CHECKED(hipGetErrorName);
HIP_LIBRARY_FIND_CHECKED(hipGetErrorString);
HIP_LIBRARY_FIND_CHECKED(hipGetLastError);
HIP_LIBRARY_FIND_CHECKED(hipInit);
HIP_LIBRARY_FIND_CHECKED(hipDriverGetVersion);
HIP_LIBRARY_FIND_CHECKED(hipGetDevice);
@ -297,6 +284,8 @@ static int hipewHipInit(void) {
HIP_LIBRARY_FIND_CHECKED(hipDeviceGet);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetName);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetAttribute);
HIP_LIBRARY_FIND(hipDeviceGetLimit);
HIP_LIBRARY_FIND(hipDeviceSetLimit);
HIP_LIBRARY_FIND_CHECKED(hipDeviceComputeCapability);
HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxRetain);
HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxRelease);
@ -332,11 +321,14 @@ static int hipewHipInit(void) {
HIP_LIBRARY_FIND_CHECKED(hipMemGetAddressRange);
HIP_LIBRARY_FIND_CHECKED(hipHostMalloc);
HIP_LIBRARY_FIND_CHECKED(hipHostFree);
HIP_LIBRARY_FIND_CHECKED(hipHostRegister);
HIP_LIBRARY_FIND_CHECKED(hipHostGetDevicePointer);
HIP_LIBRARY_FIND_CHECKED(hipHostGetFlags);
HIP_LIBRARY_FIND_CHECKED(hipHostUnregister);
HIP_LIBRARY_FIND_CHECKED(hipMallocManaged);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetByPCIBusId);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetPCIBusId);
HIP_LIBRARY_FIND_CHECKED(hipMemcpy);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyPeer);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyHtoD);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoH);
@ -345,9 +337,11 @@ static int hipewHipInit(void) {
HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy3D);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyHtoDAsync);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoHAsync);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoDAsync);
HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy2DUnaligned);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyParam2DAsync);
HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy3DAsync);
HIP_LIBRARY_FIND_CHECKED(hipMemset);
HIP_LIBRARY_FIND_CHECKED(hipMemsetD8);
HIP_LIBRARY_FIND_CHECKED(hipMemsetD16);
HIP_LIBRARY_FIND_CHECKED(hipMemsetD32);
@ -357,6 +351,8 @@ static int hipewHipInit(void) {
HIP_LIBRARY_FIND_CHECKED(hipArrayCreate);
HIP_LIBRARY_FIND_CHECKED(hipArrayDestroy);
HIP_LIBRARY_FIND_CHECKED(hipArray3DCreate);
HIP_LIBRARY_FIND_CHECKED(hipPointerGetAttributes);
HIP_LIBRARY_FIND_CHECKED(hipStreamCreate);
HIP_LIBRARY_FIND_CHECKED(hipStreamCreateWithFlags);
HIP_LIBRARY_FIND_CHECKED(hipStreamCreateWithPriority);
HIP_LIBRARY_FIND_CHECKED(hipStreamGetPriority);
@ -399,16 +395,10 @@ static int hipewHipInit(void) {
HIP_LIBRARY_FIND_CHECKED(hipGraphicsGLRegisterBuffer);
HIP_LIBRARY_FIND_CHECKED(hipGLGetDevices);
#endif
HIP_LIBRARY_FIND_CHECKED(hiprtcGetErrorString);
HIP_LIBRARY_FIND_CHECKED(hiprtcAddNameExpression);
HIP_LIBRARY_FIND_CHECKED(hiprtcCompileProgram);
HIP_LIBRARY_FIND_CHECKED(hiprtcCreateProgram);
HIP_LIBRARY_FIND_CHECKED(hiprtcDestroyProgram);
HIP_LIBRARY_FIND_CHECKED(hiprtcGetLoweredName);
HIP_LIBRARY_FIND_CHECKED(hiprtcGetProgramLog);
HIP_LIBRARY_FIND_CHECKED(hiprtcGetProgramLogSize);
HIP_LIBRARY_FIND_CHECKED(hiprtcGetCode);
HIP_LIBRARY_FIND_CHECKED(hiprtcGetCodeSize);
HIP_LIBRARY_FIND_CHECKED(hipImportExternalMemory);
HIP_LIBRARY_FIND_CHECKED(hipExternalMemoryGetMappedBuffer);
HIP_LIBRARY_FIND_CHECKED(hipDestroyExternalMemory);
result = HIPEW_SUCCESS;
return result;
}
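
One detail in the block above: hipDeviceGetLimit and hipDeviceSetLimit are looked up with the unchecked HIP_LIBRARY_FIND because older drivers may not export them, while everything else uses the asserting _CHECKED variant. Callers must therefore null-check the optional entry points, along these hypothetical lines (not from the patch):

#include <cstddef>

typedef int hipError_t;
typedef enum hipLimit_t { hipLimitStackSize = 0 } hipLimit_t; /* illustrative value */
typedef hipError_t thipDeviceSetLimit(hipLimit_t limit, size_t value);
extern thipDeviceSetLimit *hipDeviceSetLimit; /* may remain null after hipewInit() */

bool try_set_stack_limit(size_t bytes)
{
  if (!hipDeviceSetLimit) {
    return false; /* symbol absent: driver too old, feature unavailable */
  }
  return hipDeviceSetLimit(hipLimitStackSize, bytes) == 0;
}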

extern/hipew/src/hiprtew.cc vendored Normal file
View File

@ -0,0 +1,97 @@
/*
* Copyright 2011-2023 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*/
#include "util.h"
#include <hiprtew.h>
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <string> /* for std::string used in hiprtewInit() */
#include <sys/stat.h>
static DynamicLibrary hiprt_lib;
#define HIPRT_LIBRARY_FIND(name) \
name = (t##name *)dynamic_library_find(hiprt_lib, #name);
/* Function definitions. */
thiprtCreateContext *hiprtCreateContext;
thiprtDestroyContext *hiprtDestroyContext;
thiprtCreateGeometry *hiprtCreateGeometry;
thiprtDestroyGeometry *hiprtDestroyGeometry;
thiprtBuildGeometry *hiprtBuildGeometry;
thiprtGetGeometryBuildTemporaryBufferSize *hiprtGetGeometryBuildTemporaryBufferSize;
thiprtCreateScene *hiprtCreateScene;
thiprtDestroyScene *hiprtDestroyScene;
thiprtBuildScene *hiprtBuildScene;
thiprtGetSceneBuildTemporaryBufferSize *hiprtGetSceneBuildTemporaryBufferSize;
thiprtCreateFuncTable *hiprtCreateFuncTable;
thiprtSetFuncTable *hiprtSetFuncTable;
thiprtDestroyFuncTable *hiprtDestroyFuncTable;
static void hipewHipRtExit(void)
{
if (hiprt_lib != NULL) {
/* Ignore errors. */
dynamic_library_close(hiprt_lib);
hiprt_lib = NULL;
}
}
bool hiprtewInit()
{
static bool result = false;
static bool initialized = false;
if (initialized) {
return result;
}
#ifdef _WIN32
initialized = true;
if (atexit(hipewHipRtExit)) {
return false;
}
std::string hiprt_ver(HIPRT_VERSION_STR);
std::string hiprt_path = "hiprt" + hiprt_ver + "64.dll";
hiprt_lib = dynamic_library_open(hiprt_path.c_str());
if (hiprt_lib == NULL) {
return false;
}
HIPRT_LIBRARY_FIND(hiprtCreateContext)
HIPRT_LIBRARY_FIND(hiprtDestroyContext)
HIPRT_LIBRARY_FIND(hiprtCreateGeometry)
HIPRT_LIBRARY_FIND(hiprtDestroyGeometry)
HIPRT_LIBRARY_FIND(hiprtBuildGeometry)
HIPRT_LIBRARY_FIND(hiprtGetGeometryBuildTemporaryBufferSize)
HIPRT_LIBRARY_FIND(hiprtCreateScene)
HIPRT_LIBRARY_FIND(hiprtDestroyScene)
HIPRT_LIBRARY_FIND(hiprtBuildScene)
HIPRT_LIBRARY_FIND(hiprtGetSceneBuildTemporaryBufferSize)
HIPRT_LIBRARY_FIND(hiprtCreateFuncTable)
HIPRT_LIBRARY_FIND(hiprtSetFuncTable)
HIPRT_LIBRARY_FIND(hiprtDestroyFuncTable)
result = true;
#endif
return result;
}
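
So on Windows the loader above resolves "hiprt" + HIPRT_VERSION_STR + "64.dll" (i.e. hiprt0200064.dll for this SDK), and on every other platform hiprtewInit() reports false. A hypothetical caller (not from the patch) only needs the boolean:

#include <cstdio>

bool hiprtewInit(); /* from hiprtew.h */

void report_hiprt_support()
{
  if (hiprtewInit()) {
    std::printf("HIP RT loaded; hiprtCreateContext() etc. are callable\n");
  }
  else {
    std::printf("HIP RT unavailable (non-Windows build or missing DLL)\n");
  }
}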

extern/hipew/src/util.h vendored Normal file
View File

@ -0,0 +1,51 @@
/*
* Copyright 2011-2023 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*/
#pragma once
/* Portable snprintf and popen/pclose. */
#ifdef _MSC_VER
# if _MSC_VER < 1900
# define snprintf _snprintf
# endif
# define popen _popen
# define pclose _pclose
# define _CRT_SECURE_NO_WARNINGS
#endif
/* Macros for loading libraries. */
#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# define VC_EXTRALEAN
# include <windows.h>
typedef HMODULE DynamicLibrary;
# define dynamic_library_open(path) LoadLibraryA(path)
# define dynamic_library_close(lib) FreeLibrary(lib)
# define dynamic_library_find(lib, symbol) GetProcAddress(lib, symbol)
#else
# include <dlfcn.h>
typedef void* DynamicLibrary;
# define dynamic_library_open(path) dlopen(path, RTLD_NOW)
# define dynamic_library_close(lib) dlclose(lib)
# define dynamic_library_find(lib, symbol) dlsym(lib, symbol)
#endif
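
These macros give hipew.c and hiprtew.cc a single spelling for library loading on both platforms. A small illustrative consumer follows; the library path and the "add" symbol are made up for the example.

#include "util.h"

typedef int (*add_fn)(int, int);

int call_add_from_library(const char *path, int a, int b)
{
  DynamicLibrary lib = dynamic_library_open(path); /* LoadLibraryA or dlopen */
  if (!lib) {
    return -1; /* library not found */
  }
  add_fn add = (add_fn)dynamic_library_find(lib, "add"); /* GetProcAddress or dlsym */
  const int result = add ? add(a, b) : -1;
  dynamic_library_close(lib); /* FreeLibrary or dlclose */
  return result;
}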

View File

@ -249,6 +249,13 @@ endif()
if(WITH_CYCLES_DEVICE_HIP)
add_definitions(-DWITH_HIP)
if(WITH_CYCLES_DEVICE_HIPRT)
include_directories(
${HIPRT_INCLUDE_DIR}
)
add_definitions(-DWITH_HIPRT)
endif()
if(WITH_HIP_DYNLOAD)
include_directories(
../../extern/hipew/include

View File

@ -1507,7 +1507,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
def get_device_types(self, context):
import _cycles
has_cuda, has_optix, has_hip, has_metal, has_oneapi = _cycles.get_device_types()
has_cuda, has_optix, has_hip, has_metal, has_oneapi, has_hiprt = _cycles.get_device_types()
list = [('NONE', "None", "Don't use compute device", 0)]
if has_cuda:
@ -1544,6 +1544,13 @@ class CyclesPreferences(bpy.types.AddonPreferences):
default=False,
)
use_hiprt: BoolProperty(
name="HIP RT (Experimental)",
description="HIP RT enables AMD hardware ray tracing on RDNA2 and above, with shader fallback on older cards. "
"This feature is experimental and some scenes may render incorrectly",
default=False,
)
use_oneapirt: BoolProperty(
name="Embree on GPU (Experimental)",
description="Embree GPU execution will allow to use hardware ray tracing on Intel GPUs, which will provide better performance. "
@ -1770,7 +1777,13 @@ class CyclesPreferences(bpy.types.AddonPreferences):
col.prop(self, "kernel_optimization_level")
col.prop(self, "use_metalrt")
if compute_device_type == 'ONEAPI' and _cycles.with_embree_gpu:
if compute_device_type == 'HIP':
has_cuda, has_optix, has_hip, has_metal, has_oneapi, has_hiprt = _cycles.get_device_types()
row = layout.row()
row.enabled = has_hiprt
row.prop(self, "use_hiprt")
elif compute_device_type == 'ONEAPI' and _cycles.with_embree_gpu:
row = layout.row()
row.use_property_split = True
row.prop(self, "use_oneapirt")

View File

@ -124,6 +124,10 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences,
info.use_hardware_raytracing = false;
}
if (info.type == DEVICE_HIP && !get_boolean(cpreferences, "use_hiprt")) {
info.use_hardware_raytracing = false;
}
/* There is an accumulative logic here, because Multi-devices are supported only for
* the same backend + CPU in Blender right now, and both oneAPI and Metal have a
* global boolean backend setting (see above) for enabling/disabling HW RT,

View File

@ -876,20 +876,23 @@ static PyObject *enable_print_stats_func(PyObject * /*self*/, PyObject * /*args*
static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/)
{
vector<DeviceType> device_types = Device::available_types();
bool has_cuda = false, has_optix = false, has_hip = false, has_metal = false, has_oneapi = false;
bool has_cuda = false, has_optix = false, has_hip = false, has_metal = false, has_oneapi = false,
has_hiprt = false;
foreach (DeviceType device_type, device_types) {
has_cuda |= (device_type == DEVICE_CUDA);
has_optix |= (device_type == DEVICE_OPTIX);
has_hip |= (device_type == DEVICE_HIP);
has_metal |= (device_type == DEVICE_METAL);
has_oneapi |= (device_type == DEVICE_ONEAPI);
has_hiprt |= (device_type == DEVICE_HIPRT);
}
PyObject *list = PyTuple_New(5);
PyObject *list = PyTuple_New(6);
PyTuple_SET_ITEM(list, 0, PyBool_FromLong(has_cuda));
PyTuple_SET_ITEM(list, 1, PyBool_FromLong(has_optix));
PyTuple_SET_ITEM(list, 2, PyBool_FromLong(has_hip));
PyTuple_SET_ITEM(list, 3, PyBool_FromLong(has_metal));
PyTuple_SET_ITEM(list, 4, PyBool_FromLong(has_oneapi));
PyTuple_SET_ITEM(list, 5, PyBool_FromLong(has_hiprt));
return list;
}

View File

@ -14,6 +14,7 @@ set(SRC
binning.cpp
build.cpp
embree.cpp
hiprt.cpp
multi.cpp
node.cpp
optix.cpp
@ -39,6 +40,7 @@ set(SRC_HEADERS
binning.h
build.h
embree.h
hiprt.h
multi.h
node.h
optix.h

View File

@ -6,6 +6,7 @@
#include "bvh/bvh2.h"
#include "bvh/embree.h"
#include "bvh/hiprt.h"
#include "bvh/metal.h"
#include "bvh/multi.h"
#include "bvh/optix.h"
@ -30,10 +31,14 @@ const char *bvh_layout_name(BVHLayout layout)
return "OPTIX";
case BVH_LAYOUT_METAL:
return "METAL";
case BVH_LAYOUT_HIPRT:
return "HIPRT";
case BVH_LAYOUT_MULTI_OPTIX:
case BVH_LAYOUT_MULTI_METAL:
case BVH_LAYOUT_MULTI_HIPRT:
case BVH_LAYOUT_MULTI_OPTIX_EMBREE:
case BVH_LAYOUT_MULTI_METAL_EMBREE:
case BVH_LAYOUT_MULTI_HIPRT_EMBREE:
return "MULTI";
case BVH_LAYOUT_ALL:
return "ALL";
@ -101,11 +106,20 @@ BVH *BVH::create(const BVHParams &params,
#else
(void)device;
break;
#endif
case BVH_LAYOUT_HIPRT:
#ifdef WITH_HIPRT
return new BVHHIPRT(params, geometry, objects, device);
#else
(void)device;
break;
#endif
case BVH_LAYOUT_MULTI_OPTIX:
case BVH_LAYOUT_MULTI_METAL:
case BVH_LAYOUT_MULTI_HIPRT:
case BVH_LAYOUT_MULTI_OPTIX_EMBREE:
case BVH_LAYOUT_MULTI_METAL_EMBREE:
case BVH_LAYOUT_MULTI_HIPRT_EMBREE:
return new BVHMulti(params, geometry, objects);
case BVH_LAYOUT_NONE:
case BVH_LAYOUT_ALL:

View File

@ -0,0 +1,45 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2023 Blender Foundation */
#ifdef WITH_HIPRT
# include "bvh/hiprt.h"
# include "scene/mesh.h"
# include "scene/object.h"
# include "util/foreach.h"
# include "util/progress.h"
# include "device/hiprt/device_impl.h"
CCL_NAMESPACE_BEGIN
BVHHIPRT::BVHHIPRT(const BVHParams &params,
const vector<Geometry *> &geometry,
const vector<Object *> &objects,
Device *in_device)
: BVH(params, geometry, objects),
hiprt_geom(0),
custom_primitive_bound(in_device, "Custom Primitive Bound", MEM_READ_ONLY),
triangle_index(in_device, "HIPRT Triangle Index", MEM_READ_ONLY),
vertex_data(in_device, "vertex_data", MEM_READ_ONLY),
device(in_device)
{
triangle_mesh = {0};
custom_prim_aabb = {0};
}
BVHHIPRT::~BVHHIPRT()
{
HIPRTDevice *hiprt_device = static_cast<HIPRTDevice *>(device);
hiprtContext hiprt_context = hiprt_device->get_hiprt_context();
custom_primitive_bound.free();
triangle_index.free();
vertex_data.free();
hiprtDestroyGeometry(hiprt_context, hiprt_geom);
}
CCL_NAMESPACE_END
#endif

intern/cycles/bvh/hiprt.h Normal file
View File

@ -0,0 +1,58 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2023 Blender Foundation */
#ifdef WITH_HIPRT
# pragma once
# include "bvh/bvh.h"
# include "bvh/params.h"
# ifdef WITH_HIP_DYNLOAD
# include "hiprtew.h"
# else
# include <hiprt/hiprt_types.h>
# endif
# include "device/memory.h"
CCL_NAMESPACE_BEGIN
class BVHHIPRT : public BVH {
public:
friend class HIPDevice;
bool is_tlas()
{
return params.top_level;
}
hiprtGeometry hiprt_geom;
hiprtTriangleMeshPrimitive triangle_mesh;
hiprtAABBListPrimitive custom_prim_aabb;
hiprtGeometryBuildInput geom_input;
vector<int2> custom_prim_info; /* x: prim_id, y: prim_type */
vector<float2> prims_time;
/* Custom primitives. */
device_vector<BoundBox> custom_primitive_bound;
device_vector<int> triangle_index;
device_vector<float> vertex_data;
protected:
friend class BVH;
BVHHIPRT(const BVHParams &params,
const vector<Geometry *> &geometry,
const vector<Object *> &objects,
Device *in_device);
virtual ~BVHHIPRT();
private:
Device *device;
};
CCL_NAMESPACE_END
#endif
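
The "x: prim_id, y: prim_type" packing noted above amounts to the following sketch; Cycles' real int2 comes from its util headers, so the struct here is a stand-in.

struct int2 { int x, y; }; /* stand-in for Cycles' util type */

static int2 pack_custom_prim_info(int prim_id, int prim_type)
{
  int2 info;
  info.x = prim_id;   /* primitive index, local to the geometry */
  info.y = prim_type; /* distinguishes curve / point / motion-triangle prims */
  return info;
}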

View File

@ -41,20 +41,32 @@ endif()
# HIP
###########################################################################
if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
if(UNIX)
# Disabled until there is a HIP 5.5 release for Linux.
set(WITH_CYCLES_HIP_BINARIES OFF)
message(STATUS "HIP temporarily disabled due to compiler bugs")
else()
# Need at least HIP 5.5 to solve compiler bug affecting the kernel.
find_package(HIP 5.5.0)
set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
if(WITH_CYCLES_DEVICE_HIP)
if(WITH_CYCLES_HIP_BINARIES)
if(UNIX)
# Disabled until there is a HIP 5.5 release for Linux.
set(WITH_CYCLES_HIP_BINARIES OFF)
message(STATUS "HIP temporarily disabled due to compiler bugs")
else()
# Need at least HIP 5.5 to solve compiler bug affecting the kernel.
find_package(HIP 5.5.0)
set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
if(HIP_FOUND)
message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
if(HIP_FOUND)
message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
endif()
endif()
endif()
# HIP RT
if(WITH_CYCLES_DEVICE_HIP AND WITH_CYCLES_DEVICE_HIPRT)
find_package(HIPRT)
set_and_warn_library_found("HIP RT" HIPRT_FOUND WITH_CYCLES_DEVICE_HIPRT)
endif()
endif()
if(NOT WITH_CYCLES_DEVICE_HIP)
set(WITH_CYCLES_DEVICE_HIPRT OFF)
endif()
if(NOT WITH_HIP_DYNLOAD)

View File

@ -66,6 +66,13 @@ set(SRC_HIP
hip/util.h
)
set(SRC_HIPRT
hiprt/device_impl.cpp
hiprt/device_impl.h
hiprt/queue.cpp
hiprt/queue.h
)
set(SRC_ONEAPI
oneapi/device_impl.cpp
oneapi/device_impl.h
@ -124,6 +131,7 @@ set(SRC
${SRC_CPU}
${SRC_CUDA}
${SRC_HIP}
${SRC_HIPRT}
${SRC_DUMMY}
${SRC_MULTI}
${SRC_OPTIX}
@ -209,6 +217,7 @@ source_group("cpu" FILES ${SRC_CPU})
source_group("cuda" FILES ${SRC_CUDA})
source_group("dummy" FILES ${SRC_DUMMY})
source_group("hip" FILES ${SRC_HIP})
source_group("hiprt" FILES ${SRC_HIPRT})
source_group("multi" FILES ${SRC_MULTI})
source_group("metal" FILES ${SRC_METAL})
source_group("optix" FILES ${SRC_OPTIX})

View File

@ -14,6 +14,7 @@
#include "device/cuda/device.h"
#include "device/dummy/device.h"
#include "device/hip/device.h"
#include "device/hiprt/device_impl.h"
#include "device/metal/device.h"
#include "device/multi/device.h"
#include "device/oneapi/device.h"
@ -135,6 +136,8 @@ DeviceType Device::type_from_string(const char *name)
return DEVICE_METAL;
else if (strcmp(name, "ONEAPI") == 0)
return DEVICE_ONEAPI;
else if (strcmp(name, "HIPRT") == 0)
return DEVICE_HIPRT;
return DEVICE_NONE;
}
@ -155,6 +158,8 @@ string Device::string_from_type(DeviceType type)
return "METAL";
else if (type == DEVICE_ONEAPI)
return "ONEAPI";
else if (type == DEVICE_HIPRT)
return "HIPRT";
return "";
}
@ -177,6 +182,10 @@ vector<DeviceType> Device::available_types()
#endif
#ifdef WITH_ONEAPI
types.push_back(DEVICE_ONEAPI);
#endif
#ifdef WITH_HIPRT
if (hiprtewInit())
types.push_back(DEVICE_HIPRT);
#endif
return types;
}

View File

@ -40,6 +40,7 @@ enum DeviceType {
DEVICE_MULTI,
DEVICE_OPTIX,
DEVICE_HIP,
DEVICE_HIPRT,
DEVICE_METAL,
DEVICE_ONEAPI,
DEVICE_DUMMY,
@ -79,8 +80,7 @@ class DeviceInfo {
bool has_profiling; /* Supports runtime collection of profiling info. */
bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */
bool has_gpu_queue; /* Device supports GPU queue. */
bool use_hardware_raytracing; /* Use hardware ray tracing to accelerate ray queries in a backend.
*/
bool use_hardware_raytracing; /* Use hardware instructions to accelerate ray tracing. */
KernelOptimizationLevel kernel_optimization_level; /* Optimization level applied to path tracing
* kernels (Metal only). */
DenoiserTypeMask denoisers; /* Supported denoiser types. */

View File

@ -13,6 +13,10 @@
# include "util/windows.h"
#endif /* WITH_HIP */
#ifdef WITH_HIPRT
# include "device/hiprt/device_impl.h"
#endif
CCL_NAMESPACE_BEGIN
bool device_hip_init()
@ -65,7 +69,12 @@ bool device_hip_init()
Device *device_hip_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
{
#ifdef WITH_HIP
#ifdef WITH_HIPRT
if (info.use_hardware_raytracing)
return new HIPRTDevice(info, stats, profiler);
else
return new HIPDevice(info, stats, profiler);
#elif defined(WITH_HIP)
return new HIPDevice(info, stats, profiler);
#else
(void)info;
@ -115,6 +124,12 @@ void device_hip_info(vector<DeviceInfo> &devices)
return;
}
# ifdef WITH_HIPRT
const bool has_hardware_raytracing = hiprtewInit();
# else
const bool has_hardware_raytracing = false;
# endif
vector<DeviceInfo> display_devices;
for (int num = 0; num < count; num++) {
@ -150,6 +165,8 @@ void device_hip_info(vector<DeviceInfo> &devices)
}
}
info.use_hardware_raytracing = has_hardware_raytracing;
int pci_location[3] = {0, 0, 0};
hipDeviceGetAttribute(&pci_location[0], hipDeviceAttributePciDomainID, num);
hipDeviceGetAttribute(&pci_location[1], hipDeviceAttributePciBusId, num);
@ -176,6 +193,7 @@ void device_hip_info(vector<DeviceInfo> &devices)
VLOG_INFO << "Device has compute preemption or is not used for display.";
devices.push_back(info);
}
VLOG_INFO << "Added device \"" << name << "\" with id \"" << info.id << "\".";
}

View File

@ -1,6 +1,8 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#pragma once
#ifdef WITH_HIP
# include "device/device.h"
@ -49,9 +51,11 @@ class HIPDevice : public GPUDevice {
bool use_adaptive_compilation();
string compile_kernel_get_common_cflags(const uint kernel_features);
virtual string compile_kernel_get_common_cflags(const uint kernel_features);
string compile_kernel(const uint kernel_features, const char *name, const char *base = "hip");
virtual string compile_kernel(const uint kernel_features,
const char *name,
const char *base = "hip");
virtual bool load_kernels(const uint kernel_features) override;
void reserve_local_memory(const uint kernel_features);

File diff suppressed because it is too large

View File

@ -0,0 +1,126 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2023 Blender Foundation */
#pragma once
#ifdef WITH_HIPRT
# include "device/hip/device_impl.h"
# include "device/hip/kernel.h"
# include "device/hip/queue.h"
# include "device/hiprt/queue.h"
# ifdef WITH_HIP_DYNLOAD
# include "hiprtew.h"
# else
# include <hiprt/hiprt_types.h>
# endif
# include "kernel/device/hiprt/globals.h"
CCL_NAMESPACE_BEGIN
class Mesh;
class Hair;
class PointCloud;
class Geometry;
class Object;
class BVHHIPRT;
class HIPRTDevice : public HIPDevice {
public:
virtual BVHLayoutMask get_bvh_layout_mask(const uint kernel_features) const override;
HIPRTDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
virtual ~HIPRTDevice();
virtual unique_ptr<DeviceQueue> gpu_queue_create() override;
string compile_kernel_get_common_cflags(const uint kernel_features) override;
virtual string compile_kernel(const uint kernel_features,
const char *name,
const char *base = "hiprt") override;
virtual bool load_kernels(const uint kernel_features) override;
virtual void const_copy_to(const char *name, void *host, size_t size) override;
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
hiprtContext get_hiprt_context()
{
return hiprt_context;
}
device_vector<int> global_stack_buffer;
protected:
enum Filter_Function { Closest = 0, Shadows, Local, Volume, Max_Intersect_Filter_Function };
enum Primitive_Type { Triangle = 0, Curve, Motion_Triangle, Point, Max_Primitive_Type };
hiprtGeometryBuildInput prepare_triangle_blas(BVHHIPRT *bvh, Mesh *mesh);
hiprtGeometryBuildInput prepare_curve_blas(BVHHIPRT *bvh, Hair *hair);
hiprtGeometryBuildInput prepare_point_blas(BVHHIPRT *bvh, PointCloud *pointcloud);
void build_blas(BVHHIPRT *bvh, Geometry *geom, hiprtBuildOptions options);
hiprtScene build_tlas(BVHHIPRT *bvh,
vector<Object *> objects,
hiprtBuildOptions options,
bool refit);
hiprtContext hiprt_context;
hiprtScene scene;
hiprtFuncTable functions_table;
thread_mutex hiprt_mutex;
size_t scratch_buffer_size;
device_vector<char> scratch_buffer;
/* The following vectors transfer scene information available on the host to the GPU.
* visibility, instance_transform_matrix, transform_headers, and hiprt_blas_ptr are passed to
* hiprt to build the bvh; the rest are used directly in traversal functions/intersection
* kernels and are defined on the GPU side as members of the KernelParamsHIPRT struct. The host
* memory is copied to the GPU through the const_copy_to() function. */
device_vector<uint32_t> visibility;
/* instance_transform_matrix passes transform matrix of instances converted from Cycles Transform
* format to instanceFrames member of hiprtSceneBuildInput. */
device_vector<hiprtFrameMatrix> instance_transform_matrix;
/* Movement over a time interval for motion blur is captured through multiple transform matrices.
* In this case the transform matrix of an instance cannot be retrieved directly by looking up
* instance_transform_matrix given the instance id. transform_headers maps the instance id to the
* appropriate index for retrieving the instance transform matrix (frameIndex member of
* hiprtTransformHeader). transform_headers also holds the number of transform matrices
* associated with an instance (frameCount member of hiprtTransformHeader).
* transform_headers is passed to hiprt through the instanceTransformHeaders member of
* hiprtSceneBuildInput. */
device_vector<hiprtTransformHeader> transform_headers;
/* Instance/object ids are not explicitly passed to hiprt.
* HIP RT assigns the ids based on the order blas pointers are passed to it (through the
* instanceGeometries member of hiprtSceneBuildInput). If blas is absent for a particular
* geometry (e.g. a plane), HIP RT removes that entry, so in scenes containing objects with no
* blas the instance id that hiprt returns for a hit point will not necessarily match the
* application's instance id. user_instance_id provides a map for retrieving the original
* instance id from what HIP RT returns as instance id. hiprt_blas_ptr is the list of all the
* valid blas pointers. blas_ptr holds both the valid and the null pointers, so the blas for any
* geometry can be retrieved from it directly (used in subsurface scattering). */
device_vector<int> user_instance_id;
device_vector<uint64_t> hiprt_blas_ptr;
device_vector<uint64_t> blas_ptr;
/* custom_prim_info stores custom information for all the custom primitives in a
* scene. The primitive id that HIP RT returns is local to the geometry that was hit.
* custom_prim_info_offset gives the offset to add to that primitive id to retrieve the
* primitive info from custom_prim_info. */
device_vector<int2> custom_prim_info;
device_vector<int2> custom_prim_info_offset;
/* prims_time stores the primitive time for geometries with motion blur.
* prim_time_offset gives the offset to add to the primitive id to retrieve the primitive time. */
device_vector<float2> prims_time;
device_vector<int> prim_time_offset;
};
CCL_NAMESPACE_END
#endif

View File

@ -0,0 +1,68 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#ifdef WITH_HIPRT
# include "device/hiprt/queue.h"
# include "device/hip/graphics_interop.h"
# include "device/hip/kernel.h"
# include "device/hiprt/device_impl.h"
CCL_NAMESPACE_BEGIN
HIPRTDeviceQueue::HIPRTDeviceQueue(HIPRTDevice *device)
: HIPDeviceQueue((HIPDevice *)device), hiprt_device_(device)
{
}
bool HIPRTDeviceQueue::enqueue(DeviceKernel kernel,
const int work_size,
DeviceKernelArguments const &args)
{
if (hiprt_device_->have_error()) {
return false;
}
if (!device_kernel_has_intersection(kernel)) {
return HIPDeviceQueue::enqueue(kernel, work_size, args);
}
debug_enqueue_begin(kernel, work_size);
const HIPContextScope scope(hiprt_device_);
const HIPDeviceKernel &hip_kernel = hiprt_device_->kernels.get(kernel);
if (!hiprt_device_->global_stack_buffer.device_pointer) {
int max_path = num_concurrent_states(0);
hiprt_device_->global_stack_buffer.alloc(max_path * HIPRT_SHARED_STACK_SIZE * sizeof(int));
hiprt_device_->global_stack_buffer.zero_to_device();
}
DeviceKernelArguments args_copy = args;
args_copy.add(&hiprt_device_->global_stack_buffer.device_pointer);
/* Compute kernel launch parameters. */
const int num_threads_per_block = HIPRT_THREAD_GROUP_SIZE;
const int num_blocks = divide_up(work_size, num_threads_per_block);
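/* For example, with HIPRT_THREAD_GROUP_SIZE = 256, a work size of 1000000 launches
* divide_up(1000000, 256) = 3907 blocks (illustrative). */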
int shared_mem_bytes = 0;
assert_success(hipModuleLaunchKernel(hip_kernel.function,
num_blocks,
1,
1,
num_threads_per_block,
1,
1,
shared_mem_bytes,
hip_stream_,
const_cast<void **>(args_copy.values),
0),
"enqueue");
return !(hiprt_device_->have_error());
}
CCL_NAMESPACE_END
#endif /* WITH_HIPRT */

View File

@ -0,0 +1,33 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#pragma once
#ifdef WITH_HIPRT
# include "device/kernel.h"
# include "device/memory.h"
# include "device/queue.h"
# include "device/hip/queue.h"
# include "device/hip/util.h"
CCL_NAMESPACE_BEGIN
class HIPRTDevice;
class HIPRTDeviceQueue : public HIPDeviceQueue {
public:
HIPRTDeviceQueue(HIPRTDevice *device);
~HIPRTDeviceQueue() {}
virtual bool enqueue(DeviceKernel kernel,
const int work_size,
DeviceKernelArguments const &args) override;
protected:
HIPRTDevice *hiprt_device_;
};
CCL_NAMESPACE_END
#endif /* WITH_HIPRT */

View File

@ -117,6 +117,10 @@ class MultiDevice : public Device {
return BVH_LAYOUT_MULTI_METAL;
}
if (bvh_layout_mask == BVH_LAYOUT_HIPRT) {
return BVH_LAYOUT_MULTI_HIPRT;
}
/* When devices do not share a common BVH layout, fall back to creating one for each */
const BVHLayoutMask BVH_LAYOUT_OPTIX_EMBREE = (BVH_LAYOUT_OPTIX | BVH_LAYOUT_EMBREE);
if ((bvh_layout_mask_all & BVH_LAYOUT_OPTIX_EMBREE) == BVH_LAYOUT_OPTIX_EMBREE) {
@ -158,8 +162,10 @@ class MultiDevice : public Device {
assert(bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX ||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL ||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_HIPRT ||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE ||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE);
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE ||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_HIPRT_EMBREE);
BVHMulti *const bvh_multi = static_cast<BVHMulti *>(bvh);
bvh_multi->sub_bvhs.resize(devices.size());
@ -184,12 +190,17 @@ class MultiDevice : public Device {
params.bvh_layout = BVH_LAYOUT_OPTIX;
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL)
params.bvh_layout = BVH_LAYOUT_METAL;
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_HIPRT)
params.bvh_layout = BVH_LAYOUT_HIPRT;
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE)
params.bvh_layout = sub.device->info.type == DEVICE_OPTIX ? BVH_LAYOUT_OPTIX :
BVH_LAYOUT_EMBREE;
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE)
params.bvh_layout = sub.device->info.type == DEVICE_METAL ? BVH_LAYOUT_METAL :
BVH_LAYOUT_EMBREE;
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_HIPRT_EMBREE)
params.bvh_layout = sub.device->info.type == DEVICE_HIPRT ? BVH_LAYOUT_HIPRT :
BVH_LAYOUT_EMBREE;
/* Skip building a bottom level acceleration structure for non-instanced geometry on Embree
* (since they are put into the top level directly, see bvh_embree.cpp) */

View File

@ -1149,6 +1149,8 @@ static const char *device_type_for_description(const DeviceType type)
return "OptiX";
case DEVICE_HIP:
return "HIP";
case DEVICE_HIPRT:
return "HIPRT";
case DEVICE_ONEAPI:
return "oneAPI";
case DEVICE_DUMMY:

View File

@ -26,6 +26,10 @@ set(SRC_KERNEL_DEVICE_HIP
device/hip/kernel.cpp
)
set(SRC_KERNEL_DEVICE_HIPRT
device/hiprt/kernel.cpp
)
set(SRC_KERNEL_DEVICE_METAL
device/metal/kernel.metal
)
@ -77,6 +81,13 @@ set(SRC_KERNEL_DEVICE_HIP_HEADERS
device/hip/globals.h
)
set(SRC_KERNEL_DEVICE_HIPRT_HEADERS
device/hiprt/bvh.h
device/hiprt/common.h
device/hiprt/globals.h
device/hiprt/hiprt_kernels.h
)
set(SRC_KERNEL_DEVICE_OPTIX_HEADERS
device/optix/bvh.h
device/optix/compat.h
@ -643,6 +654,86 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
cycles_set_solution_folder(cycles_kernel_hip)
endif()
# HIP RT module
if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES)
set(hiprt_sources device/hiprt/kernel.cpp
${SRC_KERNEL_HEADERS}
${SRC_KERNEL_DEVICE_GPU_HEADERS}
${SRC_KERNEL_DEVICE_HIPRT_HEADERS}
${SRC_UTIL_HEADERS})
set(bitcode_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.bc)
set(hiprt_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.hipfb)
set(kernel_sources ${sources})
set(hiprt_kernel_src "/device/hiprt/kernel.cpp")
if(WIN32)
set(hiprt_compile_command ${CMAKE_COMMAND})
set(hiprt_compile_flags
-E env "HIP_PATH=${HIP_ROOT_DIR}"
${HIP_HIPCC_EXECUTABLE}.bat)
else()
set(hiprt_compile_command ${HIP_HIPCC_EXECUTABLE})
set(hiprt_compile_flags)
endif()
set(target_gpus)
foreach(arch ${CYCLES_HIP_BINARIES_ARCH})
list(APPEND target_gpus "--offload-arch=${arch}")
endforeach()
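# For example, gfx1030 in CYCLES_HIP_BINARIES_ARCH becomes --offload-arch=gfx1030.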
set(hiprt_compile_flags
${hiprt_compile_flags}
${target_gpus}
${HIP_HIPCC_FLAGS}
${CMAKE_CURRENT_SOURCE_DIR}${hiprt_kernel_src}
${flags}
-D CCL_NAMESPACE_BEGIN=
-D CCL_NAMESPACE_END=
-D HIPCC
-D __HIPRT__
-std=c++17
-fgpu-rdc
-c
--gpu-bundle-output
-emit-llvm
-I ${CMAKE_CURRENT_SOURCE_DIR}/..
-I ${CMAKE_CURRENT_SOURCE_DIR}/device/hiprt
-I ${HIPRT_INCLUDE_DIR}
-Wno-parentheses-equality
-Wno-unused-value
--hipcc-func-supp
-ffast-math
-o ${bitcode_file})
if(WITH_CYCLES_DEBUG)
set(hiprt_compile_flags ${hiprt_compile_flags} -D WITH_CYCLES_DEBUG)
endif()
add_custom_command(
OUTPUT ${bitcode_file}
COMMAND ${hiprt_compile_command} ${hiprt_compile_flags}
DEPENDS ${kernel_sources})
if(WIN32)
set(hiprt_link_command ${CMAKE_COMMAND})
set(hiprt_link_flags -E env "HIP_PATH=${HIP_ROOT_DIR}"
${HIP_LINKER_EXECUTABLE})
else()
# Linking the HIP RT kernel is not implemented for non-Windows platforms yet.
endif()
set(hiprt_link_flags
${hiprt_link_flags}
${target_gpus}
-fgpu-rdc
--hip-link
--cuda-device-only
${bitcode_file}
${HIPRT_BITCODE}
-o ${hiprt_file})
add_custom_command(
OUTPUT ${hiprt_file}
COMMAND ${hiprt_link_command} ${hiprt_link_flags}
DEPENDS ${bitcode_file})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hiprt_file}" ${CYCLES_INSTALL_PATH}/lib)
add_custom_target(cycles_kernel_hiprt ALL DEPENDS ${hiprt_file})
cycles_set_solution_folder(cycles_kernel_hiprt)
endif()
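# A configure invocation that enables this module might look like (illustrative):
#   cmake -DWITH_CYCLES_DEVICE_HIPRT=ON -DWITH_CYCLES_HIP_BINARIES=ON ...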
# OptiX PTX modules
if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
@ -1033,6 +1124,7 @@ cycles_add_library(cycles_kernel "${LIB}"
${SRC_KERNEL_DEVICE_CPU}
${SRC_KERNEL_DEVICE_CUDA}
${SRC_KERNEL_DEVICE_HIP}
${SRC_KERNEL_DEVICE_HIPRT}
${SRC_KERNEL_DEVICE_OPTIX}
${SRC_KERNEL_DEVICE_METAL}
${SRC_KERNEL_HEADERS}
@ -1040,6 +1132,7 @@ cycles_add_library(cycles_kernel "${LIB}"
${SRC_KERNEL_DEVICE_GPU_HEADERS}
${SRC_KERNEL_DEVICE_CUDA_HEADERS}
${SRC_KERNEL_DEVICE_HIP_HEADERS}
${SRC_KERNEL_DEVICE_HIPRT_HEADERS}
${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
${SRC_KERNEL_DEVICE_METAL_HEADERS}
${SRC_KERNEL_DEVICE_ONEAPI_HEADERS}
@ -1053,6 +1146,7 @@ source_group("device\\cpu" FILES ${SRC_KERNEL_DEVICE_CPU} ${SRC_KERNEL_DEVICE_CP
source_group("device\\cuda" FILES ${SRC_KERNEL_DEVICE_CUDA} ${SRC_KERNEL_DEVICE_CUDA_HEADERS})
source_group("device\\gpu" FILES ${SRC_KERNEL_DEVICE_GPU_HEADERS})
source_group("device\\hip" FILES ${SRC_KERNEL_DEVICE_HIP} ${SRC_KERNEL_DEVICE_HIP_HEADERS})
source_group("device\\hiprt" FILES ${SRC_KERNEL_DEVICE_HIPRT} ${SRC_KERNEL_DEVICE_HIPRT_HEADERS})
source_group("device\\optix" FILES ${SRC_KERNEL_DEVICE_OPTIX} ${SRC_KERNEL_DEVICE_OPTIX_HEADERS})
source_group("device\\metal" FILES ${SRC_KERNEL_DEVICE_METAL} ${SRC_KERNEL_DEVICE_METAL_HEADERS})
source_group("device\\oneapi" FILES ${SRC_KERNEL_DEVICE_ONEAPI} ${SRC_KERNEL_DEVICE_ONEAPI_HEADERS})
@ -1090,6 +1184,8 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_CUDA_HEADERS}"
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_GPU_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/gpu)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hip)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hip)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIPRT}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hiprt)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIPRT_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hiprt)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_METAL}" ${CYCLES_INSTALL_PATH}/source/kernel/device/metal)

View File

@ -17,6 +17,8 @@
# include "kernel/device/metal/bvh.h"
#elif defined(__KERNEL_OPTIX__)
# include "kernel/device/optix/bvh.h"
#elif defined(__HIPRT__)
# include "kernel/device/hiprt/bvh.h"
#else
# define __BVH2__
#endif

View File

@ -23,14 +23,35 @@ ccl_device_inline bool intersection_ray_valid(ccl_private const Ray *ray)
/* Offset intersection distance by the smallest possible amount, to skip
* intersections at this distance. This works in cases where the ray start
* position is unchanged and only tmin is updated, since for self
* intersection we'll be comparing against the exact same distances. */
* intersection we'll be comparing against the exact same distances.
*
* Always returns normalized floating point value. */
ccl_device_forceinline float intersection_t_offset(const float t)
{
/* This is a simplified version of `nextafterf(t, FLT_MAX)`, only dealing with
* non-negative and finite t. */
kernel_assert(t >= 0.0f && isfinite_safe(t));
const uint32_t bits = (t == 0.0f) ? 1 : __float_as_uint(t) + 1;
return __uint_as_float(bits);
/* Special handling of zero, which also includes handling of denormal values:
* always return smallest normalized value. If a denormalized zero is returned
* it will cause false-positive intersection detection with a distance of 0.
*
* The check relies on the fact that comparison of de-normal values with zero
* returns true. */
if (t == 0.0f) {
/* The value of std::numeric_limits<float>::min() and __FLT_MIN__, inlined
* to ensure matched behavior on all platforms and compilers. */
return 0x1p-126;
}
const uint32_t bits = __float_as_uint(t) + 1;
const float result = __uint_as_float(bits);
/* Assert that the calculated value is indeed considered to be offset from the
* original value. */
kernel_assert(result > t);
return result;
}
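/* For example (a sketch of the expected behavior): intersection_t_offset(0.0f) returns
* 0x1p-126 (FLT_MIN), and intersection_t_offset(1.0f) returns the next representable float,
* 0x1.000002p+0. */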
/* Ray offset to avoid self intersection.

View File

@ -48,6 +48,9 @@
#include "kernel/film/read.h"
#if defined(__HIPRT__)
# include "kernel/device/hiprt/hiprt_kernels.h"
#endif
/* --------------------------------------------------------------------
* Integrator.
*/
@ -128,11 +131,13 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
}
ccl_gpu_kernel_postfix
#if !defined(__HIPRT__)
/* Intersection kernels need access to the kernel handler for specialization constants to work
* properly. */
#ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_begin.h"
#endif
# ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_begin.h"
# endif
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_intersect_closest,
@ -191,8 +196,10 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
}
ccl_gpu_kernel_postfix
#ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_end.h"
# ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_end.h"
# endif
#endif
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
@ -259,11 +266,13 @@ ccl_gpu_kernel_postfix
constant int __dummy_constant [[function_constant(Kernel_DummyConstant)]];
#endif
#if !defined(__HIPRT__)
/* Kernels using intersections need access to the kernel handler for specialization constants to
* work properly. */
#ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_begin.h"
#endif
# ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_begin.h"
# endif
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_signature(integrator_shade_surface_raytrace,
@ -276,15 +285,15 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
if (ccl_gpu_kernel_within_bounds(global_index, work_size)) {
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
#if defined(__KERNEL_METAL_APPLE__) && defined(__METALRT__)
# if defined(__KERNEL_METAL_APPLE__) && defined(__METALRT__)
KernelGlobals kg = NULL;
/* Workaround Ambient Occlusion and Bevel nodes not working with Metal.
* Dummy offset should not affect result, but somehow fixes bug! */
kg += __dummy_constant;
ccl_gpu_kernel_call(integrator_shade_surface_raytrace(kg, state, render_buffer));
#else
# else
ccl_gpu_kernel_call(integrator_shade_surface_raytrace(NULL, state, render_buffer));
#endif
# endif
}
}
ccl_gpu_kernel_postfix
@ -303,8 +312,11 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
}
}
ccl_gpu_kernel_postfix
#ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_end.h"
# ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/context_intersect_end.h"
# endif
#endif
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)

View File

@ -20,6 +20,10 @@
#define GPU_KERNEL_BLOCK_NUM_THREADS 1024
#define GPU_KERNEL_MAX_REGISTERS 64
/* For performance tuning of HIP RT kernels we might have to change this number, which is why we
* don't reuse GPU_KERNEL_BLOCK_NUM_THREADS. */
#define GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS 1024
/* Compute number of threads per block and minimum blocks per multiprocessor
* given the maximum number of registers per thread. */
#define ccl_gpu_kernel(block_num_threads, thread_num_registers) \

View File

@ -0,0 +1,219 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#pragma once
#include "kernel/device/hiprt/common.h"
CCL_NAMESPACE_BEGIN
ccl_device_inline bool scene_intersect_valid(ccl_private const Ray *ray)
{
return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f;
}
ccl_device_intersect bool scene_intersect(KernelGlobals kg,
ccl_private const Ray *ray,
const uint visibility,
ccl_private Intersection *isect)
{
isect->t = ray->tmax;
isect->u = 0.0f;
isect->v = 0.0f;
isect->prim = PRIM_NONE;
isect->object = OBJECT_NONE;
isect->type = PRIMITIVE_NONE;
if (!scene_intersect_valid(ray)) {
isect->t = ray->tmax;
isect->type = PRIMITIVE_NONE;
return false;
}
hiprtRay ray_hip;
SET_HIPRT_RAY(ray_hip, ray)
RayPayload payload;
payload.self = ray->self;
payload.kg = kg;
payload.visibility = visibility;
payload.prim_type = PRIMITIVE_NONE;
payload.ray_time = ray->time;
hiprtHit hit = {};
GET_TRAVERSAL_STACK()
if (visibility & PATH_RAY_SHADOW_OPAQUE) {
GET_TRAVERSAL_ANY_HIT(table_closest_intersect, 0)
hit = traversal.getNextHit();
}
else {
GET_TRAVERSAL_CLOSEST_HIT(table_closest_intersect, 0)
hit = traversal.getNextHit();
}
if (hit.hasHit()) {
set_intersect_point(kg, hit, isect);
if (isect->type > 1) { // should be applied only for curves
isect->type = payload.prim_type;
isect->prim = hit.primID;
}
return true;
}
return false;
}
#ifdef __BVH_LOCAL__
ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
ccl_private const Ray *ray,
ccl_private LocalIntersection *local_isect,
int local_object,
ccl_private uint *lcg_state,
int max_hits)
{
if (!scene_intersect_valid(ray)) {
if (local_isect) {
local_isect->num_hits = 0;
}
return false;
}
float3 P = ray->P;
float3 dir = bvh_clamp_direction(ray->D);
float3 idir = bvh_inverse_direction(dir);
if (local_isect != NULL) {
local_isect->num_hits = 0;
}
const int object_flag = kernel_data_fetch(object_flag, local_object);
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir);
# else
bvh_instance_push(kg, local_object, ray, &P, &dir, &idir);
# endif
}
hiprtRay ray_hip;
ray_hip.origin = P;
ray_hip.direction = dir;
ray_hip.maxT = ray->tmax;
ray_hip.minT = ray->tmin;
LocalPayload payload = {0};
payload.kg = kg;
payload.self = ray->self;
payload.local_object = local_object;
payload.max_hits = max_hits;
payload.lcg_state = lcg_state;
payload.local_isect = local_isect;
GET_TRAVERSAL_STACK()
void *local_geom = (void *)(kernel_data_fetch(blas_ptr, local_object));
// We don't need custom intersection functions for SSS (the local intersections used by
// subsurface scattering).
# ifdef HIPRT_SHARED_STACK
hiprtGeomTraversalAnyHitCustomStack<Stack> traversal(local_geom,
ray_hip,
stack,
hiprtTraversalHintDefault,
&payload,
kernel_params.table_local_intersect,
2);
# else
hiprtGeomTraversalAnyHit traversal(
local_geom, ray_hip, table, hiprtTraversalHintDefault, &payload);
# endif
hiprtHit hit = traversal.getNextHit();
return hit.hasHit();
}
#endif //__BVH_LOCAL__
#ifdef __SHADOW_RECORD_ALL__
ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
IntegratorShadowState state,
ccl_private const Ray *ray,
uint visibility,
uint max_hits,
ccl_private uint *num_recorded_hits,
ccl_private float *throughput)
{
*throughput = 1.0f;
*num_recorded_hits = 0;
if (!scene_intersect_valid(ray)) {
return false;
}
hiprtRay ray_hip;
SET_HIPRT_RAY(ray_hip, ray)
ShadowPayload payload;
payload.kg = kg;
payload.self = ray->self;
payload.in_state = state;
payload.max_hits = max_hits;
payload.visibility = visibility;
payload.prim_type = PRIMITIVE_TRIANGLE;
payload.ray_time = ray->time;
payload.num_hits = 0;
payload.r_num_recorded_hits = num_recorded_hits;
payload.r_throughput = throughput;
GET_TRAVERSAL_STACK()
GET_TRAVERSAL_ANY_HIT(table_shadow_intersect, 1)
hiprtHit hit = traversal.getNextHit();
num_recorded_hits = payload.r_num_recorded_hits;
throughput = payload.r_throughput;
return hit.hasHit();
}
#endif /* __SHADOW_RECORD_ALL__ */
#ifdef __VOLUME__
ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
ccl_private const Ray *ray,
ccl_private Intersection *isect,
const uint visibility)
{
isect->t = ray->tmax;
isect->u = 0.0f;
isect->v = 0.0f;
isect->prim = PRIM_NONE;
isect->object = OBJECT_NONE;
isect->type = PRIMITIVE_NONE;
if (!scene_intersect_valid(ray)) {
return false;
}
hiprtRay ray_hip;
SET_HIPRT_RAY(ray_hip, ray)
RayPayload payload;
payload.self = ray->self;
payload.kg = kg;
payload.visibility = visibility;
payload.prim_type = PRIMITIVE_NONE;
payload.ray_time = ray->time;
GET_TRAVERSAL_STACK()
GET_TRAVERSAL_CLOSEST_HIT(table_volume_intersect, 3)
hiprtHit hit = traversal.getNextHit();
if (hit.hasHit()) {
set_intersect_point(kg, hit, isect);
if (isect->type > 1) { // should be applied only for curves
isect->type = payload.prim_type;
isect->prim = hit.primID;
}
return true;
}
else
return false;
}
#endif /* __VOLUME__ */
CCL_NAMESPACE_END

View File

@ -0,0 +1,637 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#ifdef __HIPRT__
struct RayPayload {
KernelGlobals kg;
RaySelfPrimitives self;
uint visibility;
int prim_type;
float ray_time;
};
struct ShadowPayload {
KernelGlobals kg;
RaySelfPrimitives self;
uint visibility;
int prim_type;
float ray_time;
int in_state;
uint max_hits;
uint num_hits;
uint *r_num_recorded_hits;
float *r_throughput;
};
struct LocalPayload {
KernelGlobals kg;
RaySelfPrimitives self;
int prim_type;
float ray_time;
int local_object;
uint max_hits;
uint *lcg_state;
LocalIntersection *local_isect;
};
# define SET_HIPRT_RAY(RAY_RT, RAY) \
RAY_RT.direction = RAY->D; \
RAY_RT.origin = RAY->P; \
RAY_RT.maxT = RAY->tmax; \
RAY_RT.minT = RAY->tmin;
# if defined(HIPRT_SHARED_STACK)
# define GET_TRAVERSAL_STACK() \
Stack stack(&kg->global_stack_buffer[0], \
HIPRT_THREAD_STACK_SIZE, \
kg->shared_stack, \
HIPRT_SHARED_STACK_SIZE);
# else
# define GET_TRAVERSAL_STACK()
# endif
# ifdef HIPRT_SHARED_STACK
# define GET_TRAVERSAL_ANY_HIT(FUNCTION_TABLE, RAY_TYPE) \
hiprtSceneTraversalAnyHitCustomStack<Stack> traversal(kernel_data.device_bvh, \
ray_hip, \
stack, \
visibility, \
hiprtTraversalHintDefault, \
&payload, \
kernel_params.FUNCTION_TABLE, \
RAY_TYPE); \
hiprtSceneTraversalAnyHitCustomStack<Stack> traversal_simple( \
kernel_data.device_bvh, ray_hip, stack, visibility);
# define GET_TRAVERSAL_CLOSEST_HIT(FUNCTION_TABLE, RAY_TYPE) \
hiprtSceneTraversalClosestCustomStack<Stack> traversal(kernel_data.device_bvh, \
ray_hip, \
stack, \
visibility, \
hiprtTraversalHintDefault, \
&payload, \
kernel_params.FUNCTION_TABLE, \
RAY_TYPE); \
hiprtSceneTraversalClosestCustomStack<Stack> traversal_simple( \
kernel_data.device_bvh, ray_hip, stack, visibility);
# else
# define GET_TRAVERSAL_ANY_HIT(FUNCTION_TABLE) \
hiprtSceneTraversalAnyHit traversal(kernel_data.device_bvh, \
ray_hip, \
visibility, \
FUNCTION_TABLE, \
hiprtTraversalHintDefault, \
&payload); \
hiprtSceneTraversalAnyHit traversal_simple(kernel_data.device_bvh, ray_hip, visibility);
# define GET_TRAVERSAL_CLOSEST_HIT(FUNCTION_TABLE) \
hiprtSceneTraversalClosest traversal(kernel_data.device_bvh, \
ray_hip, \
visibility, \
FUNCTION_TABLE, \
hiprtTraversalHintDefault, \
&payload); \
hiprtSceneTraversalClosest traversal_simple(kernel_data.device_bvh, ray_hip, visibility);
# endif
ccl_device_inline void set_intersect_point(KernelGlobals kg,
hiprtHit &hit,
ccl_private Intersection *isect)
{
int prim_offset = 0;
int object_id = kernel_data_fetch(user_instance_id, hit.instanceID);
prim_offset = kernel_data_fetch(object_prim_offset, object_id);
isect->type = kernel_data_fetch(objects, object_id).primitive_type;
isect->t = hit.t;
isect->prim = hit.primID + prim_offset;
isect->object = object_id;
isect->u = hit.uv.x;
isect->v = hit.uv.y;
}
// custom intersection functions
ccl_device_inline bool curve_custom_intersect(const hiprtRay &ray,
const void *userPtr,
void *payload,
hiprtHit &hit)
{
Intersection isect;
RayPayload *local_payload = (RayPayload *)payload;
// We could also cast the shadow payload to get the elements needed to do the intersection,
// so there is no need to write a separate function for shadow intersection.
KernelGlobals kg = local_payload->kg;
int object_id = kernel_data_fetch(user_instance_id, hit.instanceID);
int2 data_offset = kernel_data_fetch(custom_prim_info_offset, object_id);
// data_offset.x: where the data (prim id, type) for the geometry of the current object begins.
// The prim id in hiprtHit is local to the particular geometry, so we add this offset to map the
// prim id in hiprtHit to the one the next stage expects.
// data_offset.y: the offset that has to be added to a local primitive id to get the global
// primitive id (= kernel_data_fetch(object_prim_offset, object_id)).
int prim_offset = data_offset.y;
int curve_index = kernel_data_fetch(custom_prim_info, hit.primID + data_offset.x).x;
int key_value = kernel_data_fetch(custom_prim_info, hit.primID + data_offset.x).y;
if (intersection_skip_self_shadow(local_payload->self, object_id, curve_index + prim_offset))
return false;
float ray_time = local_payload->ray_time;
if ((key_value & PRIMITIVE_MOTION) && kernel_data.bvh.use_bvh_steps) {
int time_offset = kernel_data_fetch(prim_time_offset, object_id);
float2 prims_time = kernel_data_fetch(prims_time, hit.primID + time_offset);
if (ray_time < prims_time.x || ray_time > prims_time.y) {
return false;
}
}
bool b_hit = curve_intersect(kg,
&isect,
ray.origin,
ray.direction,
ray.minT,
ray.maxT,
object_id,
curve_index + prim_offset,
ray_time,
key_value);
if (b_hit) {
hit.uv.x = isect.u;
hit.uv.y = isect.v;
hit.t = isect.t;
hit.primID = isect.prim;
local_payload->prim_type = isect.type; // packed_curve_type;
}
return b_hit;
}
ccl_device_inline bool motion_triangle_custom_intersect(const hiprtRay &ray,
const void *userPtr,
void *payload,
hiprtHit &hit)
{
# ifdef MOTION_BLUR
RayPayload *local_payload = (RayPayload *)payload;
KernelGlobals kg = local_payload->kg;
int object_id = kernel_data_fetch(user_instance_id, hit.instanceID);
int2 data_offset = kernel_data_fetch(custom_prim_info_offset, object_id);
int prim_offset = kernel_data_fetch(object_prim_offset, object_id);
int prim_id_local = kernel_data_fetch(custom_prim_info, hit.primID + data_offset.x).x;
int prim_id_global = prim_id_local + prim_offset;
if (intersection_skip_self_shadow(local_payload->self, object_id, prim_id_global))
return false;
Intersection isect;
bool b_hit = motion_triangle_intersect(kg,
&isect,
ray.origin,
ray.direction,
ray.minT,
ray.maxT,
local_payload->ray_time,
local_payload->visibility,
object_id,
prim_id_global,
prim_id_local);
if (b_hit) {
hit.uv.x = isect.u;
hit.uv.y = isect.v;
hit.t = isect.t;
hit.primID = isect.prim;
local_payload->prim_type = isect.type;
}
return b_hit;
# else
return false;
# endif
}
ccl_device_inline bool motion_triangle_custom_local_intersect(const hiprtRay &ray,
const void *userPtr,
void *payload,
hiprtHit &hit)
{
# ifdef MOTION_BLUR
LocalPayload *local_payload = (LocalPayload *)payload;
KernelGlobals kg = local_payload->kg;
int object_id = local_payload->local_object;
int prim_offset = kernel_data_fetch(object_prim_offset, object_id);
int2 data_offset = kernel_data_fetch(custom_prim_info_offset, object_id);
int prim_id_local = kernel_data_fetch(custom_prim_info, hit.primID + data_offset.x).x;
int prim_id_global = prim_id_local + prim_offset;
if (intersection_skip_self_local(local_payload->self, prim_id_global))
return false;
LocalIntersection *local_isect = local_payload->local_isect;
bool b_hit = motion_triangle_intersect_local(kg,
local_isect,
ray.origin,
ray.direction,
local_payload->ray_time,
object_id,
prim_id_global,
prim_id_local,
ray.minT,
ray.maxT,
local_payload->lcg_state,
local_payload->max_hits);
if (b_hit) {
local_payload->prim_type = PRIMITIVE_MOTION_TRIANGLE;
}
return b_hit;
# else
return false;
# endif
}
ccl_device_inline bool motion_triangle_custom_volume_intersect(const hiprtRay &ray,
const void *userPtr,
void *payload,
hiprtHit &hit)
{
# ifdef MOTION_BLUR
RayPayload *local_payload = (RayPayload *)payload;
KernelGlobals kg = local_payload->kg;
int object_id = kernel_data_fetch(user_instance_id, hit.instanceID);
int object_flag = kernel_data_fetch(object_flag, object_id);
if (!(object_flag & SD_OBJECT_HAS_VOLUME))
return false;
int2 data_offset = kernel_data_fetch(custom_prim_info_offset, object_id);
int prim_offset = kernel_data_fetch(object_prim_offset, object_id);
int prim_id_local = kernel_data_fetch(custom_prim_info, hit.primID + data_offset.x).x;
int prim_id_global = prim_id_local + prim_offset;
if (intersection_skip_self_shadow(local_payload->self, object_id, prim_id_global))
return false;
Intersection isect;
bool b_hit = motion_triangle_intersect(kg,
&isect,
ray.origin,
ray.direction,
ray.minT,
ray.maxT,
local_payload->ray_time,
local_payload->visibility,
object_id,
prim_id_global,
prim_id_local);
if (b_hit) {
hit.uv.x = isect.u;
hit.uv.y = isect.v;
hit.t = isect.t;
hit.primID = isect.prim;
local_payload->prim_type = isect.type;
}
return b_hit;
# else
return false;
# endif
}
ccl_device_inline bool point_custom_intersect(const hiprtRay &ray,
const void *userPtr,
void *payload,
hiprtHit &hit)
{
# ifdef POINT_CLOUD
RayPayload *local_payload = (RayPayload *)payload;
KernelGlobals kg = local_payload->kg;
int object_id = kernel_data_fetch(user_instance_id, hit.instanceID);
int2 data_offset = kernel_data_fetch(custom_prim_info_offset, object_id);
int prim_offset = kernel_data_fetch(object_prim_offset, object_id);
int2 prim_info = kernel_data_fetch(custom_prim_info, hit.primID + data_offset.x);
int prim_id_local = prim_info.x;
int prim_id_global = prim_id_local + prim_offset;
int type = prim_info.y;
if (intersection_skip_self_shadow(local_payload->self, object_id, prim_id_global))
return false;
float ray_time = local_payload->ray_time;
if ((type & PRIMITIVE_MOTION) && kernel_data.bvh.use_bvh_steps) {
int time_offset = kernel_data_fetch(prim_time_offset, object_id);
float2 prims_time = kernel_data_fetch(prims_time, hit.primID + time_offset);
if (ray_time < prims_time.x || ray_time > prims_time.y) {
return false;
}
}
Intersection isect;
bool b_hit = point_intersect(kg,
&isect,
ray.origin,
ray.direction,
ray.minT,
ray.maxT,
object_id,
prim_id_global,
ray_time,
type);
if (b_hit) {
hit.uv.x = isect.u;
hit.uv.y = isect.v;
hit.t = isect.t;
hit.primID = isect.prim;
local_payload->prim_type = isect.type;
}
return b_hit;
# else
return false;
# endif
}
// intersection filters
ccl_device_inline bool closest_intersection_filter(const hiprtRay &ray,
const void *data,
void *user_data,
const hiprtHit &hit)
{
RayPayload *payload = (RayPayload *)user_data;
int object_id = kernel_data_fetch(user_instance_id, hit.instanceID);
int prim_offset = kernel_data_fetch(object_prim_offset, object_id);
int prim = hit.primID + prim_offset;
if (intersection_skip_self_shadow(payload->self, object_id, prim))
return true;
else
return false;
}
ccl_device_inline bool shadow_intersection_filter(const hiprtRay &ray,
const void *data,
void *user_data,
const hiprtHit &hit)
{
ShadowPayload *payload = (ShadowPayload *)user_data;
uint num_hits = payload->num_hits;
uint num_recorded_hits = *(payload->r_num_recorded_hits);
uint max_hits = payload->max_hits;
int state = payload->in_state;
KernelGlobals kg = payload->kg;
RaySelfPrimitives self = payload->self;
int object = kernel_data_fetch(user_instance_id, hit.instanceID);
int prim_offset = kernel_data_fetch(object_prim_offset, object);
int prim = hit.primID + prim_offset;
float ray_tmax = hit.t;
# ifdef __VISIBILITY_FLAG__
if ((kernel_data_fetch(objects, object).visibility & payload->visibility) == 0) {
return true; // no hit - continue traversal
}
# endif
if (intersection_skip_self_shadow(self, object, prim)) {
return true; // no hit - continue traversal
}
float u = hit.uv.x;
float v = hit.uv.y;
int type = kernel_data_fetch(objects, object).primitive_type;
# ifdef __HAIR__
if (type & (PRIMITIVE_CURVE_THICK | PRIMITIVE_CURVE_RIBBON)) {
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
type = segment.type;
prim = segment.prim;
}
# endif
# ifndef __TRANSPARENT_SHADOWS__
return false;
# else
if (num_hits >= max_hits ||
!(intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) {
return false;
}
if (type & PRIMITIVE_CURVE) {
float throughput = *payload->r_throughput;
throughput *= intersection_curve_shadow_transparency(kg, object, prim, type, u);
*payload->r_throughput = throughput;
payload->num_hits += 1;
if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
return false;
}
else {
return true;
}
}
uint record_index = num_recorded_hits;
num_hits += 1;
num_recorded_hits += 1;
payload->num_hits = num_hits;
*(payload->r_num_recorded_hits) = num_recorded_hits;
const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
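/* The record buffer is full: replace the recorded hit with the largest t, or skip this hit if
* it is even farther, so that the nearest hits are the ones kept. */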
if (record_index >= max_record_hits) {
float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t);
uint max_recorded_hit = 0;
for (int i = 1; i < max_record_hits; i++) {
const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t);
if (isect_t > max_recorded_t) {
max_recorded_t = isect_t;
max_recorded_hit = i;
}
}
if (ray_tmax >= max_recorded_t) {
return true;
}
record_index = max_recorded_hit;
}
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = ray_tmax;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;
return true;
# endif /* __TRANSPARENT_SHADOWS__ */
}
ccl_device_inline bool local_intersection_filter(const hiprtRay &ray,
const void *data,
void *user_data,
const hiprtHit &hit)
{
# ifdef __BVH_LOCAL__
LocalPayload *payload = (LocalPayload *)user_data;
KernelGlobals kg = payload->kg;
int object_id = payload->local_object;
int prim_offset = kernel_data_fetch(object_prim_offset, object_id);
int prim = hit.primID + prim_offset;
# ifndef __RAY_OFFSET__
if (intersection_skip_self_local(payload->self, prim)) {
return true; // continue search
}
# endif
uint max_hits = payload->max_hits;
if (max_hits == 0) {
return false; // stop search
}
int hit_index = 0;
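/* With an LCG state, record a uniformly random subset of at most max_hits hits (reservoir
* sampling); without one, keep only the single nearest hit. */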
if (payload->lcg_state) {
for (int i = min(max_hits, payload->local_isect->num_hits) - 1; i >= 0; --i) {
if (hit.t == payload->local_isect->hits[i].t) {
return true; // continue search
}
}
hit_index = payload->local_isect->num_hits++;
if (payload->local_isect->num_hits > max_hits) {
hit_index = lcg_step_uint(payload->lcg_state) % payload->local_isect->num_hits;
if (hit_index >= max_hits) {
return true; // continue search
}
}
}
else {
if (payload->local_isect->num_hits && hit.t > payload->local_isect->hits[0].t) {
return true;
}
payload->local_isect->num_hits = 1;
}
Intersection *isect = &payload->local_isect->hits[hit_index];
isect->t = hit.t;
isect->prim = prim;
isect->object = object_id;
isect->type = PRIMITIVE_TRIANGLE; // kernel_data_fetch(__objects, object_id).primitive_type;
isect->u = hit.uv.x;
isect->v = hit.uv.y;
payload->local_isect->Ng[hit_index] = hit.normal;
return true;
# endif
}
ccl_device_inline bool volume_intersection_filter(const hiprtRay &ray,
const void *data,
void *user_data,
const hiprtHit &hit)
{
RayPayload *payload = (RayPayload *)user_data;
int object_id = kernel_data_fetch(user_instance_id, hit.instanceID);
int prim_offset = kernel_data_fetch(object_prim_offset, object_id);
int prim = hit.primID + prim_offset;
int object_flag = kernel_data_fetch(object_flag, object_id);
if (intersection_skip_self(payload->self, object_id, prim))
return true;
else if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0)
return true;
else
return false;
}
HIPRT_DEVICE bool intersectFunc(u32 geomType,
u32 rayType,
const hiprtFuncTableHeader &tableHeader,
const hiprtRay &ray,
void *payload,
hiprtHit &hit)
{
const u32 index = tableHeader.numGeomTypes * rayType + geomType;
const void *data = tableHeader.funcDataSets[index].filterFuncData;
switch (index) {
case Curve_Intersect_Function:
case Curve_Intersect_Shadow:
return curve_custom_intersect(ray, data, payload, hit);
case Motion_Triangle_Intersect_Function:
case Motion_Triangle_Intersect_Shadow:
return motion_triangle_custom_intersect(ray, data, payload, hit);
case Motion_Triangle_Intersect_Local:
return motion_triangle_custom_local_intersect(ray, data, payload, hit);
case Motion_Triangle_Intersect_Volume:
return motion_triangle_custom_volume_intersect(ray, data, payload, hit);
case Point_Intersect_Function:
case Point_Intersect_Shadow:
return point_custom_intersect(ray, data, payload, hit);
default:
break;
}
return false;
}
HIPRT_DEVICE bool filterFunc(u32 geomType,
u32 rayType,
const hiprtFuncTableHeader &tableHeader,
const hiprtRay &ray,
void *payload,
const hiprtHit &hit)
{
const u32 index = tableHeader.numGeomTypes * rayType + geomType;
const void *data = tableHeader.funcDataSets[index].intersectFuncData;
switch (index) {
case Triangle_Filter_Closest:
return closest_intersection_filter(ray, data, payload, hit);
case Triangle_Filter_Shadow:
case Curve_Filter_Shadow:
case Motion_Triangle_Filter_Shadow:
case Point_Filter_Shadow:
return shadow_intersection_filter(ray, data, payload, hit);
case Triangle_Filter_Local:
case Motion_Triangle_Filter_Local:
return local_intersection_filter(ray, data, payload, hit);
case Triangle_Filter_Volume:
case Motion_Triangle_Filter_Volume:
return volume_intersection_filter(ray, data, payload, hit);
default:
break;
}
return false;
}
#endif

View File

@ -0,0 +1,158 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#pragma once
#include "kernel/types.h"
#include "kernel/integrator/state.h"
#include "kernel/util/profiling.h"
#define HIPRT_SHARED_STACK
/* The size of the global stack available to each thread (memory reserved for each thread in
* global_stack_buffer). */
#define HIPRT_THREAD_STACK_SIZE 64
/* LDS (Local Data Share) allocation for each thread; the number was obtained empirically. */
#define HIPRT_SHARED_STACK_SIZE 24
/* HIPRT_THREAD_GROUP_SIZE is the number of threads per work-group for intersection kernels.
* The default number of threads per work-group is 1024; however, since HIP RT intersection
* kernels use local memory, and the local memory size in those kernels scales up with the number
* of threads, the thread count is scaled down to 256 to avoid going over the maximum local
* memory and to strike a balance between memory access and the number of waves.
*
* The total local stack size is the number of threads * HIPRT_SHARED_STACK_SIZE. */
#define HIPRT_THREAD_GROUP_SIZE 256
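/* For example, 256 threads * 24 stack entries * 4 bytes = 24 KiB of LDS per work-group
* (illustrative). */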
CCL_NAMESPACE_BEGIN
struct KernelGlobalsGPU {
int *global_stack_buffer;
#ifdef HIPRT_SHARED_STACK
int *shared_stack;
#endif
};
typedef ccl_global KernelGlobalsGPU *ccl_restrict KernelGlobals;
#if defined(HIPRT_SHARED_STACK)
/* This macro allocates shared memory; KernelGlobals is used to pass the shared memory down to
* the intersection functions. */
# define HIPRT_INIT_KERNEL_GLOBAL() \
ccl_gpu_shared int shared_stack[HIPRT_SHARED_STACK_SIZE * HIPRT_THREAD_GROUP_SIZE]; \
ccl_global KernelGlobalsGPU kg_gpu; \
KernelGlobals kg = &kg_gpu; \
kg->shared_stack = &shared_stack[0]; \
kg->global_stack_buffer = stack_buffer;
#else
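/* Note: this fallback appears unused, since HIPRT_SHARED_STACK is defined unconditionally
* above; as written it would also dereference a NULL KernelGlobals. */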
# define HIPRT_INIT_KERNEL_GLOBAL() \
KernelGlobals kg = NULL; \
kg->global_stack_buffer = stack_buffer;
#endif
struct KernelParamsHIPRT {
KernelData data;
#define KERNEL_DATA_ARRAY(type, name) const type *name;
KERNEL_DATA_ARRAY(int, user_instance_id)
KERNEL_DATA_ARRAY(uint64_t, blas_ptr)
KERNEL_DATA_ARRAY(int2, custom_prim_info)
KERNEL_DATA_ARRAY(int2, custom_prim_info_offset)
KERNEL_DATA_ARRAY(float2, prims_time)
KERNEL_DATA_ARRAY(int, prim_time_offset)
#include "kernel/data_arrays.h"
/* Integrator state */
IntegratorStateGPU integrator_state;
hiprtFuncTable table_closest_intersect;
hiprtFuncTable table_shadow_intersect;
hiprtFuncTable table_local_intersect;
hiprtFuncTable table_volume_intersect;
};
/* Intersection_Function_Table_Index defines index values to retrieve custom intersection
* functions from the function table. */
enum Intersection_Function_Table_Index {
// Triangles use the intersection function provided by HIP RT and don't need custom
// intersection functions.
// Custom intersection functions for closest intersect.
Curve_Intersect_Function = 1, // Custom intersection for curves
Motion_Triangle_Intersect_Function, // Custom intersection for triangles with vertex motion blur
// attributes.
Point_Intersect_Function, // Custom intersection for point cloud.
// Custom intersection functions for shadow rendering are the same as the functions for closest
// intersect; however, the table indices are different.
Triangle_Intersect_Shadow_None,
Curve_Intersect_Shadow,
Motion_Triangle_Intersect_Shadow,
Point_Intersect_Shadow,
// Custom intersection functions for subsurface scattering.
// Only motion triangles have a valid custom intersection function.
Triangle_Intersect_Local_None,
Curve_Intersect_Local_None,
Motion_Triangle_Intersect_Local,
Point_Intersect_Local_None,
// Custom intersection functions for volume rendering.
// Only motion triangles have a valid custom intersection function.
Triangle_Intersect_Volume_None,
Curve_Intersect_Volume_None,
Motion_Triangle_Intersect_Volume,
Point_Intersect_Volume_None,
};
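/* Sketch of the indexing used by intersectFunc (in kernel/device/hiprt/common.h):
*   index = tableHeader.numGeomTypes * rayType + geomType
* With 4 geometry types, a shadow ray (rayType 1) hitting a curve (geomType 1) resolves to
* 4 * 1 + 1 = 5 = Curve_Intersect_Shadow (illustrative). */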
// Filter functions filter hits, i.e. they test whether a hit should be accepted or not, and
// whether traversal should stop or continue.
enum Filter_Function_Table_Index {
Triangle_Filter_Closest = 0, // Filter function for triangles for closest intersect, no custom
// intersection function is needed.
Curve_Filter_Opaque_None, // No filter function is needed and everything is handled in the
// intersection function.
Motion_Triangle_Filter_Opaque_None, // No filter function is needed and everything is handled in
// intersection function.
Point_Filter_Opaque_Non, // No filter function is needed.
// Filter function for all primitives for shadow intersection.
// All primitives use the same function but each has a different index in the table.
Triangle_Filter_Shadow,
Curve_Filter_Shadow,
Motion_Triangle_Filter_Shadow,
Point_Filter_Shadow,
// Filter functions for subsurface scattering. Triangles and motion triangles need function
// assignments. The indices for triangles and motion triangles point to the same function.
// Points and curves don't need any function, since subsurface scattering is not applied to
// either.
Triangle_Filter_Local, // Filter function for triangles.
Curve_Filter_Local_None, // Subsurface scattering is not applied to curves; no filter function
// is needed.
Motion_Triangle_Filter_Local,
Point_Filter_Local_None,
// Filter functions for volume rendering.
// Volume rendering only applies to triangles and motion triangles.
// Triangles and motion triangles use the same filter functions for volume rendering
Triangle_Filter_Volume,
Curve_Filter_Volume_None,
Motion_Triangle_Filter_Volume,
Point_Filter_Volume_None,
};
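/* The same numGeomTypes * rayType + geomType indexing applies here: e.g. a volume ray
* (rayType 3) hitting a motion triangle (geomType 2) resolves to 4 * 3 + 2 = 14 =
* Motion_Triangle_Filter_Volume (illustrative). */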
#ifdef __KERNEL_GPU__
__constant__ KernelParamsHIPRT kernel_params;
# ifdef HIPRT_SHARED_STACK
typedef hiprtGlobalStack Stack;
# endif
#endif
/* Abstraction macros */
#define kernel_data kernel_params.data
#define kernel_data_fetch(name, index) kernel_params.name[(index)]
#define kernel_data_array(name) (kernel_params.name)
#define kernel_integrator_state kernel_params.integrator_state
CCL_NAMESPACE_END

View File

@ -0,0 +1,101 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#ifdef __HIPRT__
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_intersect_closest,
ccl_global const int *path_index_array,
ccl_global float *render_buffer,
const int work_size,
ccl_global int *stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {
HIPRT_INIT_KERNEL_GLOBAL()
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
ccl_gpu_kernel_call(integrator_intersect_closest(kg, state, render_buffer));
}
}
ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_intersect_shadow,
ccl_global const int *path_index_array,
const int work_size,
ccl_global int *stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {
HIPRT_INIT_KERNEL_GLOBAL()
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
ccl_gpu_kernel_call(integrator_intersect_shadow(kg, state));
}
}
ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_intersect_subsurface,
ccl_global const int *path_index_array,
const int work_size,
ccl_global int *stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {
HIPRT_INIT_KERNEL_GLOBAL()
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
ccl_gpu_kernel_call(integrator_intersect_subsurface(kg, state));
}
}
ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_intersect_volume_stack,
ccl_global const int *path_index_array,
const int work_size,
ccl_global int *stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {
HIPRT_INIT_KERNEL_GLOBAL()
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
ccl_gpu_kernel_call(integrator_intersect_volume_stack(kg, state));
}
}
ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_shade_surface_raytrace,
ccl_global const int *path_index_array,
ccl_global float *render_buffer,
const int work_size,
ccl_global int *stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {
HIPRT_INIT_KERNEL_GLOBAL()
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
ccl_gpu_kernel_call(integrator_shade_surface_raytrace(kg, state, render_buffer));
}
}
ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_shade_surface_mnee,
ccl_global const int *path_index_array,
ccl_global float *render_buffer,
const int work_size,
ccl_global int *stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {
HIPRT_INIT_KERNEL_GLOBAL()
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
ccl_gpu_kernel_call(integrator_shade_surface_mnee(kg, state, render_buffer));
}
}
ccl_gpu_kernel_postfix
#endif /* __HIPRT__ */

View File

@ -0,0 +1,16 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2023 Blender Foundation */
#ifdef __HIP_DEVICE_COMPILE__
# include "kernel/device/hip/compat.h"
# include "kernel/device/hip/config.h"
# include <hiprt/hiprt_device.h>
# include "kernel/device/hiprt/globals.h"
# include "kernel/device/gpu/image.h"
# include "kernel/device/gpu/kernel.h"
#endif

View File

@ -1169,10 +1169,14 @@ typedef enum KernelBVHLayout {
BVH_LAYOUT_METAL = (1 << 5),
BVH_LAYOUT_MULTI_METAL = (1 << 6),
BVH_LAYOUT_MULTI_METAL_EMBREE = (1 << 7),
BVH_LAYOUT_HIPRT = (1 << 8),
BVH_LAYOUT_MULTI_HIPRT = (1 << 9),
BVH_LAYOUT_MULTI_HIPRT_EMBREE = (1 << 10),
/* Default BVH layout to use for CPU. */
BVH_LAYOUT_AUTO = BVH_LAYOUT_EMBREE,
BVH_LAYOUT_ALL = BVH_LAYOUT_BVH2 | BVH_LAYOUT_EMBREE | BVH_LAYOUT_OPTIX | BVH_LAYOUT_METAL,
BVH_LAYOUT_ALL = BVH_LAYOUT_BVH2 | BVH_LAYOUT_EMBREE | BVH_LAYOUT_OPTIX | BVH_LAYOUT_METAL |
BVH_LAYOUT_HIPRT | BVH_LAYOUT_MULTI_HIPRT | BVH_LAYOUT_MULTI_HIPRT_EMBREE,
} KernelBVHLayout;
/* Specialized struct that can become constants in dynamic compilation. */
@ -1225,6 +1229,8 @@ typedef struct KernelData {
OptixTraversableHandle device_bvh;
#elif defined __METALRT__
metalrt_as_type device_bvh;
#elif defined(__HIPRT__)
void *device_bvh;
#else
# ifdef __EMBREE__
RTCScene device_bvh;

View File

@ -114,7 +114,9 @@ bool Geometry::need_build_bvh(BVHLayout layout) const
{
return is_instanced() || layout == BVH_LAYOUT_OPTIX || layout == BVH_LAYOUT_MULTI_OPTIX ||
layout == BVH_LAYOUT_METAL || layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE ||
layout == BVH_LAYOUT_MULTI_METAL || layout == BVH_LAYOUT_MULTI_METAL_EMBREE;
layout == BVH_LAYOUT_MULTI_METAL || layout == BVH_LAYOUT_MULTI_METAL_EMBREE ||
layout == BVH_LAYOUT_HIPRT || layout == BVH_LAYOUT_MULTI_HIPRT ||
layout == BVH_LAYOUT_MULTI_HIPRT_EMBREE;
}
bool Geometry::is_instanced() const

View File

@ -597,7 +597,8 @@ void ObjectManager::device_update_prim_offsets(Device *device, DeviceScene *dsce
if (!scene->integrator->get_use_light_tree()) {
BVHLayoutMask layout_mask = device->get_bvh_layout_mask(dscene->data.kernel_features);
if (layout_mask != BVH_LAYOUT_METAL && layout_mask != BVH_LAYOUT_MULTI_METAL &&
layout_mask != BVH_LAYOUT_MULTI_METAL_EMBREE) {
layout_mask != BVH_LAYOUT_MULTI_METAL_EMBREE && layout_mask != BVH_LAYOUT_HIPRT &&
layout_mask != BVH_LAYOUT_MULTI_HIPRT && layout_mask != BVH_LAYOUT_MULTI_HIPRT_EMBREE) {
return;
}
}

View File

@ -10,6 +10,7 @@
#include "scene/bake.h"
#include "scene/camera.h"
#include "scene/curves.h"
#include "scene/devicescene.h"
#include "scene/film.h"
#include "scene/integrator.h"
#include "scene/light.h"
@ -24,7 +25,6 @@
#include "scene/svm.h"
#include "scene/tables.h"
#include "scene/volume.h"
#include "scene/devicescene.h"
#include "session/session.h"
#include "util/foreach.h"
@ -34,8 +34,6 @@
CCL_NAMESPACE_BEGIN
Scene::Scene(const SceneParams &params_, Device *device)
: name("Scene"),
bvh(NULL),

View File

@ -595,7 +595,7 @@ ccl_device_inline float4 ensure_finite(float4 v)
ccl_device_inline float4 pow(float4 v, float e)
{
return make_float4(powf(v.x, e), powf(v.y, e), powf(v.z, e), powf(v.z, e));
return make_float4(powf(v.x, e), powf(v.y, e), powf(v.z, e), powf(v.w, e));
}
CCL_NAMESPACE_END

View File

@ -6306,8 +6306,7 @@ def km_view3d_rotate_modal(_params):
)
items.extend([
("CONFIRM", {"type": 'MIDDLEMOUSE', "value": 'RELEASE', "any": True}, None),
("CONFIRM", {"type": 'ESC', "value": 'PRESS', "any": True}, None),
("CANCEL", {"type": 'RIGHTMOUSE', "value": 'PRESS', "any": True}, None),
("AXIS_SNAP_ENABLE", {"type": 'LEFT_ALT', "value": 'PRESS', "any": True}, None),
("AXIS_SNAP_DISABLE", {"type": 'LEFT_ALT', "value": 'RELEASE', "any": True}, None),
("AXIS_SNAP_ENABLE", {"type": 'RIGHT_ALT', "value": 'PRESS', "any": True}, None),
@ -6326,8 +6325,7 @@ def km_view3d_move_modal(_params):
)
items.extend([
("CONFIRM", {"type": 'MIDDLEMOUSE', "value": 'RELEASE', "any": True}, None),
("CONFIRM", {"type": 'ESC', "value": 'PRESS', "any": True}, None),
("CANCEL", {"type": 'RIGHTMOUSE', "value": 'PRESS', "any": True}, None),
])
return keymap
@ -6342,8 +6340,7 @@ def km_view3d_zoom_modal(_params):
)
items.extend([
("CONFIRM", {"type": 'MIDDLEMOUSE', "value": 'RELEASE', "any": True}, None),
("CONFIRM", {"type": 'ESC', "value": 'PRESS', "any": True}, None),
("CANCEL", {"type": 'RIGHTMOUSE', "value": 'PRESS', "any": True}, None),
])
return keymap
@ -6358,8 +6355,7 @@ def km_view3d_dolly_modal(_params):
)
items.extend([
("CONFIRM", {"type": 'MIDDLEMOUSE', "value": 'RELEASE', "any": True}, None),
("CONFIRM", {"type": 'ESC', "value": 'PRESS', "any": True}, None),
("CANCEL", {"type": 'RIGHTMOUSE', "value": 'PRESS', "any": True}, None),
])
return keymap

View File

@ -1491,6 +1491,7 @@ class USERPREF_PT_file_paths_asset_libraries(FilePathsPanel, Panel):
active_library = paths.asset_libraries[active_library_index]
layout.prop(active_library, "path")
layout.prop(active_library, "import_method", text="Import Method")
layout.prop(active_library, "use_relative_path")
class USERPREF_UL_asset_libraries(bpy.types.UIList):

View File

@ -121,6 +121,7 @@ set(SRC_DNA_DEFAULTS_INC
${CMAKE_CURRENT_SOURCE_DIR}/makesdna/DNA_space_defaults.h
${CMAKE_CURRENT_SOURCE_DIR}/makesdna/DNA_speaker_defaults.h
${CMAKE_CURRENT_SOURCE_DIR}/makesdna/DNA_texture_defaults.h
${CMAKE_CURRENT_SOURCE_DIR}/makesdna/DNA_userdef_defaults.h
${CMAKE_CURRENT_SOURCE_DIR}/makesdna/DNA_vec_defaults.h
${CMAKE_CURRENT_SOURCE_DIR}/makesdna/DNA_view3d_defaults.h
${CMAKE_CURRENT_SOURCE_DIR}/makesdna/DNA_volume_defaults.h

View File

@ -67,6 +67,8 @@ class AssetLibrary {
* #import_method_ above, it's just a default. */
bool may_override_import_method_ = false;
bool use_relative_path_ = true;
bCallbackFuncStore on_save_callback_store_{};
public:

View File

@ -88,6 +88,7 @@ class AssetRepresentation {
* #get_import_method(). Also returns true if there is no predefined import method
* (when #get_import_method() returns no value). */
bool may_override_import_method() const;
bool get_use_relative_path() const;
/** If this asset is stored inside this current file (#is_local_id() is true), this returns the
* ID's pointer, otherwise null. */
ID *local_id() const;
@ -109,3 +110,4 @@ std::string AS_asset_representation_full_library_path_get(const ::AssetRepresent
std::optional<eAssetImportMethod> AS_asset_representation_import_method_get(
const ::AssetRepresentation *asset_handle);
bool AS_asset_representation_may_override_import_method(const ::AssetRepresentation *asset_handle);
bool AS_asset_representation_use_relative_path_get(const ::AssetRepresentation *asset_handle);

View File

@ -101,6 +101,7 @@ AssetLibrary *AssetLibraryService::get_asset_library(
AssetLibrary *library = get_asset_library_on_disk_custom(custom_library->name, root_path);
library->import_method_ = eAssetImportMethod(custom_library->import_method);
library->may_override_import_method_ = true;
library->use_relative_path_ = (custom_library->flag & ASSET_LIBRARY_RELATIVE_PATH) != 0;
return library;
}

View File

@ -106,6 +106,14 @@ bool AssetRepresentation::may_override_import_method() const
return owner_asset_library_->may_override_import_method_;
}
bool AssetRepresentation::get_use_relative_path() const
{
if (!owner_asset_library_) {
return false;
}
return owner_asset_library_->use_relative_path_;
}
ID *AssetRepresentation::local_id() const
{
return is_local_id_ ? local_asset_id_ : nullptr;
@ -155,6 +163,13 @@ bool AS_asset_representation_may_override_import_method(const AssetRepresentatio
return asset->may_override_import_method();
}
bool AS_asset_representation_use_relative_path_get(const AssetRepresentation *asset_handle)
{
const asset_system::AssetRepresentation *asset =
reinterpret_cast<const asset_system::AssetRepresentation *>(asset_handle);
return asset->get_use_relative_path();
}
/* ---------------------------------------------------------------------- */
/** \name C-API
* \{ */

View File

@ -25,7 +25,7 @@ extern "C" {
/* Blender file format version. */
#define BLENDER_FILE_VERSION BLENDER_VERSION
#define BLENDER_FILE_SUBVERSION 5
#define BLENDER_FILE_SUBVERSION 6
/* Minimum Blender version that supports reading file written with the current
* version. Older Blender versions will test this and show a warning if the file

View File

@ -92,7 +92,7 @@ enum {
* Do not attempt to access original ID pointers (triggers usages of
* `IDWALK_NO_ORIG_POINTERS_ACCESS` too).
*
* Use when original ID pointers values are (probably) not valid, e.g. dureing readfile process.
* Use when original ID pointers values are (probably) not valid, e.g. during read-file process.
*/
ID_REMAP_NO_ORIG_POINTERS_ACCESS = 1 << 20,
};

View File

@ -1900,7 +1900,7 @@ void BKE_fcurve_deduplicate_keys(FCurve *fcu)
}
else {
/* Move the retained key to the old X-coordinate, to 'anchor' the X-coordinate used for
* subsequente comparisons. Without this, the reference X-coordinate would keep moving
* subsequent comparisons. Without this, the reference X-coordinate would keep moving
* forward in time, potentially merging in more keys than desired. */
BKE_fcurve_keyframe_move_time_with_handles(prev_bezt, prev_x);
}

View File

@ -407,6 +407,55 @@ bool NormalFieldInput::is_equal_to(const fn::FieldNode &other) const
return dynamic_cast<const NormalFieldInput *>(&other) != nullptr;
}
static std::optional<AttributeIDRef> try_get_field_direct_attribute_id(const fn::GField &any_field)
{
if (const auto *field = dynamic_cast<const AttributeFieldInput *>(&any_field.node())) {
return field->attribute_name();
}
if (const auto *field = dynamic_cast<const AnonymousAttributeFieldInput *>(&any_field.node())) {
return *field->anonymous_id();
}
return {};
}
static bool attribute_kind_matches(const AttributeMetaData meta_data,
const eAttrDomain domain,
const eCustomDataType data_type)
{
return meta_data.domain == domain && meta_data.data_type == data_type;
}
/**
* Some fields reference attributes directly. When the referenced attribute has the requested type
* and domain, use implicit sharing to avoid duplication when creating the captured attribute.
*/
static bool try_add_shared_field_attribute(MutableAttributeAccessor attributes,
const AttributeIDRef &id_to_create,
const eAttrDomain domain,
const fn::GField &field)
{
const std::optional<AttributeIDRef> field_id = try_get_field_direct_attribute_id(field);
if (!field_id) {
return false;
}
const std::optional<AttributeMetaData> meta_data = attributes.lookup_meta_data(*field_id);
if (!meta_data) {
return false;
}
const eCustomDataType data_type = bke::cpp_type_to_custom_data_type(field.cpp_type());
if (!attribute_kind_matches(*meta_data, domain, data_type)) {
/* Avoid costly domain and type interpolation, which would make sharing impossible. */
return false;
}
const GAttributeReader attribute = attributes.lookup(*field_id, domain, data_type);
if (!attribute.sharing_info || !attribute.varray.is_span()) {
return false;
}
const AttributeInitShared init(attribute.varray.get_internal_span().data(),
*attribute.sharing_info);
return attributes.add(id_to_create, domain, data_type, init);
}
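/* A standalone model of the sharing fast-path above (illustrative names only;
 * std::shared_ptr stands in for Blender's implicit-sharing info): when the
 * stored attribute already has the requested domain and type, the destination
 * simply references the same buffer instead of evaluating and copying. */
#include <cassert>
#include <map>
#include <memory>
#include <string>
#include <vector>

struct SharedAttribute {
  std::string domain;
  std::string type;
  std::shared_ptr<std::vector<float>> data; /* Shared buffer, never mutated in place. */
};

static bool try_add_shared(std::map<std::string, SharedAttribute> &attributes,
                           const std::string &dst_name,
                           const std::string &src_name,
                           const std::string &domain,
                           const std::string &type)
{
  const auto it = attributes.find(src_name);
  if (it == attributes.end()) {
    return false;
  }
  /* Any domain or type mismatch would require interpolation or conversion,
   * which makes sharing impossible (mirrors attribute_kind_matches above). */
  if (it->second.domain != domain || it->second.type != type) {
    return false;
  }
  attributes[dst_name] = {domain, type, it->second.data}; /* User count +1, no copy. */
  return true;
}

int main()
{
  std::map<std::string, SharedAttribute> attributes;
  attributes["position"] = {"POINT", "FLOAT", std::make_shared<std::vector<float>>(1000, 1.0f)};
  assert(try_add_shared(attributes, "captured", "position", "POINT", "FLOAT"));
  assert(attributes["captured"].data.use_count() == 2); /* One buffer, two users. */
  return 0;
}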
bool try_capture_field_on_geometry(GeometryComponent &component,
const AttributeIDRef &attribute_id,
const eAttrDomain domain,
@ -422,20 +471,20 @@ bool try_capture_field_on_geometry(GeometryComponent &component,
return attributes.add(attribute_id, domain, data_type, AttributeInitConstruct{});
}
bke::GeometryFieldContext field_context{component, domain};
const bke::GeometryFieldContext field_context{component, domain};
const IndexMask mask{IndexMask(domain_size)};
const bke::AttributeValidator validator = attributes.lookup_validator(attribute_id);
const std::optional<AttributeMetaData> meta_data = attributes.lookup_meta_data(attribute_id);
const bool attribute_exists = meta_data && meta_data->domain == domain &&
meta_data->data_type == data_type;
const bool attribute_matches = meta_data &&
attribute_kind_matches(*meta_data, domain, data_type);
/* We are writing to an attribute that exists already with the correct domain and type. */
if (attribute_exists) {
/* We are writing to an attribute that exists already with the correct domain and type. */
if (attribute_matches) {
if (GSpanAttributeWriter dst_attribute = attributes.lookup_for_write_span(attribute_id)) {
bke::GeometryFieldContext field_context{component, domain};
const IndexMask mask{IndexMask(domain_size)};
const bke::GeometryFieldContext field_context{component, domain};
fn::FieldEvaluator evaluator{field_context, &mask};
evaluator.add(validator.validate_field_if_necessary(field));
evaluator.set_selection(selection);
@ -450,11 +499,19 @@ bool try_capture_field_on_geometry(GeometryComponent &component,
}
}
const bool selection_is_full = !selection.node().depends_on_input() &&
fn::evaluate_constant_field(selection);
if (!validator && selection_is_full) {
if (try_add_shared_field_attribute(attributes, attribute_id, domain, field)) {
return true;
}
}
/* Could avoid allocating a new buffer if:
* - The field does not depend on that attribute (we can't easily check for that yet). */
void *buffer = MEM_mallocN_aligned(type.size() * domain_size, type.alignment(), __func__);
if (selection.node().depends_on_input() || !fn::evaluate_constant_field(selection)) {
/* If every element might not be selected, the buffer must be initialized. */
if (!selection_is_full) {
type.value_initialize_n(buffer, domain_size);
}
fn::FieldEvaluator evaluator{field_context, &mask};
@ -463,7 +520,7 @@ bool try_capture_field_on_geometry(GeometryComponent &component,
evaluator.set_selection(selection);
evaluator.evaluate();
if (attribute_exists) {
if (attribute_matches) {
if (GAttributeWriter attribute = attributes.lookup_for_write(attribute_id)) {
attribute.varray.set_all(buffer);
attribute.finish();

View File

@ -39,15 +39,6 @@
#include "atomic_ops.h"
using blender::BitVector;
using blender::float3;
using blender::int2;
using blender::MutableBitSpan;
using blender::MutableSpan;
using blender::short2;
using blender::Span;
using blender::VArray;
// #define DEBUG_TIME
#ifdef DEBUG_TIME
@ -315,6 +306,7 @@ void normals_calc_poly_vert(const Span<float3> positions,
blender::Span<blender::float3> Mesh::vert_normals() const
{
using namespace blender;
if (!this->runtime->vert_normals_dirty) {
BLI_assert(this->runtime->vert_normals.size() == this->totvert);
return this->runtime->vert_normals;
@ -327,14 +319,14 @@ blender::Span<blender::float3> Mesh::vert_normals() const
}
/* Isolate task because a mutex is locked and computing normals is multi-threaded. */
blender::threading::isolate_task([&]() {
threading::isolate_task([&]() {
const Span<float3> positions = this->vert_positions();
const blender::OffsetIndices polys = this->polys();
const OffsetIndices polys = this->polys();
const Span<int> corner_verts = this->corner_verts();
this->runtime->vert_normals.reinitialize(positions.size());
this->runtime->poly_normals.reinitialize(polys.size());
blender::bke::mesh::normals_calc_poly_vert(
bke::mesh::normals_calc_poly_vert(
positions, polys, corner_verts, this->runtime->poly_normals, this->runtime->vert_normals);
this->runtime->vert_normals_dirty = false;
@ -346,6 +338,7 @@ blender::Span<blender::float3> Mesh::vert_normals() const
blender::Span<blender::float3> Mesh::poly_normals() const
{
using namespace blender;
if (!this->runtime->poly_normals_dirty) {
BLI_assert(this->runtime->poly_normals.size() == this->totpoly);
return this->runtime->poly_normals;
@ -358,14 +351,13 @@ blender::Span<blender::float3> Mesh::poly_normals() const
}
/* Isolate task because a mutex is locked and computing normals is multi-threaded. */
blender::threading::isolate_task([&]() {
threading::isolate_task([&]() {
const Span<float3> positions = this->vert_positions();
const blender::OffsetIndices polys = this->polys();
const OffsetIndices polys = this->polys();
const Span<int> corner_verts = this->corner_verts();
this->runtime->poly_normals.reinitialize(polys.size());
blender::bke::mesh::normals_calc_polys(
positions, polys, corner_verts, this->runtime->poly_normals);
bke::mesh::normals_calc_polys(positions, polys, corner_verts, this->runtime->poly_normals);
this->runtime->poly_normals_dirty = false;
});
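/* Standalone sketch of the lazy-cache shape used by vert_normals() and
 * poly_normals(): a dirty flag checked first, then recomputation under a
 * mutex. The real code additionally runs the computation inside
 * threading::isolate_task() so a worker that holds the mutex cannot steal an
 * unrelated nested task that tries to take the same mutex. Illustrative only. */
#include <atomic>
#include <cstddef>
#include <mutex>
#include <vector>

struct NormalsCache {
  std::mutex mutex;
  std::atomic<bool> dirty{true};
  std::vector<float> normals;

  const std::vector<float> &ensure(const int size)
  {
    if (!dirty.load(std::memory_order_acquire)) {
      return normals; /* Fast path: valid cache, no lock taken. */
    }
    std::lock_guard<std::mutex> lock(mutex);
    if (dirty.load(std::memory_order_relaxed)) { /* Re-check under the lock. */
      /* Stand-in for the multithreaded normal computation. */
      normals.assign(static_cast<size_t>(size) * 3, 0.0f);
      dirty.store(false, std::memory_order_release);
    }
    return normals;
  }
};

int main()
{
  NormalsCache cache;
  return cache.ensure(100).size() == 300 ? 0 : 1;
}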
@ -672,6 +664,8 @@ void BKE_lnor_space_custom_normal_to_data(const MLoopNorSpace *lnor_space,
}
}
namespace blender::bke::mesh {
#define LOOP_SPLIT_TASK_BLOCK_SIZE 1024
struct LoopSplitTaskData {
@ -684,8 +678,6 @@ struct LoopSplitTaskData {
/** We have to create those outside of tasks, since #MemArena is not thread-safe. */
MLoopNorSpace *lnor_space;
int ml_curr_index;
/** Also used a flag to switch between single or fan process! */
int ml_prev_index;
int poly_index;
Type flag;
@ -704,7 +696,7 @@ struct LoopSplitTaskDataCommon {
Span<int2> edges;
Span<int> corner_verts;
Span<int> corner_edges;
blender::OffsetIndices<int> polys;
OffsetIndices<int> polys;
Span<int2> edge_to_loops;
Span<int> loop_to_poly;
Span<float3> poly_normals;
@ -716,8 +708,6 @@ struct LoopSplitTaskDataCommon {
/* See comment about edge_to_loops below. */
#define IS_EDGE_SHARP(_e2l) ELEM((_e2l)[1], INDEX_UNSET, INDEX_INVALID)
namespace blender::bke::mesh {
static void mesh_edges_sharp_tag(const OffsetIndices<int> polys,
const Span<int> corner_verts,
const Span<int> corner_edges,
@ -822,16 +812,16 @@ void edges_sharp_from_angle_set(const OffsetIndices<int> polys,
}
static void loop_manifold_fan_around_vert_next(const Span<int> corner_verts,
const blender::OffsetIndices<int> polys,
const OffsetIndices<int> polys,
const Span<int> loop_to_poly,
const int *e2lfan_curr,
const uint mv_pivot_index,
const int vert_pivot,
int *r_mlfan_curr_index,
int *r_mlfan_vert_index,
int *r_mpfan_curr_index)
{
const int mlfan_curr_orig = *r_mlfan_curr_index;
const uint vert_fan_orig = corner_verts[mlfan_curr_orig];
const int vert_fan_orig = corner_verts[mlfan_curr_orig];
/* WARNING: This is rather complex!
* We have to find our next edge around the vertex (fan mode).
@ -846,20 +836,20 @@ static void loop_manifold_fan_around_vert_next(const Span<int> corner_verts,
BLI_assert(*r_mlfan_curr_index >= 0);
BLI_assert(*r_mpfan_curr_index >= 0);
const uint vert_fan_next = corner_verts[*r_mlfan_curr_index];
const blender::IndexRange mpfan_next = polys[*r_mpfan_curr_index];
if ((vert_fan_orig == vert_fan_next && vert_fan_orig == mv_pivot_index) ||
!ELEM(vert_fan_orig, vert_fan_next, mv_pivot_index)) {
const int vert_fan_next = corner_verts[*r_mlfan_curr_index];
const IndexRange poly_fan_next = polys[*r_mpfan_curr_index];
if ((vert_fan_orig == vert_fan_next && vert_fan_orig == vert_pivot) ||
!ELEM(vert_fan_orig, vert_fan_next, vert_pivot)) {
/* We need the previous loop, but current one is our vertex's loop. */
*r_mlfan_vert_index = *r_mlfan_curr_index;
if (--(*r_mlfan_curr_index) < mpfan_next.start()) {
*r_mlfan_curr_index = mpfan_next.start() + mpfan_next.size() - 1;
if (--(*r_mlfan_curr_index) < poly_fan_next.start()) {
*r_mlfan_curr_index = poly_fan_next.start() + poly_fan_next.size() - 1;
}
}
else {
/* We need the next loop, which is also our vertex's loop. */
if (++(*r_mlfan_curr_index) >= mpfan_next.start() + mpfan_next.size()) {
*r_mlfan_curr_index = mpfan_next.start();
if (++(*r_mlfan_curr_index) >= poly_fan_next.start() + poly_fan_next.size()) {
*r_mlfan_curr_index = poly_fan_next.start();
}
*r_mlfan_vert_index = *r_mlfan_curr_index;
}
@ -872,6 +862,7 @@ static void split_loop_nor_single_do(LoopSplitTaskDataCommon *common_data, LoopS
const Span<float3> positions = common_data->positions;
const Span<int2> edges = common_data->edges;
const OffsetIndices polys = common_data->polys;
const Span<int> corner_verts = common_data->corner_verts;
const Span<int> corner_edges = common_data->corner_edges;
const Span<float3> poly_normals = common_data->poly_normals;
@ -879,7 +870,6 @@ static void split_loop_nor_single_do(LoopSplitTaskDataCommon *common_data, LoopS
MLoopNorSpace *lnor_space = data->lnor_space;
const int ml_curr_index = data->ml_curr_index;
const int ml_prev_index = data->ml_prev_index;
const int poly_index = data->poly_index;
/* Simple case (both edges around that vertex are sharp in current polygon),
@ -898,17 +888,18 @@ static void split_loop_nor_single_do(LoopSplitTaskDataCommon *common_data, LoopS
/* If needed, generate this (simple!) lnor space. */
if (lnors_spacearr) {
float vec_curr[3], vec_prev[3];
const int ml_prev_index = mesh::poly_corner_prev(polys[poly_index], ml_curr_index);
const int mv_pivot_index =
corner_verts[ml_curr_index]; /* The vertex we are "fanning" around! */
const int2 &me_curr = edges[corner_edges[ml_curr_index]];
const int vert_2 = me_curr[0] == mv_pivot_index ? me_curr[1] : me_curr[0];
const int2 &me_prev = edges[corner_edges[ml_prev_index]];
const int vert_3 = me_prev[0] == mv_pivot_index ? me_prev[1] : me_prev[0];
/* The vertex we are "fanning" around. */
const int vert_pivot = corner_verts[ml_curr_index];
const int2 &edge = edges[corner_edges[ml_curr_index]];
const int vert_2 = edge_other_vert(edge, vert_pivot);
const int2 &edge_prev = edges[corner_edges[ml_prev_index]];
const int vert_3 = edge_other_vert(edge_prev, vert_pivot);
sub_v3_v3v3(vec_curr, positions[vert_2], positions[mv_pivot_index]);
sub_v3_v3v3(vec_curr, positions[vert_2], positions[vert_pivot]);
normalize_v3(vec_curr);
sub_v3_v3v3(vec_prev, positions[vert_3], positions[mv_pivot_index]);
sub_v3_v3v3(vec_prev, positions[vert_3], positions[vert_pivot]);
normalize_v3(vec_prev);
BKE_lnor_space_define(lnor_space, loop_normals[ml_curr_index], vec_curr, vec_prev, nullptr);
@ -932,7 +923,7 @@ static void split_loop_nor_fan_do(LoopSplitTaskDataCommon *common_data,
const Span<float3> positions = common_data->positions;
const Span<int2> edges = common_data->edges;
const blender::OffsetIndices polys = common_data->polys;
const OffsetIndices polys = common_data->polys;
const Span<int> corner_verts = common_data->corner_verts;
const Span<int> corner_edges = common_data->corner_edges;
const Span<int2> edge_to_loops = common_data->edge_to_loops;
@ -944,8 +935,8 @@ static void split_loop_nor_fan_do(LoopSplitTaskDataCommon *common_data,
float(*lnor)[3] = data->lnor;
#endif
const int ml_curr_index = data->ml_curr_index;
const int ml_prev_index = data->ml_prev_index;
const int poly_index = data->poly_index;
const int ml_prev_index = poly_corner_prev(polys[poly_index], ml_curr_index);
/* Sigh! we have to fan around current vertex, until we find the other non-smooth edge,
* and accumulate face normals into the vertex!
@ -953,10 +944,10 @@ static void split_loop_nor_fan_do(LoopSplitTaskDataCommon *common_data,
* same as the vertex normal, but I do not see any easy way to detect that (would need to count
* number of sharp edges per vertex, I doubt the additional memory usage would be worth it,
* especially as it should not be a common case in real-life meshes anyway). */
const int mv_pivot_index = corner_verts[ml_curr_index]; /* The vertex we are "fanning" around! */
const int vert_pivot = corner_verts[ml_curr_index]; /* The vertex we are "fanning" around! */
/* `ml_curr_index` would be mlfan_prev if we needed that one. */
const int2 &me_org = edges[corner_edges[ml_curr_index]];
const int2 &edge_orig = edges[corner_edges[ml_curr_index]];
float vec_curr[3], vec_prev[3], vec_org[3];
float lnor[3] = {0.0f, 0.0f, 0.0f};
@ -984,10 +975,8 @@ static void split_loop_nor_fan_do(LoopSplitTaskDataCommon *common_data,
/* Only need to compute previous edge's vector once, then we can just reuse old current one! */
{
const float3 &mv_2 = (me_org[0] == mv_pivot_index) ? positions[me_org[1]] :
positions[me_org[0]];
sub_v3_v3v3(vec_org, mv_2, positions[mv_pivot_index]);
const int vert_2 = edge_other_vert(edge_orig, vert_pivot);
sub_v3_v3v3(vec_org, positions[vert_2], positions[vert_pivot]);
normalize_v3(vec_org);
copy_v3_v3(vec_prev, vec_org);
@ -996,20 +985,18 @@ static void split_loop_nor_fan_do(LoopSplitTaskDataCommon *common_data,
}
}
// printf("FAN: vert %d, start edge %d\n", mv_pivot_index, ml_curr->e);
// printf("FAN: vert %d, start edge %d\n", vert_pivot, ml_curr->e);
while (true) {
const int2 &me_curr = edges[corner_edges[mlfan_curr_index]];
const int2 &edge = edges[corner_edges[mlfan_curr_index]];
/* Compute edge vectors.
* NOTE: We could pre-compute those into an array, in the first iteration, instead of computing
* them twice (or more) here. However, time gained is not worth memory and time lost,
* given the fact that this code should not be called that much in real-life meshes.
*/
{
const float3 &mv_2 = (me_curr[0] == mv_pivot_index) ? positions[me_curr[1]] :
positions[me_curr[0]];
sub_v3_v3v3(vec_curr, mv_2, positions[mv_pivot_index]);
const int vert_2 = edge_other_vert(edge, vert_pivot);
sub_v3_v3v3(vec_curr, positions[vert_2], positions[vert_pivot]);
normalize_v3(vec_curr);
}
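/* The repeated `(edge[0] == pivot) ? edge[1] : edge[0]` ternaries were folded
 * into edge_other_vert(). A plausible definition, assuming an edge stores two
 * vertex indices (sketch, not necessarily the verbatim Blender helper): */
static inline int edge_other_vert(const int edge[2], const int vert)
{
  /* Given one endpoint of the edge, return the opposite endpoint. */
  return (edge[0] == vert) ? edge[1] : edge[0];
}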
@ -1045,13 +1032,13 @@ static void split_loop_nor_fan_do(LoopSplitTaskDataCommon *common_data,
if (lnors_spacearr) {
/* Assign current lnor space to current 'vertex' loop. */
BKE_lnor_space_add_loop(lnors_spacearr, lnor_space, mlfan_vert_index, nullptr, false);
if (me_curr != me_org) {
if (edge != edge_orig) {
/* We store here all edges-normalized vectors processed. */
BLI_stack_push(edge_vectors, vec_curr);
}
}
if (IS_EDGE_SHARP(edge_to_loops[corner_edges[mlfan_curr_index]]) || (me_curr == me_org)) {
if (IS_EDGE_SHARP(edge_to_loops[corner_edges[mlfan_curr_index]]) || (edge == edge_orig)) {
/* Current edge is sharp and we have finished with this fan of faces around this vert,
* or this vert is smooth, and we have completed a full turn around it. */
// printf("FAN: Finished!\n");
@ -1065,7 +1052,7 @@ static void split_loop_nor_fan_do(LoopSplitTaskDataCommon *common_data,
polys,
loop_to_poly,
edge_to_loops[corner_edges[mlfan_curr_index]],
mv_pivot_index,
vert_pivot,
&mlfan_curr_index,
&mlfan_vert_index,
&mpfan_curr_index);
@ -1167,7 +1154,7 @@ static void loop_split_worker(TaskPool *__restrict pool, void *taskdata)
*/
static bool loop_split_generator_check_cyclic_smooth_fan(const Span<int> corner_verts,
const Span<int> corner_edges,
const blender::OffsetIndices<int> polys,
const OffsetIndices<int> polys,
const Span<int2> edge_to_loops,
const Span<int> loop_to_poly,
const int *e2l_prev,
@ -1176,8 +1163,8 @@ static bool loop_split_generator_check_cyclic_smooth_fan(const Span<int> corner_
const int ml_prev_index,
const int mp_curr_index)
{
/* The vertex we are "fanning" around! */
const uint mv_pivot_index = corner_verts[ml_curr_index];
/* The vertex we are "fanning" around. */
const int vert_pivot = corner_verts[ml_curr_index];
const int *e2lfan_curr = e2l_prev;
if (IS_EDGE_SHARP(e2lfan_curr)) {
@ -1204,7 +1191,7 @@ static bool loop_split_generator_check_cyclic_smooth_fan(const Span<int> corner_
polys,
loop_to_poly,
e2lfan_curr,
mv_pivot_index,
vert_pivot,
&mlfan_curr_index,
&mlfan_vert_index,
&mpfan_curr_index);
@ -1322,7 +1309,6 @@ static void loop_split_generator(TaskPool *pool, LoopSplitTaskDataCommon *common
if (IS_EDGE_SHARP(edge_to_loops[corner_edges[ml_curr_index]]) &&
IS_EDGE_SHARP(edge_to_loops[corner_edges[ml_prev_index]])) {
data->ml_curr_index = ml_curr_index;
data->ml_prev_index = ml_prev_index;
data->flag = LoopSplitTaskData::Type::Single;
data->poly_index = poly_index;
if (lnors_spacearr) {
@ -1338,7 +1324,6 @@ static void loop_split_generator(TaskPool *pool, LoopSplitTaskDataCommon *common
* sharp previous edge). All this due/thanks to the link between normals and loop
* ordering (i.e. winding). */
data->ml_curr_index = ml_curr_index;
data->ml_prev_index = ml_prev_index;
data->flag = LoopSplitTaskData::Type::Fan;
data->poly_index = poly_index;
if (lnors_spacearr) {
@ -1825,19 +1810,19 @@ static void mesh_set_custom_normals(Mesh *mesh, float (*r_custom_nors)[3], const
const bool *sharp_faces = static_cast<const bool *>(
CustomData_get_layer_named(&mesh->pdata, CD_PROP_BOOL, "sharp_face"));
mesh_normals_loop_custom_set(mesh->vert_positions(),
mesh->edges(),
mesh->polys(),
mesh->corner_verts(),
mesh->corner_edges(),
mesh->vert_normals(),
mesh->poly_normals(),
sharp_faces,
use_vertices,
{reinterpret_cast<blender::float3 *>(r_custom_nors),
use_vertices ? mesh->totvert : mesh->totloop},
sharp_edges.span,
clnors);
mesh_normals_loop_custom_set(
mesh->vert_positions(),
mesh->edges(),
mesh->polys(),
mesh->corner_verts(),
mesh->corner_edges(),
mesh->vert_normals(),
mesh->poly_normals(),
sharp_faces,
use_vertices,
{reinterpret_cast<float3 *>(r_custom_nors), use_vertices ? mesh->totvert : mesh->totloop},
sharp_edges.span,
clnors);
sharp_edges.finish();
}

View File

@ -24,6 +24,7 @@
#include "BLT_translation.h"
#include "DNA_defaults.h"
#include "DNA_userdef_types.h"
#define U BLI_STATIC_ASSERT(false, "Global 'U' not allowed, only use arguments passed in!")
@ -37,6 +38,7 @@ bUserAssetLibrary *BKE_preferences_asset_library_add(UserDef *userdef,
const char *path)
{
bUserAssetLibrary *library = MEM_callocN(sizeof(*library), "bUserAssetLibrary");
memcpy(library, DNA_struct_default_get(bUserAssetLibrary), sizeof(*library));
BLI_addtail(&userdef->asset_libraries, library);
@ -46,7 +48,6 @@ bUserAssetLibrary *BKE_preferences_asset_library_add(UserDef *userdef,
if (path) {
BLI_strncpy(library->path, path, sizeof(library->path));
}
library->import_method = ASSET_IMPORT_APPEND_REUSE;
return library;
}

View File

@ -242,7 +242,7 @@ void BKE_screen_foreach_id_screen_area(LibraryForeachIDData *data, ScrArea *area
}
}
/* Both `snode->id` and `snode->nodetree` have been remapped now, sotheir data can be
/* Both `snode->id` and `snode->nodetree` have been remapped now, so their data can be
* accessed. */
BLI_assert(snode->id == NULL || snode->nodetree == NULL ||
(snode->nodetree->id.flag & LIB_EMBEDDED_DATA) == 0 ||

View File

@ -1728,7 +1728,7 @@ static void subdiv_foreach_mark_non_loose_geometry(SubdivForeachTaskContext *ctx
for (const int poly_index : ctx->coarse_polys.index_range()) {
for (const int corner : ctx->coarse_polys[poly_index]) {
BLI_BITMAP_ENABLE(ctx->coarse_vertices_used_map, ctx->coarse_corner_verts[corner]);
BLI_BITMAP_ENABLE(ctx->coarse_edges_used_map, ctx->coarse_corner_verts[corner]);
BLI_BITMAP_ENABLE(ctx->coarse_edges_used_map, ctx->coarse_corner_edges[corner]);
}
}
}

View File

@ -1787,16 +1787,8 @@ bool BKE_vfont_to_curve_ex(Object *ob,
};
do {
data.ok &= vfont_to_curve(ob,
cu,
mode,
&data,
NULL,
r_nubase,
r_text,
r_text_len,
r_text_free,
r_chartransdata);
data.ok &= vfont_to_curve(
ob, cu, mode, &data, NULL, r_nubase, r_text, r_text_len, r_text_free, r_chartransdata);
} while (data.ok && ELEM(data.status, VFONT_TO_CURVE_SCALE_ONCE, VFONT_TO_CURVE_BISECT));
return data.ok;

View File

@ -7,8 +7,10 @@
# define __has_feature(x) 0
#endif
#if (defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer)) && !defined(_MSC_VER)
#if (defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer)) && \
(!defined(_MSC_VER) || _MSC_VER > 1929) /* MSVC 2019 and below doesn't ship ASAN headers. */
# include "sanitizer/asan_interface.h"
# define WITH_ASAN
#else
/* Ensure return value is used. Just using UNUSED_VARS results in a warning. */
# define ASAN_POISON_MEMORY_REGION(addr, size) (void)(0 && ((size) != 0 && (addr) != NULL))
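/* Minimal standalone illustration of the macros above: under ASAN a poisoned
 * region traps any access, while the fallback collapses to a no-op that still
 * "uses" its arguments so call sites compile warning-free. The simplified
 * detection here omits the __has_feature/MSVC handling from the real header. */
#include <cstdlib>
#if defined(__SANITIZE_ADDRESS__)
#  include "sanitizer/asan_interface.h"
#else
#  define ASAN_POISON_MEMORY_REGION(addr, size) (void)(0 && ((size) != 0 && (addr) != NULL))
#  define ASAN_UNPOISON_MEMORY_REGION(addr, size) (void)(0 && ((size) != 0 && (addr) != NULL))
#endif

int main()
{
  char *buffer = static_cast<char *>(malloc(64));
  ASAN_POISON_MEMORY_REGION(buffer, 64);   /* Any read/write is now reported. */
  ASAN_UNPOISON_MEMORY_REGION(buffer, 64); /* Must be made accessible before use. */
  buffer[0] = 1;
  free(buffer);
  return 0;
}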

View File

@ -20,9 +20,14 @@
#include "BLI_utildefines.h"
#include "BLI_asan.h"
#include "BLI_mempool.h" /* own include */
#include "BLI_mempool_private.h" /* own include */
#ifdef WITH_ASAN
# include "BLI_threads.h"
#endif
#include "MEM_guardedalloc.h"
#include "BLI_strict_flags.h" /* keep last */
@ -31,6 +36,12 @@
# include "valgrind/memcheck.h"
#endif
#ifdef WITH_ASAN
# define POISON_REDZONE_SIZE 32
#else
# define POISON_REDZONE_SIZE 0
#endif
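/* Worked example of the redzone arithmetic, assuming 96-byte user elements:
 * with ASAN, esize becomes 96 + POISON_REDZONE_SIZE = 128, and live elements
 * are only ever unpoisoned for `esize - POISON_REDZONE_SIZE` bytes, giving a
 * per-element layout of
 *
 *   [ 96 usable bytes | 32 poisoned bytes ]
 *
 * so a small overrun past an element lands in poisoned shadow memory and is
 * reported immediately instead of silently corrupting the next free node. */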
/* NOTE: copied from BLO_blend_defs.h, don't use here because we're in BLI. */
#ifdef __BIG_ENDIAN__
/* Big Endian */
@ -95,6 +106,10 @@ typedef struct BLI_mempool_chunk {
* The mempool, stores and tracks memory \a chunks and elements within those chunks \a free.
*/
struct BLI_mempool {
/* Serialize access to mempools when debugging with ASAN. */
#ifdef WITH_ASAN
ThreadMutex mutex;
#endif
/** Single linked list of allocated chunks. */
BLI_mempool_chunk *chunks;
/** Keep a pointer to the last, so we can append new chunks there
@ -108,7 +123,6 @@ struct BLI_mempool {
/** Number of elements per chunk. */
uint pchunk;
uint flag;
/* keeps aligned to 16 bits */
/** Free element list. Interleaved into chunk data. */
BLI_freenode *free;
@ -132,6 +146,24 @@ struct BLI_mempool {
/** Extra bytes implicitly used for every chunk alloc. */
#define CHUNK_OVERHEAD (uint)(MEM_SIZE_OVERHEAD + sizeof(BLI_mempool_chunk))
static void mempool_asan_unlock(BLI_mempool *pool)
{
#ifdef WITH_ASAN
BLI_mutex_unlock(&pool->mutex);
#else
UNUSED_VARS(pool);
#endif
}
static void mempool_asan_lock(BLI_mempool *pool)
{
#ifdef WITH_ASAN
BLI_mutex_lock(&pool->mutex);
#else
UNUSED_VARS(pool);
#endif
}
#ifdef USE_CHUNK_POW2
static uint power_of_2_max_u(uint x)
{
@ -166,7 +198,7 @@ BLI_INLINE uint mempool_maxchunks(const uint elem_num, const uint pchunk)
static BLI_mempool_chunk *mempool_chunk_alloc(BLI_mempool *pool)
{
return MEM_mallocN(sizeof(BLI_mempool_chunk) + (size_t)pool->csize, "BLI_Mempool Chunk");
return MEM_mallocN(sizeof(BLI_mempool_chunk) + (size_t)pool->csize, "mempool chunk");
}
/**
@ -206,22 +238,41 @@ static BLI_freenode *mempool_chunk_add(BLI_mempool *pool,
j = pool->pchunk;
if (pool->flag & BLI_MEMPOOL_ALLOW_ITER) {
while (j--) {
curnode->next = NODE_STEP_NEXT(curnode);
BLI_freenode *next;
BLI_asan_unpoison(curnode, pool->esize - POISON_REDZONE_SIZE);
curnode->next = next = NODE_STEP_NEXT(curnode);
curnode->freeword = FREEWORD;
curnode = curnode->next;
BLI_asan_poison(curnode, pool->esize);
curnode = next;
}
}
else {
while (j--) {
curnode->next = NODE_STEP_NEXT(curnode);
curnode = curnode->next;
BLI_freenode *next;
BLI_asan_unpoison(curnode, pool->esize - POISON_REDZONE_SIZE);
curnode->next = next = NODE_STEP_NEXT(curnode);
BLI_asan_poison(curnode, pool->esize);
curnode = next;
}
}
/* terminate the list (rewind one)
* will be overwritten if 'curnode' gets passed in again as 'last_tail' */
BLI_asan_unpoison(curnode, pool->esize - POISON_REDZONE_SIZE);
BLI_asan_poison(curnode, pool->esize);
curnode = NODE_STEP_PREV(curnode);
BLI_asan_unpoison(curnode, pool->esize - POISON_REDZONE_SIZE);
curnode->next = NULL;
BLI_asan_poison(curnode, pool->esize);
#ifdef USE_TOTALLOC
pool->totalloc += pool->pchunk;
@ -229,24 +280,27 @@ static BLI_freenode *mempool_chunk_add(BLI_mempool *pool,
/* final pointer in the previously allocated chunk is wrong */
if (last_tail) {
BLI_asan_unpoison(last_tail, pool->esize - POISON_REDZONE_SIZE);
last_tail->next = CHUNK_DATA(mpchunk);
BLI_asan_poison(last_tail, pool->esize);
}
return curnode;
}
static void mempool_chunk_free(BLI_mempool_chunk *mpchunk)
static void mempool_chunk_free(BLI_mempool_chunk *mpchunk, BLI_mempool *pool)
{
BLI_asan_unpoison(mpchunk, sizeof(BLI_mempool_chunk) + pool->esize * pool->csize);
MEM_freeN(mpchunk);
}
static void mempool_chunk_free_all(BLI_mempool_chunk *mpchunk)
static void mempool_chunk_free_all(BLI_mempool_chunk *mpchunk, BLI_mempool *pool)
{
BLI_mempool_chunk *mpchunk_next;
for (; mpchunk; mpchunk = mpchunk_next) {
mpchunk_next = mpchunk->next;
mempool_chunk_free(mpchunk);
mempool_chunk_free(mpchunk, pool);
}
}
@ -259,6 +313,10 @@ BLI_mempool *BLI_mempool_create(uint esize, uint elem_num, uint pchunk, uint fla
/* allocate the pool structure */
pool = MEM_mallocN(sizeof(BLI_mempool), "memory pool");
#ifdef WITH_ASAN
BLI_mutex_init(&pool->mutex);
#endif
/* set the elem size */
if (esize < (int)MEMPOOL_ELEM_SIZE_MIN) {
esize = (int)MEMPOOL_ELEM_SIZE_MIN;
@ -268,6 +326,8 @@ BLI_mempool *BLI_mempool_create(uint esize, uint elem_num, uint pchunk, uint fla
esize = MAX2(esize, (uint)sizeof(BLI_freenode));
}
esize += POISON_REDZONE_SIZE;
maxchunks = mempool_maxchunks(elem_num, pchunk);
pool->chunks = NULL;
@ -328,6 +388,8 @@ void *BLI_mempool_alloc(BLI_mempool *pool)
free_pop = pool->free;
BLI_asan_unpoison(free_pop, pool->esize - POISON_REDZONE_SIZE);
BLI_assert(pool->chunk_tail->next == NULL);
if (pool->flag & BLI_MEMPOOL_ALLOW_ITER) {
@ -347,10 +409,17 @@ void *BLI_mempool_alloc(BLI_mempool *pool)
void *BLI_mempool_calloc(BLI_mempool *pool)
{
void *retval = BLI_mempool_alloc(pool);
memset(retval, 0, (size_t)pool->esize);
memset(retval, 0, (size_t)pool->esize - POISON_REDZONE_SIZE);
return retval;
}
/**
* Free an element from the mempool.
*
* \note doesn't protect against double frees, take care!
*/
void BLI_mempool_free(BLI_mempool *pool, void *addr)
{
BLI_freenode *newhead = addr;
@ -372,7 +441,7 @@ void BLI_mempool_free(BLI_mempool *pool, void *addr)
/* Enable for debugging. */
if (UNLIKELY(mempool_debug_memset)) {
memset(addr, 255, pool->esize);
memset(addr, 255, pool->esize - POISON_REDZONE_SIZE);
}
#endif
@ -387,6 +456,8 @@ void BLI_mempool_free(BLI_mempool *pool, void *addr)
newhead->next = pool->free;
pool->free = newhead;
BLI_asan_poison(newhead, pool->esize);
pool->totused--;
#ifdef WITH_MEM_VALGRIND
@ -401,7 +472,7 @@ void BLI_mempool_free(BLI_mempool *pool, void *addr)
BLI_mempool_chunk *first;
first = pool->chunks;
mempool_chunk_free_all(first->next);
mempool_chunk_free_all(first->next, pool);
first->next = NULL;
pool->chunk_tail = first;
@ -419,11 +490,21 @@ void BLI_mempool_free(BLI_mempool *pool, void *addr)
j = pool->pchunk;
while (j--) {
curnode->next = NODE_STEP_NEXT(curnode);
curnode = curnode->next;
BLI_asan_unpoison(curnode, pool->esize - POISON_REDZONE_SIZE);
BLI_freenode *next = curnode->next = NODE_STEP_NEXT(curnode);
BLI_asan_poison(curnode, pool->esize);
curnode = next;
}
curnode = NODE_STEP_PREV(curnode);
BLI_asan_unpoison(curnode, pool->esize - POISON_REDZONE_SIZE);
BLI_freenode *prev = NODE_STEP_PREV(curnode);
BLI_asan_poison(curnode, pool->esize);
curnode = prev;
BLI_asan_unpoison(curnode, pool->esize - POISON_REDZONE_SIZE);
curnode->next = NULL; /* terminate the list */
BLI_asan_poison(curnode, pool->esize);
#ifdef WITH_MEM_VALGRIND
VALGRIND_MEMPOOL_FREE(pool, CHUNK_DATA(first));
@ -433,14 +514,18 @@ void BLI_mempool_free(BLI_mempool *pool, void *addr)
int BLI_mempool_len(const BLI_mempool *pool)
{
return (int)pool->totused;
int ret = (int)pool->totused;
return ret;
}
void *BLI_mempool_findelem(BLI_mempool *pool, uint index)
{
mempool_asan_lock(pool);
BLI_assert(pool->flag & BLI_MEMPOOL_ALLOW_ITER);
if (index < pool->totused) {
if (index < (uint)pool->totused) {
/* We could have some faster mem chunk stepping code inline. */
BLI_mempool_iter iter;
void *elem;
@ -448,9 +533,12 @@ void *BLI_mempool_findelem(BLI_mempool *pool, uint index)
for (elem = BLI_mempool_iterstep(&iter); index-- != 0; elem = BLI_mempool_iterstep(&iter)) {
/* pass */
}
mempool_asan_unlock(pool);
return elem;
}
mempool_asan_unlock(pool);
return NULL;
}
@ -459,12 +547,16 @@ void BLI_mempool_as_table(BLI_mempool *pool, void **data)
BLI_mempool_iter iter;
void *elem;
void **p = data;
BLI_assert(pool->flag & BLI_MEMPOOL_ALLOW_ITER);
BLI_mempool_iternew(pool, &iter);
while ((elem = BLI_mempool_iterstep(&iter))) {
*p++ = elem;
}
BLI_assert((uint)(p - data) == pool->totused);
BLI_assert((ptrdiff_t)(p - data) == (ptrdiff_t)pool->totused);
}
void **BLI_mempool_as_tableN(BLI_mempool *pool, const char *allocstr)
@ -476,21 +568,24 @@ void **BLI_mempool_as_tableN(BLI_mempool *pool, const char *allocstr)
void BLI_mempool_as_array(BLI_mempool *pool, void *data)
{
const uint esize = pool->esize;
const uint esize = pool->esize - (uint)POISON_REDZONE_SIZE;
BLI_mempool_iter iter;
char *elem, *p = data;
BLI_assert(pool->flag & BLI_MEMPOOL_ALLOW_ITER);
mempool_asan_lock(pool);
BLI_mempool_iternew(pool, &iter);
while ((elem = BLI_mempool_iterstep(&iter))) {
memcpy(p, elem, (size_t)esize);
p = NODE_STEP_NEXT(p);
}
BLI_assert((uint)(p - (char *)data) == pool->totused * esize);
mempool_asan_unlock(pool);
}
void *BLI_mempool_as_arrayN(BLI_mempool *pool, const char *allocstr)
{
char *data = MEM_malloc_arrayN(pool->totused, pool->esize, allocstr);
char *data = MEM_malloc_arrayN((size_t)pool->totused, pool->esize, allocstr);
BLI_mempool_as_array(pool, data);
return data;
}
@ -586,6 +681,8 @@ void *BLI_mempool_iterstep(BLI_mempool_iter *iter)
do {
ret = curnode;
BLI_asan_unpoison(ret, iter->pool->esize - POISON_REDZONE_SIZE);
if (++iter->curindex != iter->pool->pchunk) {
curnode = POINTER_OFFSET(curnode, esize);
}
@ -593,7 +690,14 @@ void *BLI_mempool_iterstep(BLI_mempool_iter *iter)
iter->curindex = 0;
iter->curchunk = iter->curchunk->next;
if (UNLIKELY(iter->curchunk == NULL)) {
return (ret->freeword == FREEWORD) ? NULL : ret;
BLI_asan_unpoison(ret, iter->pool->esize - POISON_REDZONE_SIZE);
void *ret2 = (ret->freeword == FREEWORD) ? NULL : ret;
if (ret->freeword == FREEWORD) {
BLI_asan_poison(ret, iter->pool->esize);
}
return ret2;
}
curnode = CHUNK_DATA(iter->curchunk);
}
@ -609,12 +713,16 @@ void *mempool_iter_threadsafe_step(BLI_mempool_threadsafe_iter *ts_iter)
return NULL;
}
mempool_asan_lock(iter->pool);
const uint esize = iter->pool->esize;
BLI_freenode *curnode = POINTER_OFFSET(CHUNK_DATA(iter->curchunk), (esize * iter->curindex));
BLI_freenode *ret;
do {
ret = curnode;
BLI_asan_unpoison(ret, esize - POISON_REDZONE_SIZE);
if (++iter->curindex != iter->pool->pchunk) {
curnode = POINTER_OFFSET(curnode, esize);
}
@ -630,24 +738,49 @@ void *mempool_iter_threadsafe_step(BLI_mempool_threadsafe_iter *ts_iter)
/* pass. */
}
if (UNLIKELY(iter->curchunk == NULL)) {
return (ret->freeword == FREEWORD) ? NULL : ret;
if (ret->freeword == FREEWORD) {
BLI_asan_poison(ret, esize);
mempool_asan_unlock(iter->pool);
return NULL;
}
else {
mempool_asan_unlock(iter->pool);
return ret;
}
}
/* End `threadsafe` exception. */
iter->curchunk = iter->curchunk->next;
if (UNLIKELY(iter->curchunk == NULL)) {
return (ret->freeword == FREEWORD) ? NULL : ret;
if (ret->freeword == FREEWORD) {
BLI_asan_poison(ret, iter->pool->esize);
mempool_asan_unlock(iter->pool);
return NULL;
}
else {
mempool_asan_unlock(iter->pool);
return ret;
}
}
curnode = CHUNK_DATA(iter->curchunk);
}
} while (ret->freeword == FREEWORD);
if (ret->freeword == FREEWORD) {
BLI_asan_poison(ret, iter->pool->esize);
}
else {
break;
}
} while (true);
mempool_asan_unlock(iter->pool);
return ret;
}
#endif
void BLI_mempool_clear_ex(BLI_mempool *pool, const int totelem_reserve)
void BLI_mempool_clear_ex(BLI_mempool *pool, const int elem_num_reserve)
{
BLI_mempool_chunk *mpchunk;
BLI_mempool_chunk *mpchunk_next;
@ -661,11 +794,11 @@ void BLI_mempool_clear_ex(BLI_mempool *pool, const int totelem_reserve)
VALGRIND_CREATE_MEMPOOL(pool, 0, false);
#endif
if (totelem_reserve == -1) {
if (elem_num_reserve == -1) {
maxchunks = pool->maxchunks;
}
else {
maxchunks = mempool_maxchunks((uint)totelem_reserve, pool->pchunk);
maxchunks = mempool_maxchunks((uint)elem_num_reserve, pool->pchunk);
}
/* Free all after 'pool->maxchunks'. */
@ -678,7 +811,7 @@ void BLI_mempool_clear_ex(BLI_mempool *pool, const int totelem_reserve)
do {
mpchunk_next = mpchunk->next;
mempool_chunk_free(mpchunk);
mempool_chunk_free(mpchunk, pool);
} while ((mpchunk = mpchunk_next));
}
@ -706,7 +839,7 @@ void BLI_mempool_clear(BLI_mempool *pool)
void BLI_mempool_destroy(BLI_mempool *pool)
{
mempool_chunk_free_all(pool->chunks);
mempool_chunk_free_all(pool->chunks, pool);
#ifdef WITH_MEM_VALGRIND
VALGRIND_DESTROY_MEMPOOL(pool);

View File

@ -1349,6 +1349,18 @@ void do_versions_after_linking_300(FileData * /*fd*/, Main *bmain)
FOREACH_NODETREE_END;
}
if (!MAIN_VERSION_ATLEAST(bmain, 306, 6)) {
LISTBASE_FOREACH (Scene *, scene, &bmain->scenes) {
Editing *ed = SEQ_editing_get(scene);
if (ed == nullptr) {
continue;
}
SEQ_for_each_callback(
&scene->ed->seqbase, do_versions_sequencer_init_retiming_tool_data, scene);
}
}
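/* A standalone model of the gate used above: MAIN_VERSION_ATLEAST compares the
 * file's (version, subversion) pair lexicographically, so once
 * BLENDER_FILE_SUBVERSION is bumped to 6 this block runs only for files saved
 * before the bump (names illustrative, semantics per the macro's usage): */
static bool version_at_least(const int file_ver, const int file_subver,
                             const int ver, const int subver)
{
  return file_ver > ver || (file_ver == ver && file_subver >= subver);
}
/* E.g. a file saved at (306, 5) fails version_at_least(306, 5, 306, 6), so the
 * retiming-tool initialization above is applied to it exactly once. */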
/**
* Versioning code until next subversion bump goes here.
*
@ -1361,16 +1373,6 @@ void do_versions_after_linking_300(FileData * /*fd*/, Main *bmain)
*/
{
/* Keep this block, even when empty. */
LISTBASE_FOREACH (Scene *, scene, &bmain->scenes) {
Editing *ed = SEQ_editing_get(scene);
if (ed == nullptr) {
continue;
}
SEQ_for_each_callback(
&scene->ed->seqbase, do_versions_sequencer_init_retiming_tool_data, scene);
}
}
}

View File

@ -813,6 +813,12 @@ void blo_do_versions_userdef(UserDef *userdef)
}
}
if (!USER_VERSION_ATLEAST(306, 6)) {
LISTBASE_FOREACH (bUserAssetLibrary *, asset_library, &userdef->asset_libraries) {
asset_library->flag |= ASSET_LIBRARY_RELATIVE_PATH;
}
}
/**
* Versioning code until next subversion bump goes here.
*

View File

@ -32,6 +32,7 @@ void ED_asset_handle_get_full_library_path(
/* `1090` for #FILE_MAX_LIBEXTRA,
* rely on warnings to let us know if this gets out of sync. */
char r_full_lib_path[1090]);
bool ED_asset_handle_get_use_relative_path(const struct AssetHandle *asset);
#ifdef __cplusplus
}

View File

@ -68,3 +68,8 @@ void ED_asset_handle_get_full_library_path(const AssetHandle *asset_handle,
BLI_strncpy(r_full_lib_path, library_path.c_str(), FILE_MAX);
}
bool ED_asset_handle_get_use_relative_path(const AssetHandle *asset)
{
return AS_asset_representation_use_relative_path_get(asset->file_data->asset);
}

View File

@ -40,7 +40,7 @@ void ED_operatortypes_curve(void)
WM_operatortype_append(FONT_OT_text_paste);
WM_operatortype_append(FONT_OT_text_paste_from_file);
WM_operatortype_append(FONT_OT_selection_set);
WM_operatortype_append(FONT_OT_selection_set);
WM_operatortype_append(FONT_OT_select_word);
WM_operatortype_append(FONT_OT_move);

View File

@ -1917,7 +1917,7 @@ void FONT_OT_selection_set(struct wmOperatorType *ot)
/* -------------------------------------------------------------------- */
/** \name Select Word Operator
* \{ */
* \{ */
static int font_select_word_exec(bContext *C, wmOperator *UNUSED(op))
{

View File

@ -619,14 +619,13 @@ void WM_OT_usd_import(struct wmOperatorType *ot)
RNA_def_boolean(
ot->srna, "read_mesh_colors", true, "Color Attributes", "Read mesh color attributes");
RNA_def_string(
ot->srna,
"prim_path_mask",
NULL,
0,
"Path Mask",
"Import only the primitive at the given path and its descendents. "
"Multiple paths may be specified in a list delimited by commas or semicolons");
RNA_def_string(ot->srna,
"prim_path_mask",
NULL,
0,
"Path Mask",
"Import only the primitive at the given path and its descendents. "
"Multiple paths may be specified in a list delimited by commas or semicolons");
RNA_def_boolean(ot->srna, "import_guide", false, "Guide", "Import guide geometry");

View File

@ -49,6 +49,7 @@ enum {
/* NOTE: these defines are saved in keymap files, do not change values but just add new ones */
enum {
VIEW_MODAL_CANCEL = 0, /* used for all view operations */
VIEW_MODAL_CONFIRM = 1, /* used for all view operations */
VIEWROT_MODAL_AXIS_SNAP_ENABLE = 2,
VIEWROT_MODAL_AXIS_SNAP_DISABLE = 3,

View File

@ -31,6 +31,7 @@
void viewdolly_modal_keymap(wmKeyConfig *keyconf)
{
static const EnumPropertyItem modal_items[] = {
{VIEW_MODAL_CANCEL, "CANCEL", 0, "Cancel", ""},
{VIEW_MODAL_CONFIRM, "CONFIRM", 0, "Confirm", ""},
{VIEWROT_MODAL_SWITCH_ROTATE, "SWITCH_TO_ROTATE", 0, "Switch to Rotate"},
@ -48,34 +49,6 @@ void viewdolly_modal_keymap(wmKeyConfig *keyconf)
keymap = WM_modalkeymap_ensure(keyconf, "View3D Dolly Modal", modal_items);
/* disabled mode switching for now, can re-implement better, later on */
#if 0
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = LEFTMOUSE,
.value = KM_RELEASE,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEWROT_MODAL_SWITCH_ROTATE);
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = EVT_LEFTCTRLKEY,
.value = KM_RELEASE,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEWROT_MODAL_SWITCH_ROTATE);
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = EVT_LEFTSHIFTKEY,
.value = KM_PRESS,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEWROT_MODAL_SWITCH_MOVE);
#endif
/* assign map to operators */
WM_modalkeymap_assign(keymap, "VIEW3D_OT_dolly");
}
@ -167,7 +140,7 @@ static int viewdolly_modal(bContext *C, wmOperator *op, const wmEvent *event)
event_code = VIEW_CONFIRM;
}
}
else if (ELEM(event->type, EVT_ESCKEY, RIGHTMOUSE)) {
else if (event->type == EVT_ESCKEY) {
if (event->val == KM_PRESS) {
event_code = VIEW_CANCEL;
}

View File

@ -27,6 +27,7 @@
void viewmove_modal_keymap(wmKeyConfig *keyconf)
{
static const EnumPropertyItem modal_items[] = {
{VIEW_MODAL_CANCEL, "CANCEL", 0, "Cancel", ""},
{VIEW_MODAL_CONFIRM, "CONFIRM", 0, "Confirm", ""},
{VIEWROT_MODAL_SWITCH_ZOOM, "SWITCH_TO_ZOOM", 0, "Switch to Zoom"},
@ -44,53 +45,6 @@ void viewmove_modal_keymap(wmKeyConfig *keyconf)
keymap = WM_modalkeymap_ensure(keyconf, "View3D Move Modal", modal_items);
/* items for modal map */
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = MIDDLEMOUSE,
.value = KM_RELEASE,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEW_MODAL_CONFIRM);
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = EVT_ESCKEY,
.value = KM_PRESS,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEW_MODAL_CONFIRM);
/* disabled mode switching for now, can re-implement better, later on */
#if 0
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = LEFTMOUSE,
.value = KM_PRESS,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEWROT_MODAL_SWITCH_ZOOM);
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = EVT_LEFTCTRLKEY,
.value = KM_PRESS,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEWROT_MODAL_SWITCH_ZOOM);
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = EVT_LEFTSHIFTKEY,
.value = KM_RELEASE,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEWROT_MODAL_SWITCH_ROTATE);
#endif
/* assign map to operators */
WM_modalkeymap_assign(keymap, "VIEW3D_OT_move");
}
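/* With VIEW_MODAL_CANCEL exposed as a modal item, the former hard-coded
 * right-mouse cancel can be expressed as a rebindable keymap entry instead,
 * using the same API as the removed #if 0 blocks. A hedged sketch, not
 * necessarily the commit's default keymap entry: */
WM_modalkeymap_add_item(keymap,
                        &(const KeyMapItem_Params){
                            .type = RIGHTMOUSE,
                            .value = KM_PRESS,
                            .modifier = KM_ANY,
                            .direction = KM_ANY,
                        },
                        VIEW_MODAL_CANCEL);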
@ -109,6 +63,9 @@ static int viewmove_modal(bContext *C, wmOperator *op, const wmEvent *event)
case VIEW_MODAL_CONFIRM:
event_code = VIEW_CONFIRM;
break;
case VIEW_MODAL_CANCEL:
event_code = VIEW_CANCEL;
break;
case VIEWROT_MODAL_SWITCH_ZOOM:
WM_operator_name_call(C, "VIEW3D_OT_zoom", WM_OP_INVOKE_DEFAULT, NULL, event);
event_code = VIEW_CONFIRM;
@ -128,7 +85,7 @@ static int viewmove_modal(bContext *C, wmOperator *op, const wmEvent *event)
event_code = VIEW_CONFIRM;
}
}
else if (ELEM(event->type, EVT_ESCKEY, RIGHTMOUSE)) {
else if (event->type == EVT_ESCKEY) {
if (event->val == KM_PRESS) {
event_code = VIEW_CANCEL;
}

View File

@ -93,6 +93,9 @@ static int viewroll_modal(bContext *C, wmOperator *op, const wmEvent *event)
case VIEW_MODAL_CONFIRM:
event_code = VIEW_CONFIRM;
break;
case VIEW_MODAL_CANCEL:
event_code = VIEW_CANCEL;
break;
case VIEWROT_MODAL_SWITCH_MOVE:
WM_operator_name_call(C, "VIEW3D_OT_move", WM_OP_INVOKE_DEFAULT, NULL, event);
event_code = VIEW_CONFIRM;
@ -114,7 +117,7 @@ static int viewroll_modal(bContext *C, wmOperator *op, const wmEvent *event)
event_code = VIEW_CONFIRM;
}
}
else if (ELEM(event->type, EVT_ESCKEY, RIGHTMOUSE)) {
else if (event->type == EVT_ESCKEY) {
if (event->val == KM_PRESS) {
event_code = VIEW_CANCEL;
}

View File

@ -24,6 +24,7 @@
void viewrotate_modal_keymap(wmKeyConfig *keyconf)
{
static const EnumPropertyItem modal_items[] = {
{VIEW_MODAL_CANCEL, "CANCEL", 0, "Cancel", ""},
{VIEW_MODAL_CONFIRM, "CONFIRM", 0, "Confirm", ""},
{VIEWROT_MODAL_AXIS_SNAP_ENABLE, "AXIS_SNAP_ENABLE", 0, "Axis Snap", ""},
@ -44,34 +45,6 @@ void viewrotate_modal_keymap(wmKeyConfig *keyconf)
keymap = WM_modalkeymap_ensure(keyconf, "View3D Rotate Modal", modal_items);
/* disabled mode switching for now, can re-implement better, later on */
#if 0
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = LEFTMOUSE,
.value = KM_PRESS,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEWROT_MODAL_SWITCH_ZOOM);
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = EVT_LEFTCTRLKEY,
.value = KM_PRESS,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEWROT_MODAL_SWITCH_ZOOM);
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = EVT_LEFTSHIFTKEY,
.value = KM_PRESS,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEWROT_MODAL_SWITCH_MOVE);
#endif
/* assign map to operators */
WM_modalkeymap_assign(keymap, "VIEW3D_OT_rotate");
}
@ -333,6 +306,9 @@ static int viewrotate_modal(bContext *C, wmOperator *op, const wmEvent *event)
case VIEW_MODAL_CONFIRM:
event_code = VIEW_CONFIRM;
break;
case VIEW_MODAL_CANCEL:
event_code = VIEW_CANCEL;
break;
case VIEWROT_MODAL_AXIS_SNAP_ENABLE:
vod->axis_snap = true;
event_code = VIEW_APPLY;
@ -361,7 +337,7 @@ static int viewrotate_modal(bContext *C, wmOperator *op, const wmEvent *event)
event_code = VIEW_CONFIRM;
}
}
else if (ELEM(event->type, EVT_ESCKEY, RIGHTMOUSE)) {
else if (event->type == EVT_ESCKEY) {
if (event->val == KM_PRESS) {
event_code = VIEW_CANCEL;
}

View File

@ -31,6 +31,7 @@
void viewzoom_modal_keymap(wmKeyConfig *keyconf)
{
static const EnumPropertyItem modal_items[] = {
{VIEW_MODAL_CANCEL, "CANCEL", 0, "Cancel", ""},
{VIEW_MODAL_CONFIRM, "CONFIRM", 0, "Confirm", ""},
{VIEWROT_MODAL_SWITCH_ROTATE, "SWITCH_TO_ROTATE", 0, "Switch to Rotate"},
@ -48,34 +49,6 @@ void viewzoom_modal_keymap(wmKeyConfig *keyconf)
keymap = WM_modalkeymap_ensure(keyconf, "View3D Zoom Modal", modal_items);
/* disabled mode switching for now, can re-implement better, later on */
#if 0
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = LEFTMOUSE,
.value = KM_RELEASE,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEWROT_MODAL_SWITCH_ROTATE);
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = EVT_LEFTCTRLKEY,
.value = KM_RELEASE,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEWROT_MODAL_SWITCH_ROTATE);
WM_modalkeymap_add_item(keymap,
&(const KeyMapItem_Params){
.type = EVT_LEFTSHIFTKEY,
.value = KM_PRESS,
.modifier = KM_ANY,
.direction = KM_ANY,
},
VIEWROT_MODAL_SWITCH_MOVE);
#endif
/* assign map to operators */
WM_modalkeymap_assign(keymap, "VIEW3D_OT_zoom");
}
@ -383,6 +356,9 @@ static int viewzoom_modal(bContext *C, wmOperator *op, const wmEvent *event)
case VIEW_MODAL_CONFIRM:
event_code = VIEW_CONFIRM;
break;
case VIEW_MODAL_CANCEL:
event_code = VIEW_CANCEL;
break;
case VIEWROT_MODAL_SWITCH_MOVE:
WM_operator_name_call(C, "VIEW3D_OT_move", WM_OP_INVOKE_DEFAULT, NULL, event);
event_code = VIEW_CONFIRM;
@ -408,7 +384,7 @@ static int viewzoom_modal(bContext *C, wmOperator *op, const wmEvent *event)
event_code = VIEW_CONFIRM;
}
}
else if (ELEM(event->type, EVT_ESCKEY, RIGHTMOUSE)) {
else if (event->type == EVT_ESCKEY) {
if (event->val == KM_PRESS) {
event_code = VIEW_CANCEL;
}

View File

@ -47,8 +47,6 @@ VolumeGrid *fog_volume_grid_add_from_mesh(Volume *volume,
const Mesh *mesh,
const float4x4 &mesh_to_volume_space_transform,
float voxel_size,
bool fill_volume,
float exterior_band_width,
float interior_band_width,
float density);
/**

View File

@ -10,8 +10,10 @@
#include "GEO_mesh_to_volume.hh"
#ifdef WITH_OPENVDB
# include <algorithm>
# include <openvdb/openvdb.h>
# include <openvdb/tools/GridTransformer.h>
# include <openvdb/tools/LevelSetUtil.h>
# include <openvdb/tools/VolumeToMesh.h>
namespace blender::geometry {
@ -88,26 +90,28 @@ float volume_compute_voxel_size(const Depsgraph *depsgraph,
float3 bb_max;
bounds_fn(bb_min, bb_max);
/* Compute the voxel size based on the desired number of voxels and the approximated bounding
* box of the volume. */
/* Compute the diagonal of the bounding box. This is used because
* it will always be bigger than the widest side of the mesh. */
const float diagonal = math::distance(math::transform_point(transform, bb_max),
math::transform_point(transform, bb_min));
const float approximate_volume_side_length = diagonal + exterior_band_width * 2.0f;
const float voxel_size = approximate_volume_side_length / res.settings.voxel_amount /
volume_simplify;
return voxel_size;
/* To get the approximate size per voxel, first subtract the exterior band from the requested
 * voxel amount, then divide the diagonal by this value, clamped to at least 1. */
const float voxel_size =
(diagonal / std::max(1.0f, float(res.settings.voxel_amount) - 2.0f * exterior_band_width));
/* Return the simplified voxel size. */
return voxel_size / volume_simplify;
}
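/* Worked example of the new formula: with a transformed bounding-box diagonal
 * of 2.0, res.settings.voxel_amount = 64, exterior_band_width = 3 voxels and
 * volume_simplify = 1.0:
 *
 *   voxel_size = (2.0 / max(1, 64 - 2 * 3)) / 1.0 = 2.0 / 58 ≈ 0.0345
 *
 * Subtracting the exterior band first keeps the band on each side of the mesh
 * from eating into the requested voxel budget. */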
static openvdb::FloatGrid::Ptr mesh_to_fog_volume_grid(
const Mesh *mesh,
const float4x4 &mesh_to_volume_space_transform,
const float voxel_size,
const bool fill_volume,
const float exterior_band_width,
const float interior_band_width,
const float density)
{
if (voxel_size == 0.0f) {
if (voxel_size < 1e-5f || interior_band_width <= 0.0f) {
return nullptr;
}
@ -117,22 +121,21 @@ static openvdb::FloatGrid::Ptr mesh_to_fog_volume_grid(
mesh_to_index_space_transform.location() -= 0.5f;
OpenVDBMeshAdapter mesh_adapter{*mesh, mesh_to_index_space_transform};
const float interior = std::max(1.0f, interior_band_width / voxel_size);
/* Convert the bandwidths from object space to index space. */
const float exterior = MAX2(0.001f, exterior_band_width / voxel_size);
const float interior = MAX2(0.001f, interior_band_width / voxel_size);
/* Setting the interior bandwidth to FLT_MAX will make it fill the entire volume. */
openvdb::math::Transform::Ptr transform = openvdb::math::Transform::createLinearTransform(
voxel_size);
openvdb::FloatGrid::Ptr new_grid = openvdb::tools::meshToVolume<openvdb::FloatGrid>(
mesh_adapter, {}, exterior, fill_volume ? FLT_MAX : interior);
mesh_adapter, *transform, 1.0f, interior);
/* Give each grid cell a fixed density for now. */
openvdb::tools::foreach (
new_grid->beginValueOn(),
[density](const openvdb::FloatGrid::ValueOnIter &iter) { iter.setValue(density); });
new_grid->setGridClass(openvdb::GRID_FOG_VOLUME);
openvdb::tools::sdfToFogVolume(*new_grid);
if (density != 1.0f) {
openvdb::tools::foreach (new_grid->beginValueOn(),
[&](const openvdb::FloatGrid::ValueOnIter &iter) {
iter.modifyValue([&](float &value) { value *= density; });
});
}
return new_grid;
}
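/* Standalone sketch of the new pipeline using only public OpenVDB calls: build
 * a signed-distance grid, convert it to a fog volume in place with
 * sdfToFogVolume(), then scale the interior density. A level-set sphere stands
 * in for the mesh adapter here (illustrative only). */
#include <openvdb/openvdb.h>
#include <openvdb/tools/LevelSetSphere.h>
#include <openvdb/tools/LevelSetUtil.h>
#include <openvdb/tools/ValueTransformer.h>

int main()
{
  openvdb::initialize();
  const float voxel_size = 0.05f;
  openvdb::FloatGrid::Ptr grid = openvdb::tools::createLevelSetSphere<openvdb::FloatGrid>(
      /*radius=*/1.0f, /*center=*/openvdb::Vec3f(0.0f, 0.0f, 0.0f), voxel_size);
  /* Interior SDF values become densities in [0, 1]; the exterior is dropped. */
  openvdb::tools::sdfToFogVolume(*grid);
  const float density = 0.5f;
  if (density != 1.0f) {
    openvdb::tools::foreach (grid->beginValueOn(),
                             [&](const openvdb::FloatGrid::ValueOnIter &iter) {
                               iter.modifyValue([&](float &value) { value *= density; });
                             });
  }
  return grid->activeVoxelCount() > 0 ? 0 : 1;
}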
@ -180,31 +183,12 @@ VolumeGrid *fog_volume_grid_add_from_mesh(Volume *volume,
const Mesh *mesh,
const float4x4 &mesh_to_volume_space_transform,
const float voxel_size,
const bool fill_volume,
const float exterior_band_width,
const float interior_band_width,
const float density)
{
VolumeGrid *c_grid = BKE_volume_grid_add(volume, name.c_str(), VOLUME_GRID_FLOAT);
openvdb::FloatGrid::Ptr grid = openvdb::gridPtrCast<openvdb::FloatGrid>(
BKE_volume_grid_openvdb_for_write(volume, c_grid, false));
/* Generate grid from mesh */
openvdb::FloatGrid::Ptr mesh_grid = mesh_to_fog_volume_grid(mesh,
mesh_to_volume_space_transform,
voxel_size,
fill_volume,
exterior_band_width,
interior_band_width,
density);
if (mesh_grid != nullptr) {
/* Merge the generated grid. Should be cheap because grid has just been created. */
grid->merge(*mesh_grid);
/* Change transform so that the index space is correctly transformed to object space. */
grid->transform().postScale(voxel_size);
}
return c_grid;
openvdb::FloatGrid::Ptr mesh_grid = mesh_to_fog_volume_grid(
mesh, mesh_to_volume_space_transform, voxel_size, interior_band_width, density);
return mesh_grid ? BKE_volume_grid_add_vdb(*volume, name, std::move(mesh_grid)) : nullptr;
}
VolumeGrid *sdf_volume_grid_add_from_mesh(Volume *volume,

View File

@ -313,7 +313,6 @@ UVPackIsland_Params::UVPackIsland_Params()
/* Compact representation for AABB packers. */
class UVAABBIsland {
public:
uv_phi phi;
float2 uv_diagonal;
int64_t index;
float aspect_y;
@ -330,8 +329,10 @@ class UVAABBIsland {
* Technically, the algorithm here is only `O(n)`. In practice, to get reasonable results,
* the input must be pre-sorted, which costs an additional `O(nlogn)` time complexity.
*/
static void pack_islands_alpaca_turbo(const Span<UVAABBIsland *> islands,
static void pack_islands_alpaca_turbo(const int64_t start_index,
const Span<UVAABBIsland *> islands,
const float target_aspect_y,
MutableSpan<uv_phi> r_phis,
float *r_max_u,
float *r_max_v)
{
@ -344,7 +345,8 @@ static void pack_islands_alpaca_turbo(const Span<UVAABBIsland *> islands,
float v0 = zigzag ? 0.0f : next_v1;
/* Visit every island in order. */
for (UVAABBIsland *island : islands) {
for (int64_t index = start_index; index < islands.size(); index++) {
UVAABBIsland *island = islands[index];
const float dsm_u = island->uv_diagonal.x;
const float dsm_v = island->uv_diagonal.y;
@ -363,8 +365,10 @@ static void pack_islands_alpaca_turbo(const Span<UVAABBIsland *> islands,
}
/* Place the island. */
island->phi.translation.x = u0 + dsm_u * 0.5f;
island->phi.translation.y = v0 + dsm_v * 0.5f;
uv_phi &phi = r_phis[island->index];
phi.rotation = 0.0f;
phi.translation.x = u0 + dsm_u * 0.5f;
phi.translation.y = v0 + dsm_v * 0.5f;
if (zigzag) {
/* Move upwards. */
v0 += dsm_v;
@ -437,8 +441,10 @@ static void update_hole_rotate(float2 &hole,
* Also adds the concept of a "Hole", which is unused space that can be filled.
* Tracking the "Hole" has a slight performance cost, while improving packing efficiency.
*/
static void pack_islands_alpaca_rotate(const Span<UVAABBIsland *> islands,
static void pack_islands_alpaca_rotate(const int64_t start_index,
const Span<UVAABBIsland *> islands,
const float target_aspect_y,
MutableSpan<uv_phi> r_phis,
float *r_max_u,
float *r_max_v)
{
@ -456,7 +462,9 @@ static void pack_islands_alpaca_rotate(const Span<UVAABBIsland *> islands,
float v0 = zigzag ? 0.0f : next_v1;
/* Visit every island in order. */
for (UVAABBIsland *island : islands) {
for (int64_t index = start_index; index < islands.size(); index++) {
UVAABBIsland *island = islands[index];
uv_phi &phi = r_phis[island->index];
const float uvdiag_x = island->uv_diagonal.x * island->aspect_y;
float min_dsm = std::min(uvdiag_x, island->uv_diagonal.y);
float max_dsm = std::max(uvdiag_x, island->uv_diagonal.y);
@ -464,14 +472,14 @@ static void pack_islands_alpaca_rotate(const Span<UVAABBIsland *> islands,
if (min_dsm < hole_diagonal.x && max_dsm < hole_diagonal.y) {
/* Place island in the hole. */
if (hole_rotate == (min_dsm == island->uv_diagonal.x)) {
island->phi.rotation = DEG2RADF(90.0f);
island->phi.translation.x = hole[0] + island->uv_diagonal.y * 0.5f / island->aspect_y;
island->phi.translation.y = hole[1] + island->uv_diagonal.x * 0.5f * island->aspect_y;
phi.rotation = DEG2RADF(90.0f);
phi.translation.x = hole[0] + island->uv_diagonal.y * 0.5f / island->aspect_y;
phi.translation.y = hole[1] + island->uv_diagonal.x * 0.5f * island->aspect_y;
}
else {
island->phi.rotation = 0.0f;
island->phi.translation.x = hole[0] + island->uv_diagonal.x * 0.5f;
island->phi.translation.y = hole[1] + island->uv_diagonal.y * 0.5f;
phi.rotation = 0.0f;
phi.translation.x = hole[0] + island->uv_diagonal.x * 0.5f;
phi.translation.y = hole[1] + island->uv_diagonal.y * 0.5f;
}
/* Update space left in the hole. */
@ -507,14 +515,14 @@ static void pack_islands_alpaca_rotate(const Span<UVAABBIsland *> islands,
/* Place the island. */
if (zigzag == (min_dsm == uvdiag_x)) {
island->phi.rotation = DEG2RADF(90.0f);
island->phi.translation.x = u0 + island->uv_diagonal.y * 0.5f / island->aspect_y;
island->phi.translation.y = v0 + island->uv_diagonal.x * 0.5f * island->aspect_y;
phi.rotation = DEG2RADF(90.0f);
phi.translation.x = u0 + island->uv_diagonal.y * 0.5f / island->aspect_y;
phi.translation.y = v0 + island->uv_diagonal.x * 0.5f * island->aspect_y;
}
else {
island->phi.rotation = 0.0f;
island->phi.translation.x = u0 + island->uv_diagonal.x * 0.5f;
island->phi.translation.y = v0 + island->uv_diagonal.y * 0.5f;
phi.rotation = 0.0f;
phi.translation.x = u0 + island->uv_diagonal.x * 0.5f;
phi.translation.y = v0 + island->uv_diagonal.y * 0.5f;
}
/* Move according to the "Alpaca rules", with rotation. */
@ -543,6 +551,7 @@ static void pack_island_box_pack_2d(const Span<UVAABBIsland *> aabbs,
const float scale,
const float margin,
const float target_aspect_y,
MutableSpan<uv_phi> r_phis,
float *r_max_u,
float *r_max_v)
{
@ -567,13 +576,11 @@ static void pack_island_box_pack_2d(const Span<UVAABBIsland *> aabbs,
/* Write back box_pack UVs. */
for (const int64_t i : aabbs.index_range()) {
PackIsland *island = islands[aabbs[i]->index];
BoxPack *box = box_array + i;
uv_phi phi;
uv_phi &phi = *(uv_phi *)&r_phis[aabbs[i]->index];
phi.rotation = 0.0f; /* #BLI_box_pack_2d never rotates. */
phi.translation.x = (box->x + box->w * 0.5f) * target_aspect_y;
phi.translation.y = (box->y + box->h * 0.5f);
island->place_(scale, phi);
}
/* Housekeeping. */
@ -868,6 +875,7 @@ static void pack_island_xatlas(const Span<UVAABBIsland *> island_indices,
const float scale,
const float margin,
const UVPackIsland_Params &params,
MutableSpan<uv_phi> r_phis,
float *r_max_u,
float *r_max_v)
{
@ -875,7 +883,6 @@ static void pack_island_xatlas(const Span<UVAABBIsland *> island_indices,
float max_u = 0.0f;
float max_v = 0.0f;
blender::Array<uv_phi> phis(island_indices.size());
int scan_line = 0;
int i = 0;
@ -925,14 +932,13 @@ static void pack_island_xatlas(const Span<UVAABBIsland *> island_indices,
/* Redraw already placed islands. (Greedy.) */
for (int j = 0; j < i; j++) {
occupancy.trace_island(islands[island_indices[j]->index], phis[j], scale, margin, true);
occupancy.trace_island(islands[island_indices[j]->index], r_phis[j], scale, margin, true);
}
continue;
}
/* Place island. */
phis[i] = phi;
island->place_(scale, phi);
r_phis[island_indices[i]->index] = phi;
occupancy.trace_island(island, phi, scale, margin, true);
i++; /* Next island. */
@ -980,9 +986,10 @@ static float pack_islands_scale_margin(const Span<PackIsland *> islands,
* - Sort islands in size order.
* - Call #BLI_box_pack_2d on the first `alpaca_cutoff` islands.
* - Call #pack_islands_alpaca_* on the remaining islands.
* - Combine results.
*/
blender::Array<uv_phi> phis(islands.size());
/* First, copy information from our input into the AABB structure. */
Array<UVAABBIsland *> aabbs(islands.size());
for (const int64_t i : islands.index_range()) {
@ -1049,6 +1056,7 @@ static float pack_islands_scale_margin(const Span<PackIsland *> islands,
scale,
margin,
params,
phis.as_mutable_span(),
&max_u,
&max_v);
break;
@ -1058,6 +1066,7 @@ static float pack_islands_scale_margin(const Span<PackIsland *> islands,
scale,
margin,
params.target_aspect_y,
phis.as_mutable_span(),
&max_u,
&max_v);
break;
@ -1067,25 +1076,25 @@ static float pack_islands_scale_margin(const Span<PackIsland *> islands,
/* Call Alpaca. */
if (params.rotate) {
pack_islands_alpaca_rotate(
aabbs.as_mutable_span().drop_front(max_box_pack), params.target_aspect_y, &max_u, &max_v);
pack_islands_alpaca_rotate(max_box_pack,
aabbs.as_mutable_span(),
params.target_aspect_y,
phis.as_mutable_span(),
&max_u,
&max_v);
}
else {
pack_islands_alpaca_turbo(
aabbs.as_mutable_span().drop_front(max_box_pack), params.target_aspect_y, &max_u, &max_v);
pack_islands_alpaca_turbo(max_box_pack,
aabbs.as_mutable_span(),
params.target_aspect_y,
phis.as_mutable_span(),
&max_u,
&max_v);
}
/* Write back Alpaca UVs. */
for (int64_t i = max_box_pack; i < aabbs.size(); i++) {
UVAABBIsland *aabb = aabbs[i];
islands[aabb->index]->place_(scale, aabb->phi);
}
/* Memory management. */
for (int64_t i : aabbs.index_range()) {
UVAABBIsland *aabb = aabbs[i];
aabbs[i] = nullptr;
delete aabb;
/* Write back UVs. */
for (int64_t i = 0; i < aabbs.size(); i++) {
islands[i]->place_(scale, phis[i]);
}
return std::max(max_u / params.target_aspect_y, max_v);
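/* Minimal standalone model of the write-back refactor above: every placement
 * strategy now fills one `phis` slot chosen by the island's original index,
 * and a single loop applies the results, instead of each strategy placing
 * islands itself (zigzag rule simplified to one strip; names illustrative): */
#include <cstdio>
#include <vector>

struct uv_phi_model {
  float rotation = 0.0f;
  float x = 0.0f, y = 0.0f;
};
struct AABBModel {
  int index; /* Index into the caller's island array, surviving the size sort. */
  float w, h;
};

static void pack_alpaca_model(const std::vector<AABBModel> &aabbs,
                              std::vector<uv_phi_model> &r_phis)
{
  float u0 = 0.0f;
  for (const AABBModel &box : aabbs) {
    uv_phi_model &phi = r_phis[box.index]; /* Slot chosen by original index. */
    phi.rotation = 0.0f;
    phi.x = u0 + box.w * 0.5f;
    phi.y = box.h * 0.5f;
    u0 += box.w;
  }
}

int main()
{
  /* Sorted by size, but `index` remembers the original order. */
  const std::vector<AABBModel> aabbs = {{2, 1.0f, 1.0f}, {0, 0.5f, 0.5f}, {1, 0.25f, 0.25f}};
  std::vector<uv_phi_model> phis(aabbs.size());
  pack_alpaca_model(aabbs, phis);
  for (const uv_phi_model &phi : phis) {
    printf("(%.3f, %.3f)\n", phi.x, phi.y);
  }
  return 0;
}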

View File

@ -9,6 +9,7 @@
#include "vk_buffer.hh"
#include "vk_context.hh"
#include "vk_framebuffer.hh"
#include "vk_index_buffer.hh"
#include "vk_memory.hh"
#include "vk_pipeline.hh"
#include "vk_texture.hh"
@ -103,6 +104,14 @@ void VKCommandBuffer::bind(const uint32_t binding,
vkCmdBindVertexBuffers(vk_command_buffer_, binding, 1, &vk_vertex_buffer, &offset);
}
void VKCommandBuffer::bind(const VKIndexBuffer &index_buffer, VkIndexType index_type)
{
validate_framebuffer_exists();
ensure_active_framebuffer();
VkBuffer vk_buffer = index_buffer.vk_handle();
vkCmdBindIndexBuffer(vk_command_buffer_, vk_buffer, 0, index_type);
}
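/* Hedged sketch of the draw side this enables: once the index buffer is bound
 * with its index type, indexed draws can be recorded. These are core Vulkan
 * calls; the draw itself is not part of this change. */
#include <vulkan/vulkan.h>

static void draw_indexed(VkCommandBuffer cmd, VkBuffer index_buffer, uint32_t index_len)
{
  vkCmdBindIndexBuffer(cmd, index_buffer, /*offset=*/0, VK_INDEX_TYPE_UINT32);
  vkCmdDrawIndexed(cmd, index_len, /*instanceCount=*/1, /*firstIndex=*/0,
                   /*vertexOffset=*/0, /*firstInstance=*/0);
}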
void VKCommandBuffer::begin_render_pass(const VKFrameBuffer &framebuffer)
{
validate_framebuffer_not_exists();
@ -285,7 +294,7 @@ void VKCommandBuffer::submit_encoded_commands()
}
/* -------------------------------------------------------------------- */
/** \name Framebuffer/RenderPass state tracking
/** \name FrameBuffer/RenderPass state tracking
* \{ */
void VKCommandBuffer::validate_framebuffer_not_exists()

View File

@ -60,13 +60,13 @@ class VKCommandBuffer : NonCopyable, NonMovable {
* - minimize command buffers and track render passes.
* - add custom encoder to also track resource usages.
*
* Currently I expect the custom encoder has to be done eventually. But want to keep post-poning
* Currently I expect the custom encoder has to be done eventually. But want to keep postponing
* the custom encoder for now to collect more use cases it should solve. (first pixel drawn on
* screen).
*
* Some commands can also be encoded in another way when encoded as the first command. For example
* clearing a framebuffer textures isn't allowed inside a render pass, but clearing the
* framebuffer textures via ops is allowed. When clearing a framebuffer texture directly after
* clearing frame-buffer textures isn't allowed inside a render pass, but clearing the
* frame-buffer textures via ops is allowed. When clearing a frame-buffer texture directly after
* beginning a render pass, it could be re-encoded to do this in the same command.
*
* So for now we track the state and temporary switch to another state if the command requires
@ -131,6 +131,7 @@ class VKCommandBuffer : NonCopyable, NonMovable {
void init(const VkDevice vk_device, const VkQueue vk_queue, VkCommandBuffer vk_command_buffer);
void begin_recording();
void end_recording();
void bind(const VKPipeline &vk_pipeline, VkPipelineBindPoint bind_point);
void bind(const VKDescriptorSet &descriptor_set,
const VkPipelineLayout vk_pipeline_layout,
@ -140,6 +141,7 @@ class VKCommandBuffer : NonCopyable, NonMovable {
const VkDeviceSize offset);
/* Bind the given buffer as a vertex buffer. */
void bind(const uint32_t binding, const VkBuffer &vk_vertex_buffer, const VkDeviceSize offset);
void bind(const VKIndexBuffer &index_buffer, VkIndexType index_type);
void begin_render_pass(const VKFrameBuffer &framebuffer);
void end_render_pass(const VKFrameBuffer &framebuffer);

View File

@ -243,6 +243,127 @@ VkFormat to_vk_format(const eGPUTextureFormat format)
return VK_FORMAT_UNDEFINED;
}
VkFormat to_vk_format(const GPUVertCompType type, const uint32_t size)
{
switch (type) {
case GPU_COMP_I8:
switch (size) {
case 1:
return VK_FORMAT_R8_SNORM;
case 2:
return VK_FORMAT_R8G8_SNORM;
case 3:
return VK_FORMAT_R8G8B8_SNORM;
case 4:
return VK_FORMAT_R8G8B8A8_SNORM;
default:
break;
}
break;
case GPU_COMP_U8:
switch (size) {
case 1:
return VK_FORMAT_R8_UNORM;
case 2:
return VK_FORMAT_R8G8_UNORM;
case 3:
return VK_FORMAT_R8G8B8_UNORM;
case 4:
return VK_FORMAT_R8G8B8A8_UNORM;
default:
break;
}
break;
case GPU_COMP_I16:
switch (size) {
case 2:
return VK_FORMAT_R16_SNORM;
case 4:
return VK_FORMAT_R16G16_SNORM;
case 6:
return VK_FORMAT_R16G16B16_SNORM;
case 8:
return VK_FORMAT_R16G16B16A16_SNORM;
default:
break;
}
break;
case GPU_COMP_U16:
switch (size) {
case 2:
return VK_FORMAT_R16_UNORM;
case 4:
return VK_FORMAT_R16G16_UNORM;
case 6:
return VK_FORMAT_R16G16B16_UNORM;
case 8:
return VK_FORMAT_R16G16B16A16_UNORM;
default:
break;
}
break;
case GPU_COMP_I32:
switch (size) {
case 4:
return VK_FORMAT_R32_SINT;
case 8:
return VK_FORMAT_R32G32_SINT;
case 12:
return VK_FORMAT_R32G32B32_SINT;
case 16:
return VK_FORMAT_R32G32B32A32_SINT;
default:
break;
}
break;
case GPU_COMP_U32:
switch (size) {
case 4:
return VK_FORMAT_R32_UINT;
case 8:
return VK_FORMAT_R32G32_UINT;
case 12:
return VK_FORMAT_R32G32B32_UINT;
case 16:
return VK_FORMAT_R32G32B32A32_UINT;
default:
break;
}
break;
case GPU_COMP_F32:
switch (size) {
case 4:
return VK_FORMAT_R32_SFLOAT;
case 8:
return VK_FORMAT_R32G32_SFLOAT;
case 12:
return VK_FORMAT_R32G32B32_SFLOAT;
case 16:
return VK_FORMAT_R32G32B32A32_SFLOAT;
case 64:
return VK_FORMAT_R32G32B32A32_SFLOAT;
default:
break;
}
break;
case GPU_COMP_I10:
BLI_assert(size == 4);
return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
default:
break;
}
BLI_assert_unreachable();
return VK_FORMAT_R32_SFLOAT;
}
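Note that `size` in this mapping is the attribute size in bytes, not the component count, which is why `GPU_COMP_I16` switches on 2/4/6/8 while `GPU_COMP_I8` switches on 1/2/3/4. A few illustrative expectations (not part of the patch):

/* `size` is in bytes, so component count = size / bytes-per-component. */
BLI_assert(to_vk_format(GPU_COMP_U8, 4) == VK_FORMAT_R8G8B8A8_UNORM);     /* 4 x 1 byte. */
BLI_assert(to_vk_format(GPU_COMP_I16, 4) == VK_FORMAT_R16G16_SNORM);      /* 2 x 2 bytes. */
BLI_assert(to_vk_format(GPU_COMP_F32, 12) == VK_FORMAT_R32G32B32_SFLOAT); /* 3 x 4 bytes. */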
VkImageType to_vk_image_type(const eGPUTextureType type)
{
switch (type) {
@ -260,10 +381,10 @@ VkImageType to_vk_image_type(const eGPUTextureType type)
case GPU_TEXTURE_ARRAY:
/* GPU_TEXTURE_ARRAY should always be used together with 1D, 2D, or CUBE. */
BLI_assert_unreachable();
break;
}
BLI_assert_unreachable();
return VK_IMAGE_TYPE_1D;
}
@ -288,10 +409,10 @@ VkImageViewType to_vk_image_view_type(const eGPUTextureType type)
case GPU_TEXTURE_ARRAY:
/* GPU_TEXTURE_ARRAY should always be used together with 1D, 2D, or CUBE. */
BLI_assert_unreachable();
break;
}
BLI_assert_unreachable();
return VK_IMAGE_VIEW_TYPE_1D;
}
@ -349,4 +470,64 @@ VkClearColorValue to_vk_clear_color_value(const eGPUDataFormat format, const voi
return result;
}
VkIndexType to_vk_index_type(const GPUIndexBufType index_type)
{
switch (index_type) {
case GPU_INDEX_U16:
return VK_INDEX_TYPE_UINT16;
case GPU_INDEX_U32:
return VK_INDEX_TYPE_UINT32;
default:
break;
}
BLI_assert_unreachable();
return VK_INDEX_TYPE_UINT16;
}
VkPrimitiveTopology to_vk_primitive_topology(const GPUPrimType prim_type)
{
switch (prim_type) {
case GPU_PRIM_POINTS:
return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
case GPU_PRIM_LINES:
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
case GPU_PRIM_TRIS:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
case GPU_PRIM_LINE_STRIP:
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
case GPU_PRIM_LINE_LOOP:
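/* Vulkan has no line-loop topology; mapping to a line list means the closing
 * segment isn't drawn unless it is handled elsewhere. */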
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
case GPU_PRIM_TRI_STRIP:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
case GPU_PRIM_TRI_FAN:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
case GPU_PRIM_LINES_ADJ:
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY;
case GPU_PRIM_TRIS_ADJ:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY;
case GPU_PRIM_LINE_STRIP_ADJ:
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY;
case GPU_PRIM_NONE:
break;
}
BLI_assert_unreachable();
return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
}
VkCullModeFlags to_vk_cull_mode_flags(const eGPUFaceCullTest cull_test)
{
switch (cull_test) {
case GPU_CULL_FRONT:
return VK_CULL_MODE_FRONT_BIT;
case GPU_CULL_BACK:
return VK_CULL_MODE_BACK_BIT;
case GPU_CULL_NONE:
return VK_CULL_MODE_NONE;
}
BLI_assert_unreachable();
return VK_CULL_MODE_NONE;
}
} // namespace blender::gpu

View File

@ -17,16 +17,21 @@
#include "vk_mem_alloc.h"
#include "gpu_index_buffer_private.hh"
#include "gpu_texture_private.hh"
namespace blender::gpu {
VkImageAspectFlagBits to_vk_image_aspect_flag_bits(const eGPUTextureFormat format);
VkFormat to_vk_format(const eGPUTextureFormat format);
VkFormat to_vk_format(const GPUVertCompType type, const uint32_t size);
VkComponentMapping to_vk_component_mapping(const eGPUTextureFormat format);
VkImageViewType to_vk_image_view_type(const eGPUTextureType type);
VkImageType to_vk_image_type(const eGPUTextureType type);
VkClearColorValue to_vk_clear_color_value(const eGPUDataFormat format, const void *data);
VkIndexType to_vk_index_type(const GPUIndexBufType index_type);
VkPrimitiveTopology to_vk_primitive_topology(const GPUPrimType prim_type);
VkCullModeFlags to_vk_cull_mode_flags(const eGPUFaceCullTest cull_test);
template<typename T> VkObjectType to_vk_object_type(T /*vk_obj*/)
{

View File

@ -126,6 +126,17 @@ void VKContext::finish()
void VKContext::memory_statistics_get(int * /*total_mem*/, int * /*free_mem*/) {}
/* -------------------------------------------------------------------- */
/** \name State manager
* \{ */
const VKStateManager &VKContext::state_manager_get() const
{
return *static_cast<const VKStateManager *>(state_manager);
}
/** \} */
void VKContext::activate_framebuffer(VKFrameBuffer &framebuffer)
{
if (has_active_framebuffer()) {

View File

@ -15,8 +15,9 @@
namespace blender::gpu {
class VKFrameBuffer;
class VKStateManager;
class VKContext : public Context {
class VKContext : public Context, NonCopyable {
private:
/** Copies of the handles owned by the GHOST context. */
VkInstance vk_instance_ = VK_NULL_HANDLE;
@ -108,6 +109,8 @@ class VKContext : public Context {
return descriptor_pools_;
}
const VKStateManager &state_manager_get() const;
VmaAllocator mem_allocator_get() const
{
return mem_allocator_;

View File

@ -764,6 +764,28 @@ void convert_host_to_device(void *dst_buffer,
convert_buffer(dst_buffer, src_buffer, buffer_size, device_format, conversion_type);
}
void convert_host_to_device(void *dst_buffer,
const void *src_buffer,
uint2 src_size,
uint src_row_length,
eGPUDataFormat host_format,
eGPUTextureFormat device_format)
{
const uint8_t *src = static_cast<const uint8_t *>(src_buffer);
uint8_t *dst = static_cast<uint8_t *>(dst_buffer);
ConversionType conversion_type = host_to_device(host_format, device_format);
size_t src_row_len = src_row_length * to_bytesize(device_format, host_format);
size_t dst_row_len = src_size.x * to_bytesize(device_format);
for (uint row : IndexRange(src_size.y)) {
convert_buffer(&dst[dst_row_len * row],
&src[src_row_len * row],
src_size.x,
device_format,
conversion_type);
}
}
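To make the strides concrete: each source row advances by the full `src_row_length`, while destination rows are tightly packed at `src_size.x` pixels. A worked example under assumed formats (GPU_DATA_FLOAT host data into a GPU_RGBA32F texture, 16 bytes per pixel):

/* Hypothetical numbers: copy a 16x8 region out of a 64-pixel-wide host image. */
const uint2 src_size(16, 8);
const uint src_row_length = 64;
const size_t bytes_per_pixel = 16;                           /* RGBA32F. */
const size_t src_row_len = src_row_length * bytes_per_pixel; /* 1024 bytes per source row. */
const size_t dst_row_len = src_size.x * bytes_per_pixel;     /* 256 bytes per packed row. */
/* Row `r` converts src[1024 * r, 1024 * r + 256) into dst[256 * r, 256 * (r + 1)). */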
void convert_device_to_host(void *dst_buffer,
const void *src_buffer,
size_t buffer_size,

View File

@ -7,6 +7,8 @@
#pragma once
#include "BLI_math_vector_types.hh"
#include "gpu_texture_private.hh"
namespace blender::gpu {
@ -17,7 +19,7 @@ namespace blender::gpu {
* \param dst_buffer: device buffer.
* \param src_buffer: host buffer.
* \param buffer_size: number of pixels to convert from the start of the given buffer.
* \param host_format: format of the host buffer
* \param host_format: format of the host buffer.
* \param device_format: format of the device buffer.
*
* \note Will assert when the host_format/device_format combination isn't valid
@ -30,6 +32,27 @@ void convert_host_to_device(void *dst_buffer,
eGPUDataFormat host_format,
eGPUTextureFormat device_format);
/**
* Convert host buffer to device buffer with row length.
*
* \param dst_buffer: device buffer.
* \param src_buffer: host buffer.
* \param src_size: size of the host buffer.
* \param src_row_length: length of a single row of the host buffer (in pixels).
* \param host_format: format of the host buffer.
* \param device_format: format of the device buffer.
*
* \note Will assert when the host_format/device_format combination isn't valid
* (#validate_data_format) or supported. Some combinations aren't supported in Vulkan due to
* platform incompatibility.
*/
void convert_host_to_device(void *dst_buffer,
const void *src_buffer,
uint2 src_size,
uint src_row_length,
eGPUDataFormat host_format,
eGPUTextureFormat device_format);
/**
* Convert device buffer to host buffer.
*

View File

@ -35,7 +35,10 @@ typedef struct VKDebuggingTools {
bool init_callbacks(VKContext *context, PFN_vkGetInstanceProcAddr instance_proc_addr);
void destroy_callbacks(VKContext *context);
void object_label(VKContext *context, VkObjectType vk_object_type, uint64_t object_handle, const char *name);
void object_label(VKContext *context,
VkObjectType vk_object_type,
uint64_t object_handle,
const char *name);
template<typename T> void object_label(VKContext *context, T vk_object_type, const char *name)
{
@ -47,7 +50,8 @@ template<typename T> void object_label(VKContext *context, T vk_object_type, con
memset(label, 0, label_size);
static int stats = 0;
SNPRINTF(label, "%s_%d", name, stats++);
object_label(context, to_vk_object_type(vk_object_type), (uint64_t)vk_object_type, (const char *)label);
object_label(
context, to_vk_object_type(vk_object_type), (uint64_t)vk_object_type, (const char *)label);
};
void push_marker(VKContext *context, VkCommandBuffer vk_command_buffer, const char *name);

View File

@ -11,15 +11,39 @@
namespace blender::gpu {
void VKIndexBuffer::upload_data() {}
void VKIndexBuffer::bind_as_ssbo(uint binding)
void VKIndexBuffer::ensure_updated()
{
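/* Sub-range index buffers share the data of their source buffer, so the upload is
 * delegated to the source instead of allocating a buffer of our own. */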
if (is_subrange_) {
src_->upload_data();
return;
}
VKContext &context = *VKContext::get();
if (!buffer_.is_allocated()) {
allocate(context);
}
if (data_ != nullptr) {
buffer_.update(data_);
MEM_SAFE_FREE(data_);
}
}
void VKIndexBuffer::upload_data()
{
ensure_updated();
}
void VKIndexBuffer::bind(VKContext &context)
{
context.command_buffer_get().bind(*this, to_vk_index_type(index_type_));
}
void VKIndexBuffer::bind_as_ssbo(uint binding)
{
ensure_updated();
VKContext &context = *VKContext::get();
VKShader *shader = static_cast<VKShader *>(context.shader);
const VKShaderInterface &shader_interface = shader->interface_get();
const VKDescriptorSet::Location location = shader_interface.descriptor_set_location(
@ -48,6 +72,7 @@ void VKIndexBuffer::allocate(VKContext &context)
usage,
static_cast<VkBufferUsageFlagBits>(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_INDEX_BUFFER_BIT));
debug::object_label(&context, buffer_.vk_handle(), "IndexBuffer");
}
} // namespace blender::gpu

View File

@ -20,12 +20,13 @@ class VKIndexBuffer : public IndexBuf {
void upload_data() override;
void bind_as_ssbo(uint binding) override;
void bind(VKContext &context);
void read(uint32_t *data) const override;
void update_sub(uint start, uint len, const void *data) override;
VkBuffer vk_handle()
VkBuffer vk_handle() const
{
return buffer_.vk_handle();
}
@ -33,6 +34,12 @@ class VKIndexBuffer : public IndexBuf {
private:
void strip_restart_indices() override;
void allocate(VKContext &context);
void ensure_updated();
};
static inline VKIndexBuffer *unwrap(IndexBuf *index_buffer)
{
return static_cast<VKIndexBuffer *>(index_buffer);
}
} // namespace blender::gpu

View File

@ -38,6 +38,14 @@ void VKStateManager::image_unbind(Texture * /*tex*/) {}
void VKStateManager::image_unbind_all() {}
void VKStateManager::texture_unpack_row_length_set(uint /*len*/) {}
void VKStateManager::texture_unpack_row_length_set(uint len)
{
texture_unpack_row_length_ = len;
}
uint VKStateManager::texture_unpack_row_length_get() const
{
return texture_unpack_row_length_;
}
} // namespace blender::gpu

View File

@ -11,6 +11,8 @@
namespace blender::gpu {
class VKStateManager : public StateManager {
uint texture_unpack_row_length_ = 0;
public:
void apply_state() override;
void force_state() override;
@ -26,5 +28,12 @@ class VKStateManager : public StateManager {
void image_unbind_all() override;
void texture_unpack_row_length_set(uint len) override;
/**
* Row length for unpacking host data when uploading texture data.
*
* When set to zero (0), host data can be assumed to be stored sequentially.
*/
uint texture_unpack_row_length_get() const;
};
} // namespace blender::gpu

View File

@ -13,6 +13,7 @@
#include "vk_memory.hh"
#include "vk_shader.hh"
#include "vk_shader_interface.hh"
#include "vk_state_manager.hh"
#include "BLI_math_vector.hh"
@ -94,8 +95,12 @@ void *VKTexture::read(int mip, eGPUDataFormat format)
}
void VKTexture::update_sub(
int mip, int offset[3], int extent[3], eGPUDataFormat format, const void *data)
int mip, int offset[3], int extent_[3], eGPUDataFormat format, const void *data)
{
if (mip != 0) {
/* TODO: not implemented yet. */
return;
}
if (!is_allocated()) {
allocate();
}
@ -103,17 +108,31 @@ void VKTexture::update_sub(
/* Vulkan images cannot be directly mapped to host memory and requires a staging buffer. */
VKContext &context = *VKContext::get();
VKBuffer staging_buffer;
size_t sample_len = extent[0] * extent[1] * extent[2];
int3 extent = int3(extent_[0], max_ii(extent_[1], 1), max_ii(extent_[2], 1));
size_t sample_len = extent.x * extent.y * extent.z;
size_t device_memory_size = sample_len * to_bytesize(format_);
staging_buffer.create(
context, device_memory_size, GPU_USAGE_DEVICE_ONLY, VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
convert_host_to_device(staging_buffer.mapped_memory_get(), data, sample_len, format, format_);
uint buffer_row_length = context.state_manager_get().texture_unpack_row_length_get();
if (buffer_row_length) {
/* Use custom row length #GPU_texture_unpack_row_length. */
convert_host_to_device(staging_buffer.mapped_memory_get(),
data,
uint2(extent),
buffer_row_length,
format,
format_);
}
else {
convert_host_to_device(staging_buffer.mapped_memory_get(), data, sample_len, format, format_);
}
VkBufferImageCopy region = {};
region.imageExtent.width = extent[0];
region.imageExtent.height = extent[1];
region.imageExtent.depth = extent[2];
region.imageExtent.width = extent.x;
region.imageExtent.height = extent.y;
region.imageExtent.depth = extent.z;
region.imageOffset.x = offset[0];
region.imageOffset.y = offset[1];
region.imageOffset.z = offset[2];
@ -175,7 +194,8 @@ bool VKTexture::is_allocated() const
static VkImageUsageFlagBits to_vk_image_usage(const eGPUTextureUsage usage,
const eGPUTextureFormatFlag format_flag)
{
VkImageUsageFlagBits result = static_cast<VkImageUsageFlagBits>(VK_IMAGE_USAGE_TRANSFER_DST_BIT |
VkImageUsageFlagBits result = static_cast<VkImageUsageFlagBits>(VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
VK_IMAGE_USAGE_TRANSFER_DST_BIT |
VK_IMAGE_USAGE_SAMPLED_BIT);
if (usage & GPU_TEXTURE_USAGE_SHADER_READ) {
result = static_cast<VkImageUsageFlagBits>(result | VK_IMAGE_USAGE_STORAGE_BIT);
@ -184,7 +204,7 @@ static VkImageUsageFlagBits to_vk_image_usage(const eGPUTextureUsage usage,
result = static_cast<VkImageUsageFlagBits>(result | VK_IMAGE_USAGE_STORAGE_BIT);
}
if (usage & GPU_TEXTURE_USAGE_ATTACHMENT) {
if (format_flag & (GPU_FORMAT_NORMALIZED_INTEGER | GPU_FORMAT_COMPRESSED)) {
if (format_flag & GPU_FORMAT_COMPRESSED) {
/* These formats aren't supported as an attachment. When using GPU_TEXTURE_USAGE_DEFAULT they
 * would still be treated as attachable, so they need to be skipped here. */
}
@ -207,6 +227,7 @@ static VkImageUsageFlagBits to_vk_image_usage(const eGPUTextureUsage usage,
bool VKTexture::allocate()
{
BLI_assert(vk_image_ == VK_NULL_HANDLE);
BLI_assert(!is_allocated());
int extent[3] = {1, 1, 1};
@ -260,6 +281,7 @@ bool VKTexture::allocate()
if (result != VK_SUCCESS) {
return false;
}
debug::object_label(&context, vk_image_, name_);
/* Promote image to the correct layout. */
layout_ensure(context, VK_IMAGE_LAYOUT_GENERAL);
@ -277,6 +299,7 @@ bool VKTexture::allocate()
result = vkCreateImageView(
context.device_get(), &image_view_info, vk_allocation_callbacks, &vk_image_view_);
debug::object_label(&context, vk_image_view_, name_);
return result == VK_SUCCESS;
}

View File

@ -974,7 +974,7 @@ typedef enum IDRecalcFlag {
/* ** Particle system changed. ** */
/* Only do pathcache etc. */
ID_RECALC_PSYS_REDO = (1 << 3),
/* Reset everything including pointcache. */
/* Reset everything including point-cache. */
ID_RECALC_PSYS_RESET = (1 << 4),
/* Only child settings changed. */
ID_RECALC_PSYS_CHILD = (1 << 5),

View File

@ -126,6 +126,10 @@ typedef enum eAssetImportMethod {
ASSET_IMPORT_APPEND_REUSE = 2,
} eAssetImportMethod;
typedef enum eAssetLibrary_Flag {
ASSET_LIBRARY_RELATIVE_PATH = (1 << 0),
} eAssetLibrary_Flag;
/**
* Information to identify an asset library. May be either one of the predefined types (current
* 'Main', builtin library, project library), or a custom type as defined in the Preferences.

View File

@ -321,7 +321,7 @@ typedef struct Mesh {
*/
void loose_edges_tag_none() const;
/**
* Set the number of verices not connected to edges to zero. Similar to #loose_edges_tag_none().
* Set the number of vertices not connected to edges to zero. Similar to #loose_edges_tag_none().
* There may still be vertices only used by loose edges though.
*
* \note If both #loose_edges_tag_none() and #tag_loose_verts_none() are called,

View File

@ -2333,14 +2333,7 @@ typedef struct MeshToVolumeModifierData {
* different. */
int voxel_amount;
/** If true, every cell in the enclosed volume gets a density. Otherwise, the interior_band_width
* is used. */
char fill_volume;
char _pad1[3];
/** Band widths are in object space. */
float interior_band_width;
float exterior_band_width;
float density;
char _pad2[4];

View File

@ -0,0 +1,28 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup DNA
*/
#pragma once
#include "DNA_asset_types.h"
/* Struct members on own line. */
/* clang-format off */
/* -------------------------------------------------------------------- */
/** \name bUserAssetLibrary Struct
* \{ */
#define _DNA_DEFAULT_bUserAssetLibrary \
{ \
.import_method = ASSET_IMPORT_APPEND_REUSE, \
.flag = ASSET_LIBRARY_RELATIVE_PATH, \
}
/** \} */
/* clang-format on */

View File

@ -593,7 +593,8 @@ typedef struct bUserAssetLibrary {
char path[1024]; /* FILE_MAX */
short import_method; /* eAssetImportMethod */
char _pad0[6];
short flag; /* eAssetLibrary_Flag */
char _pad0[4];
} bUserAssetLibrary;
typedef struct SolidLight {

Some files were not shown because too many files have changed in this diff.