
Compare commits


80 Commits

Author SHA1 Message Date
560e015a5b Fix and cleanup: Pack to correct/specified UDIM
After recent changes in the master branch, it is now possible to display
the UDIM grid and tiled images simultaneously in the UV Editor.
This commit updates the pack-islands-to-correct/specified-UDIM
implementation to work with the new behavior. It also includes some code
and comment cleanups.
2021-09-26 00:40:51 +05:30
f87ed1547c UV grid: Resolve merge conflicts
Resolves merge conflicts that broke the subdividing and dynamic grid
implementations.
2021-09-22 23:08:30 +05:30
1b9dd08d02 Merge branch 'master' into soc-2021-uv-editor-improvements 2021-09-22 21:07:15 +05:30
0c2bc843f5 Merge branch 'master' into soc-2021-uv-editor-improvements 2021-08-16 10:34:46 +05:30
98deceb5c1 Cleanup and fix: Dynamic grid and offset operator
* Minor fix for dynamic grid
* Cleanup UV offset operator
2021-08-16 10:04:49 +05:30
da8963af2d Cleanup and fixes: UV grid types
* Code and comment cleanup
* Refactor subdividing grid code to allow creating grids with dimensions
  different from the previous 4x4 grid
* Change subdividing grid dimensions to start from 8x8
* Refactor code used to calculate increment snapping value for the UV
  editor
2021-08-16 06:44:00 +05:30
72ee339710 Merge branch 'master' into soc-2021-uv-editor-improvements 2021-08-09 06:31:32 +05:30
1c74abfa4b Cleanup: license headers
These were removed globally in 65ec7ec524.

Some files have re-introduced these conventions since then.
2021-08-06 11:39:49 +05:30
f6e594f1ee Cleanup: tab indentation for CMake / GNUmakefile 2021-08-06 11:39:49 +05:30
8c3128baae Windows: Add icons and icons_geom to make.bat
This adds support for building the icons from make.bat.
Unlike bash, there is no passing environment variables
on the command line.

The scripts go out of their way to locate both blender
and inkscape; however, if they are not found, the user is
given a helpful error message telling them how to set
the variables.

Although some extra help can be given there, if your
normal build is a 2019 full build, running

`make 2019 full icons`

will help it find the blender executable as well.

Finally, if you know the name of your build folder,
running

`make builddir build_windows_Lite_x64_vc16_Release icons`

will also work. If all else fails, you can point directly to
the blender executable by running

`set BLENDER_BIN=c:\where\blender\lives\blender.exe`

before running `make icons` or `make icons_geom`.

The Python scripts needed some small modifications, since
without the PATHEXT, SystemRoot and SystemDrive
environment variables Python will not initialize properly
on Windows. (Not Blender related; even mainline Python
won't start without those.)
2021-08-06 11:39:48 +05:30
798214c1f6 PyDoc: Update GPU Example of draw_view3d
This function was changed in rBc8004ab4078c98c54a70113c12bbb186403e90cf but the example was not updated.

Part of T84227
2021-08-06 11:39:48 +05:30
e981df9fbd PyDoc: Improve description of texture.evaluate
Inspired by the old 2.49 docs: https://docs.blender.org/api/249PythonDoc/Texture.Texture-class.html#evaluate
2021-08-06 11:39:48 +05:30
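A hedged sketch of the API being documented, assuming a texture named "Tex" exists in the current file; per the old 2.49 docs, evaluate() samples the texture at a 3D coordinate and returns an RGBA result whose last component is the intensity:

import bpy

tex = bpy.data.textures["Tex"]  # assumed to exist in the .blend file
r, g, b, intensity = tex.evaluate((0.5, 0.5, 0.0))  # sample at a 3D point
print("color:", (r, g, b), "intensity:", intensity)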
112a532f41 Cleanup: make format 2021-08-06 11:39:48 +05:30
aa0b05ef70 Win32 IME: Rename SetInputLanguage()
GHOST_ImeWin32::SetInputLanguage() has a confusing name because it does
not set the input language. It actually retrieves the current input
locale from the OS and caches the value of the current input language
ID. Therefore this patch renames it to "UpdateInputLanguage".

Differential Revision: https://developer.blender.org/D12134

Reviewed by Ray Molenkamp
2021-08-06 11:39:47 +05:30
c3e996a118 Win32 IME: Remove ime_status_
This removes one member of GHOST_ImeWin32 that is not used and cannot
be used in the future. It is holding the result of ImmIsIME, which is
whether an input language supports IME. It does not indicate that one
is in use, turned on, composing, in English mode, etc.

see D12131 for more information.

Differential Revision: https://developer.blender.org/D12131

Reviewed by Ray Molenkamp
2021-08-06 11:39:47 +05:30
c7a2086253 Cycles: make object Fast GI Approximation panel a subpanel of Shading 2021-08-06 11:39:47 +05:30
0ac3da0e45 Cleanup: rename restrict to hide/visibility in Object, Collection, MaskLayer
This makes the internal naming consistent with the public API. It also gives
us a visibility_flag rather than restrictflag, which can be extended with more
flags.
2021-08-06 11:39:46 +05:30
Romain Toumi
a39edbfb19 Fix Cycles material slots list being too short
Bring it in line with Eevee.

Differential Revision: https://developer.blender.org/D11982
2021-08-06 11:39:45 +05:30
e6a68e9511 VSE: Allow Wingdings and Symbol Fonts
This patch makes us less restrictive about the types of FreeType
font character maps we allow, rather than being primarily unicode-only.
This allows us to use some legacy, symbol, specialty, and proprietary
fonts like Wingdings. Note we were already a little less restrictive with
vfonts, used for 3D Text Objects, so this patch primarily helps the VSE.

See D12124 for details and examples.

Differential Revision: https://developer.blender.org/D12124

Reviewed by Brecht Van Lommel
2021-08-06 11:39:45 +05:30
1ea018eb4d UI: VFont Display Names
When displaying the names of fonts for 3D Text objects, use the same
format as shown in File Browser: Family name + Style name. They are
currently shown with Postscript Name, which doesn't match well.

see D12069 for more details.

Differential Revision: https://developer.blender.org/D12069

Reviewed by Campbell Barton
2021-08-06 11:39:45 +05:30
08697cff10 Cycles: More flexible GI Approximation AO distance control
The goal: allow easy use of AO approximation in scenes which combine
both small and large scale objects.

The idea: use a per-object AO distance which can override the world
settings. An instancer object will "propagate" its AO distance to all its
instances unless the instance defines its own distance (this allows
modifying the AO distance in the shot files, without requiring changes to
the props used in the shots).

Available from the new Fast GI Approximation panel in object properties.

Differential Revision: https://developer.blender.org/D12112
2021-08-06 11:39:44 +05:30
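A hedged sketch of the per-object override from Python; the property name cycles.ao_distance below is an assumption based on the panel description, not confirmed by this log:

import bpy

# World-level AO distance used by the Fast GI Approximation.
bpy.context.scene.world.light_settings.distance = 10.0

# Assumed per-object override; instancers propagate it to their instances.
prop = bpy.data.objects["SmallProp"]  # hypothetical object name
prop.cycles.ao_distance = 0.1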
395056561b LibOverride RNA API: add removal of properties and operations.
This should complete the basics of RNA API for library overrides.

Ref. T86656.
2021-08-06 11:39:44 +05:30
6669c559e3 LibOverride: Add RNA API to reset/delete overrides.
Ref. T86656.
2021-08-06 11:39:44 +05:30
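A hedged sketch of the two LibOverride API commits above; the reset()/destroy() method names on ID.override_library are assumptions inferred from the messages:

import bpy

ob = bpy.data.objects["OverriddenObject"]  # hypothetical overridden ID
if ob.override_library is not None:
    ob.override_library.reset()      # assumed: discard local override edits
    # ob.override_library.destroy()  # assumed: delete the override entirely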
cacfdcd041 Added some TODO remarks. 2021-08-06 11:39:43 +05:30
4362228562 GPencil: New Brush option to define Caps type
This is used to set the default caps type for the stroke. Previously it was always rounded and could only be changed later in Edit mode.

Two new buttons have been added to the topbar.

NOTE: New icons are being designed (T90414).

The buttons are expanded to a list in the Properties panel.

Reviewed By: mendio, HooglyBoogly

Differential Revision: https://developer.blender.org/D11999
2021-08-06 11:39:43 +05:30
4ea6595af0 Fix T90427: Center View to Mouse broken
rBfb87d236edb7 made the values returned by `projmat_dimensions` more
standardized, following the documentation. But the functions in Blender
that called `projmat_dimensions` assumed that these values
corresponded to a clip distance of 1m.

Adjust these functions to follow the new algorithm.
2021-08-06 11:39:42 +05:30
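A minimal sketch of the relation behind the fix, assuming a symmetric perspective matrix: frustum extents scale linearly with the clip distance at which they are measured, so callers that assumed "dimensions at 1m" have to rescale:

def frustum_width_at(proj, distance):
    # For a symmetric perspective matrix, proj[0][0] == near / right, so the
    # full frustum width measured at `distance` is 2 * distance / proj[0][0].
    return 2.0 * distance / proj[0][0]

def frustum_height_at(proj, distance):
    return 2.0 * distance / proj[1][1]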
8506edc0b8 BLI: add double version of 'scaleform'
No functional changes. New utility.
2021-08-06 11:39:42 +05:30
5c4bd878a2 Fix T90421: edit-mode auto-smooth crash when angle set to 180 degrees
Error in 39b2a7bb7e
which failed to set edge flags with single threaded calculation,
used for low poly models.
2021-08-06 11:39:41 +05:30
Gavin Li
ccf4103c1a Fix transparent faces on certain AMD cards
This patch fixes an issue with missing faces when assigning a material slot other than the first to faces on AMD TAHITI cards. Refer to T78390 and T74024 for a description of this issue.

This patch also incorporates fix from T78390 for KAVERI.

{F9029258}

Reviewed By: fclem

Differential Revision: https://developer.blender.org/D9305
2021-08-06 11:39:41 +05:30
76e4ffdb90 Cleanup: remove *.rej from cb67bfdba2 2021-08-06 11:39:41 +05:30
97f82a2224 Tweak to recent liboverride API addition: naming.
Rename new API function introduced in recent rB3b0fab6dfaa0 to match our
convention to put the action (verb) at the end of names:
`operations_update`.

Sorry for not catching that during review.
2021-08-06 11:39:40 +05:30
2581039d55 Cleanup: inconsistent parameter name 2021-08-06 11:39:40 +05:30
1e6fef3aa1 Cleanup: initialize variable to quiet warning 2021-08-06 11:39:39 +05:30
446f488685 Outliner/LibOverrides: Fix logic of checks for drag'n'drop of Collections.
Previous check was too blunt, preventing e.g. re-organization of
collection overrides inside a local parent collection, which is a
perfectly valid operation.

Reported by @hjalti from the studio, thanks!
2021-08-06 11:39:39 +05:30
91a2f5583e Fix compile error without WITH_OCEANSIM enabled
Was changed in 218df99410.
2021-08-06 11:39:39 +05:30
Pratik Borhade
aa33073004 Fix T87635: Rename shader node "Specular" to "Specular BSDF"
Node name edited in Specular node definition

Reviewed By: fclem

Maniphest Tasks: T87635

Differential Revision: https://developer.blender.org/D11022
2021-08-06 11:39:38 +05:30
Gottfried Hofmann
bb5373ad7b Expose Color Management as argument for gpu.types.GPUOffScreen.draw_view3d()
Fix for https://developer.blender.org/T84227

The problem was that https://developer.blender.org/rBe0ffb911a22bb03755687f45fc1a996870e059a8 turned color management for offscreen rendering off by default, which makes the output non-color-managed in some cases. So the idea here is that script authors get the choice whether they want color-managed or non-color-managed output. Thus this patch introduces a new boolean argument, do_color_management, to gpu.types.GPUOffScreen.draw_view3d().

Reviewed By: jbakker

Differential Revision: https://developer.blender.org/D11645
2021-08-06 11:39:38 +05:30
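A hedged usage sketch, assuming the new argument lands as the keyword do_color_management and that a 3D Viewport area is open:

import bpy
import gpu

offscreen = gpu.types.GPUOffScreen(512, 512)
area = next(a for a in bpy.context.screen.areas if a.type == 'VIEW_3D')
space = area.spaces.active
region = next(r for r in area.regions if r.type == 'WINDOW')

offscreen.draw_view3d(
    bpy.context.scene,
    bpy.context.view_layer,
    space,
    region,
    space.region_3d.view_matrix,
    space.region_3d.window_matrix,
    do_color_management=True,  # the new argument introduced by this patch
)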
Anthony Edlin
791cb92b96 Make loopcut drawing consistent between gizmo and operator.
Loopcut drawing from gizmo had thicker lines because
it was using line smoothing without alpha blend, compared
to thin jagged lines from operator.

Make the drawing anti aliased and consistent by using
3D_POLYLINE/3D_POINT shaders, and making sure alpha
blending is on.

Reviewed By: #eevee_viewport, fclem

Differential Revision: https://developer.blender.org/D11333
2021-08-06 11:39:38 +05:30
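A hedged sketch of the drawing setup described above, meant to run inside a viewport draw handler; shader and uniform names follow the builtin 3D_POLYLINE shaders:

import gpu
from gpu_extras.batch import batch_for_shader

shader = gpu.shader.from_builtin('3D_POLYLINE_UNIFORM_COLOR')
batch = batch_for_shader(shader, 'LINE_STRIP',
                         {"pos": [(0, 0, 0), (1, 0, 0), (1, 1, 0)]})

gpu.state.blend_set('ALPHA')  # make sure alpha blending is on
shader.bind()
shader.uniform_float("viewportSize", gpu.state.viewport_get()[2:])
shader.uniform_float("lineWidth", 2.0)
shader.uniform_float("color", (1.0, 1.0, 1.0, 0.8))
batch.draw(shader)
gpu.state.blend_set('NONE')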
3f0d85de05 Viewport normal drawing with constant length
Patch for: T37878

{F10169694}

Reviewed By: fclem

Differential Revision: https://developer.blender.org/D11487
2021-08-06 11:39:37 +05:30
506a2f43b6 Modifier: warn if the ocean simulation fails to allocate memory
While most modifiers don't handle out-of-memory cases, the ocean
simulation could attempt huge allocations: 2048 GB at the maximum
resolution.

Resolves T83952.
2021-08-06 11:39:35 +05:30
1f13ff614d Override: API update_operations.
The update_operations function updates the override structure of the
local object. When working with overrides, the override structure is only
updated when the work-file is stored. When using scripts you might want
to force the update of override properties and operations.

This function removes a hack from the test cases.

Reviewed By: mont29

Maniphest Tasks: T86656

Differential Revision: https://developer.blender.org/D10848
2021-08-06 11:39:35 +05:30
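A hedged sketch of forcing the update from a script, using the operations_update name from the rename commit earlier in this list:

import bpy

ob = bpy.data.objects["OverriddenObject"]  # hypothetical overridden ID
ob.location.x += 1.0                       # make a local edit on the override
ob.override_library.operations_update()    # record it without saving the file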
c6e1b2f015 T90371: Asset: Drop Material Tooltip.
This patch changes the drop named material tooltip to give the user
feedback on what is going to happen when they invoke the drop.

There are 3 states:
* "": Operator will be canceled, as not all data is present (dropping on
  background).
* "Drop <named material> on <object name> (slot <slot number>, replacing
  <current material in slot>)."
* "Drop <named material> on <object name> (slot <slot number>)."

Reviewed By: Severin

Maniphest Tasks: T90371

Differential Revision: https://developer.blender.org/D12106
2021-08-06 11:39:34 +05:30
790c6740dc Cleanup: use C comments for descriptive text 2021-08-06 11:39:34 +05:30
16cd543e4f Cleanup: add comment to fix for T90417 2021-08-06 11:39:33 +05:30
Johnny Matthews
f36013a78d Geometry Nodes: Curve Set Spline Type
This node sets the selected (or all) splines in a curve to a chosen target
spline type. Poly, Bezier, and NURBS splines can be converted to any of
the other types. This is meant to be a building block node, useful in
many procedural situations.

In the future the node could be optimized with multi-threading, or by
avoiding copying in many cases, either by retrieving the curve for write
access or by passing the raw vectors to the new splines where possible.

With edits from Hans Goudey (@HooglyBoogly)

Differential Revision: https://developer.blender.org/D12013
2021-08-06 11:39:32 +05:30
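A hedged sketch of adding the node from Python; the GeometryNodeCurveSplineType idname and the spline_type property are assumptions, not confirmed by this log:

import bpy

group = bpy.data.node_groups.new("SplineTypeDemo", 'GeometryNodeTree')
node = group.nodes.new('GeometryNodeCurveSplineType')  # assumed idname
node.spline_type = 'BEZIER'  # Poly, Bezier, or NURBS per the message above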
11ef414ba3 Fix T90417: font loading creates duplicate ID names
Also repair any errors in existing files.

Error from e0dd3fe587.
2021-08-06 11:39:31 +05:30
39df796ec9 Cleanup: de-duplicate ID renaming utility for versioning 2021-08-06 11:39:30 +05:30
e17731fc9c Icons: add license headers to utilities 2021-08-06 11:39:29 +05:30
65c36dc583 Icons: resolve various issues for generating icons
- INKSCAPE_BIN environment variable was ignored by
  alert_icons_update & prvicons_update.
- `make icons` wasn't regenerating alert icons.
- Updating SVG icons failed using blender built with ASAN.
2021-08-06 11:39:28 +05:30
8804c698eb Icons: update alert icon script
Missed from c549d736cf.
2021-08-06 11:39:28 +05:30
d6ddacabc1 Cleanup: Allow early exit if operator cancelled
Cancel the pack islands operator early when there are no UV
selections to pack.
2021-08-04 22:48:41 +05:30
9974edc857 Merge branch 'master' into soc-2021-uv-editor-improvements 2021-08-04 06:13:00 +05:30
7d5ed35602 Merge branch 'master' into soc-2021-uv-editor-improvements 2021-07-17 21:40:01 +05:30
cd75125c48 UV: Keymaps for offsetting selected UVs
Adds keymaps to offset selected UVs by a fixed distance in a specified direction.
Refer T78405
2021-07-17 21:26:51 +05:30
e1abd5947f UV: Absolute grid snap for UV editor
Adds a UI toggle for absolute grid snap when using Increment snapping in
UV editor. This implementation mimics the behavior observed with the
same toggle in the 3D viewport.
2021-07-10 17:32:07 +05:30
8d642bbba6 Merge branch 'master' into soc-2021-uv-editor-improvements 2021-07-08 23:59:20 +05:30
5777ec9af9 Merge branch 'master' into soc-2021-uv-editor-improvements 2021-07-08 15:48:19 +05:30
86023928ba UV: Increment snapping based on new UV grid types
Since the default UV editor grid has been replaced with a new
subdividing grid and the dynamic grid (T78389) has also been implemented,
the increment snapping value needs to change according to the grid
configuration in use.

This commit ensures that the increment snapping value is changed
according to the grid dimensions currently in use.
Example: for an NxN grid the increment value is set to 1/N (see the sketch
below).
2021-07-04 22:51:34 +05:30
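A minimal sketch of the rule above:

def uv_increment_snap(grid_dimension: int) -> float:
    # For an NxN grid, each cell spans 1/N of UV space.
    return 1.0 / grid_dimension

assert uv_increment_snap(8) == 0.125  # the default 8x8 subdividing grid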
708f375f76 Cleanup: Refactor reusable code into functions
New functions for:
* Calculating current zoom factor used for determining the grid
  resolution in UV/Image editor
* Calculating grid steps for determining the grid spacings in UV/Image
  editor
2021-07-04 20:19:58 +05:30
28c85e60cb Cleanup: Use struct pointer
Replace struct variables with struct pointers.
2021-07-04 15:29:24 +05:30
d615f4d65e Fix: Pack islands to area operator
* Correct the packing area coordinates for cases when the user sets the
  maximum coordinates lower than the minimum coordinates (see the sketch
  below)
* Cancel the operator when the scale option is disabled and the packing
  area is not big enough for the selected islands. Also display a warning
  message in the UI when this happens
2021-07-03 14:29:56 +05:30
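A hedged sketch of the coordinate correction described above (function name hypothetical):

def normalize_packing_area(min_co, max_co):
    # Swap per-axis bounds when the user sets the maximum below the minimum.
    lo = tuple(min(a, b) for a, b in zip(min_co, max_co))
    hi = tuple(max(a, b) for a, b in zip(min_co, max_co))
    return lo, hi

assert normalize_packing_area((0.8, 0.2), (0.1, 0.9)) == ((0.1, 0.2), (0.8, 0.9))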
eb88ce5146 Merge branch 'master' into soc-2021-uv-editor-improvements 2021-07-01 18:29:10 +05:30
1f65001cae Change the max limit for Dynamic grid
The max limit for the dynamic grid was set at 12. This commit changes that
to 5000.
2021-07-01 12:02:33 +05:30
8a57e48a8f Minor Cleanup
* Remove printf statements
* Use enums
2021-06-30 21:20:18 +05:30
be270d8c8a Merge branch 'master' into soc-2021-uv-editor-improvements 2021-06-30 18:45:04 +05:30
39aa006260 UV: Replace default grid with subdividing grid
Replaces the default static grid with a dynamically subdividing grid.
This means that zooming in the UV editor will add more divisions to
the grid and vice versa when zooming out.
2021-06-30 11:55:18 +05:30
1965df11f4 UV: Dynamic Grid
Adds the option to replace the default grid in the UV editor with an NxN
grid.

Refer T78389
2021-06-26 14:02:50 +05:30
ad5983895a Merge branch 'master' into soc-2021-uv-editor-improvements 2021-06-25 19:11:12 +05:30
37e185980a UV: Display packing area coordinates
Adds properties for displaying packing area coordinates in the pack
islands to area operator.
Also fixes the issue of modified packing coordinates when the user zooms
in/out in the editor before redoing the operator from the properties
panel in the UI.
2021-06-23 21:10:59 +05:30
7e1e4889c6 Fix: Release memory before cancelling operator 2021-06-23 20:58:07 +05:30
62d2e8130f Cleanup: UV pack operators
* Remove printf statements
* Use float constants instead of double
* Cleanup comments
* Use enums to make the code more readable
2021-06-23 20:42:04 +05:30
2b37edebdf Merge branch 'master' into soc-2021-uv-editor-improvements 2021-06-21 19:10:47 +05:30
e5ab28d392 UV: Pack islands to box area
Adds a new operator to the UV editor - Pack islands to area.
Allows users to pack selected UV islands to a specified
area in the UV editor.

Refer T78398
2021-06-19 22:10:34 +05:30
2771b931b5 Merge branch 'master' into soc-2021-uv-editor-improvements 2021-06-17 14:12:33 +05:30
f76eca3af2 Merge branch 'master' into soc-2021-uv-editor-improvements 2021-06-14 17:14:18 +05:30
279a67ecc8 Minor fix: Correct calculation of nearest UDIM
Corrects the logic for calculating the distance between selected UVs and
UDIM tiles
2021-06-11 20:50:38 +05:30
1a89001151 UV: Pack islands to correct/specified UDIM
Adds 2 features to the pack islands operator:

 * Packing selected UVs to the closest UDIM (see the sketch below)
 * Packing selected UVs to a user-specified UDIM
2021-06-11 19:36:24 +05:30
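A minimal sketch of the "closest UDIM" idea, mapping an island's center UV to a tile number under the standard 1001 + u + 10 * v numbering:

def udim_tile_from_center(center_u: float, center_v: float) -> int:
    # UDIM columns cover u in [0, 10); clamp so off-grid centers snap to the edge.
    tile_u = min(9, max(0, int(center_u)))
    tile_v = max(0, int(center_v))
    return 1001 + tile_u + 10 * tile_v

assert udim_tile_from_center(0.5, 0.5) == 1001
assert udim_tile_from_center(1.3, 2.7) == 1022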
2126fc815d Merge branch 'master' into soc-2021-uv-editor-improvements 2021-06-10 15:45:47 +05:30
790d11899a Merge branch 'master' into soc-2021-uv-editor-improvements 2021-06-08 20:58:04 +05:30
6a422b6624 Test commit. Add missing tooltips for pivot options in UV Editor. 2021-06-08 19:39:20 +05:30
983 changed files with 9374 additions and 38358 deletions

View File

@@ -419,8 +419,6 @@ mark_as_advanced(WITH_CYCLES_NATIVE_ONLY)
option(WITH_CYCLES_DEVICE_CUDA "Enable Cycles CUDA compute support" ON)
option(WITH_CYCLES_DEVICE_OPTIX "Enable Cycles OptiX support" ON)
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles HIP support" OFF)
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
mark_as_advanced(WITH_CYCLES_DEVICE_CUDA)
option(WITH_CUDA_DYNLOAD "Dynamically load CUDA libraries at runtime" ON)
@@ -823,11 +821,6 @@ if(NOT WITH_CUDA_DYNLOAD)
endif()
endif()
if(WITH_CYCLES_DEVICE_HIP)
# Currently HIP must be dynamically loaded, this may change in future toolkits
set(WITH_HIP_DYNLOAD ON)
endif()
#-----------------------------------------------------------------------------
# Check check if submodules are cloned
@@ -1857,9 +1850,6 @@ elseif(WITH_CYCLES_STANDALONE)
if(WITH_CUDA_DYNLOAD)
add_subdirectory(extern/cuew)
endif()
if(WITH_HIP_DYNLOAD)
add_subdirectory(extern/hipew)
endif()
if(NOT WITH_SYSTEM_GLEW)
add_subdirectory(extern/glew)
endif()

View File

@@ -70,18 +70,16 @@
}
--- a/libavcodec/rl.c
+++ b/libavcodec/rl.c
@@ -71,17 +71,19 @@
@@ -71,7 +71,7 @@ av_cold void ff_rl_init(RLTable *rl,
av_cold void ff_rl_init_vlc(RLTable *rl, unsigned static_size)
{
int i, q;
- VLC_TYPE table[1500][2] = {{0}};
+ VLC_TYPE (*table)[2] = av_calloc(sizeof(VLC_TYPE), 1500 * 2);
VLC vlc = { .table = table, .table_allocated = static_size };
- av_assert0(static_size <= FF_ARRAY_ELEMS(table));
+ av_assert0(static_size < 1500);
av_assert0(static_size <= FF_ARRAY_ELEMS(table));
init_vlc(&vlc, 9, rl->n + 1, &rl->table_vlc[0][1], 4, 2, &rl->table_vlc[0][0], 4, 2, INIT_VLC_USE_NEW_STATIC);
for (q = 0; q < 32; q++) {
@@ -80,8 +80,10 @@ av_cold void ff_rl_init_vlc(RLTable *rl, unsigned static_size)
int qmul = q * 2;
int qadd = (q - 1) | 1;
@@ -93,7 +91,7 @@
if (q == 0) {
qmul = 1;
@@ -113,4 +115,5 @@
@@ -113,4 +115,5 @@ av_cold void ff_rl_init_vlc(RLTable *rl, unsigned static_size)
rl->rl_vlc[q][i].run = run;
}
}

View File

@@ -5,7 +5,7 @@
update-code:
git:
submodules:
- branch: xr-controller-support
- branch: master
commit_id: HEAD
path: release/scripts/addons
- branch: master

View File

@@ -178,7 +178,7 @@ def submodules_update(args, release_version, branch):
branch = branch_fallback
submodules = [
("release/scripts/addons", "xr-controller-support", branch_fallback),
("release/scripts/addons", branch, branch_fallback),
("release/scripts/addons_contrib", branch, branch_fallback),
("release/datafiles/locale", branch, branch_fallback),
("source/tools", branch, branch_fallback),

View File

@@ -1,40 +0,0 @@
"""
This method enables conversions between Local and Pose space for bones in
the middle of updating the armature without having to update dependencies
after each change, by manually carrying updated matrices in a recursive walk.
"""
def set_pose_matrices(obj, matrix_map):
"Assign pose space matrices of all bones at once, ignoring constraints."
def rec(pbone, parent_matrix):
matrix = matrix_map[pbone.name]
## Instead of:
# pbone.matrix = matrix
# bpy.context.view_layer.update()
# Compute and assign local matrix, using the new parent matrix
if pbone.parent:
pbone.matrix_basis = pbone.bone.convert_local_to_pose(
matrix,
pbone.bone.matrix_local,
parent_matrix=parent_matrix,
parent_matrix_local=pbone.parent.bone.matrix_local,
invert=True
)
else:
pbone.matrix_basis = pbone.bone.convert_local_to_pose(
matrix,
pbone.bone.matrix_local,
invert=True
)
# Recursively process children, passing the new matrix through
for child in pbone.children:
rec(child, matrix)
# Scan all bone trees from their roots
for pbone in obj.pose.bones:
if not pbone.parent:
rec(pbone, None)

View File

@@ -67,12 +67,9 @@ endif()
if(WITH_CYCLES OR WITH_COMPOSITOR OR WITH_OPENSUBDIV)
add_subdirectory(clew)
if((WITH_CYCLES_DEVICE_CUDA OR WITH_CYCLES_DEVICE_OPTIX) AND WITH_CUDA_DYNLOAD)
if(WITH_CUDA_DYNLOAD)
add_subdirectory(cuew)
endif()
if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
add_subdirectory(hipew)
endif()
endif()
if(WITH_GHOST_X11 AND WITH_GHOST_XDND)

View File

@@ -102,30 +102,26 @@ AUD_API int AUD_Sound_getFileStreams(AUD_Sound* sound, AUD_StreamInfo **stream_i
if(file)
{
try
{
auto streams = file->queryStreams();
size_t size = sizeof(AUD_StreamInfo) * streams.size();
if(!size)
{
*stream_infos = nullptr;
return 0;
}
*stream_infos = reinterpret_cast<AUD_StreamInfo*>(std::malloc(size));
std::memcpy(*stream_infos, streams.data(), size);
return streams.size();
}
catch(Exception&)
auto streams = file->queryStreams();
size_t size = sizeof(AUD_StreamInfo) * streams.size();
if(!size)
{
*stream_infos = nullptr;
return 0;
}
*stream_infos = reinterpret_cast<AUD_StreamInfo*>(std::malloc(size));
std::memcpy(*stream_infos, streams.data(), size);
return streams.size();
}
else
{
*stream_infos = nullptr;
return 0;
}
*stream_infos = nullptr;
return 0;
}
AUD_API sample_t* AUD_Sound_data(AUD_Sound* sound, int* length, AUD_Specs* specs)

View File

@@ -1,39 +0,0 @@
# ***** BEGIN GPL LICENSE BLOCK *****
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# The Original Code is Copyright (C) 2021, Blender Foundation
# All rights reserved.
# ***** END GPL LICENSE BLOCK *****
set(INC
.
include
)
set(INC_SYS
)
set(SRC
src/hipew.c
include/hipew.h
)
set(LIB
)
blender_add_lib(extern_hipew "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")

File diff suppressed because it is too large

View File

@@ -1,533 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*/
#ifdef _MSC_VER
# if _MSC_VER < 1900
# define snprintf _snprintf
# endif
# define popen _popen
# define pclose _pclose
# define _CRT_SECURE_NO_WARNINGS
#endif
#include <hipew.h>
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# define VC_EXTRALEAN
# include <windows.h>
/* Utility macros. */
typedef HMODULE DynamicLibrary;
# define dynamic_library_open(path) LoadLibraryA(path)
# define dynamic_library_close(lib) FreeLibrary(lib)
# define dynamic_library_find(lib, symbol) GetProcAddress(lib, symbol)
#else
# include <dlfcn.h>
typedef void* DynamicLibrary;
# define dynamic_library_open(path) dlopen(path, RTLD_NOW)
# define dynamic_library_close(lib) dlclose(lib)
# define dynamic_library_find(lib, symbol) dlsym(lib, symbol)
#endif
#define _LIBRARY_FIND_CHECKED(lib, name) \
name = (t##name *)dynamic_library_find(lib, #name); \
assert(name);
#define _LIBRARY_FIND(lib, name) \
name = (t##name *)dynamic_library_find(lib, #name);
#define HIP_LIBRARY_FIND_CHECKED(name) \
_LIBRARY_FIND_CHECKED(hip_lib, name)
#define HIP_LIBRARY_FIND(name) _LIBRARY_FIND(hip_lib, name)
static DynamicLibrary hip_lib;
/* Function definitions. */
thipGetErrorName *hipGetErrorName;
thipInit *hipInit;
thipDriverGetVersion *hipDriverGetVersion;
thipGetDevice *hipGetDevice;
thipGetDeviceCount *hipGetDeviceCount;
thipDeviceGetName *hipDeviceGetName;
thipDeviceGetAttribute *hipDeviceGetAttribute;
thipDeviceComputeCapability *hipDeviceComputeCapability;
thipDevicePrimaryCtxRetain *hipDevicePrimaryCtxRetain;
thipDevicePrimaryCtxRelease *hipDevicePrimaryCtxRelease;
thipDevicePrimaryCtxSetFlags *hipDevicePrimaryCtxSetFlags;
thipDevicePrimaryCtxGetState *hipDevicePrimaryCtxGetState;
thipDevicePrimaryCtxReset *hipDevicePrimaryCtxReset;
thipCtxCreate *hipCtxCreate;
thipCtxDestroy *hipCtxDestroy;
thipCtxPushCurrent *hipCtxPushCurrent;
thipCtxPopCurrent *hipCtxPopCurrent;
thipCtxSetCurrent *hipCtxSetCurrent;
thipCtxGetCurrent *hipCtxGetCurrent;
thipCtxGetDevice *hipCtxGetDevice;
thipCtxGetFlags *hipCtxGetFlags;
thipCtxSynchronize *hipCtxSynchronize;
thipDeviceSynchronize *hipDeviceSynchronize;
thipCtxGetCacheConfig *hipCtxGetCacheConfig;
thipCtxSetCacheConfig *hipCtxSetCacheConfig;
thipCtxGetSharedMemConfig *hipCtxGetSharedMemConfig;
thipCtxSetSharedMemConfig *hipCtxSetSharedMemConfig;
thipCtxGetApiVersion *hipCtxGetApiVersion;
thipModuleLoad *hipModuleLoad;
thipModuleLoadData *hipModuleLoadData;
thipModuleLoadDataEx *hipModuleLoadDataEx;
thipModuleUnload *hipModuleUnload;
thipModuleGetFunction *hipModuleGetFunction;
thipModuleGetGlobal *hipModuleGetGlobal;
thipModuleGetTexRef *hipModuleGetTexRef;
thipMemGetInfo *hipMemGetInfo;
thipMalloc *hipMalloc;
thipMemAllocPitch *hipMemAllocPitch;
thipFree *hipFree;
thipMemGetAddressRange *hipMemGetAddressRange;
thipHostMalloc *hipHostMalloc;
thipHostFree *hipHostFree;
thipHostGetDevicePointer *hipHostGetDevicePointer;
thipHostGetFlags *hipHostGetFlags;
thipMallocManaged *hipMallocManaged;
thipDeviceGetByPCIBusId *hipDeviceGetByPCIBusId;
thipDeviceGetPCIBusId *hipDeviceGetPCIBusId;
thipMemcpyPeer *hipMemcpyPeer;
thipMemcpyHtoD *hipMemcpyHtoD;
thipMemcpyDtoH *hipMemcpyDtoH;
thipMemcpyDtoD *hipMemcpyDtoD;
thipDrvMemcpy2DUnaligned *hipDrvMemcpy2DUnaligned;
thipMemcpyParam2D *hipMemcpyParam2D;
thipDrvMemcpy3D *hipDrvMemcpy3D;
thipMemcpyHtoDAsync *hipMemcpyHtoDAsync;
thipMemcpyDtoHAsync *hipMemcpyDtoHAsync;
thipMemcpyParam2DAsync *hipMemcpyParam2DAsync;
thipDrvMemcpy3DAsync *hipDrvMemcpy3DAsync;
thipMemsetD8 *hipMemsetD8;
thipMemsetD16 *hipMemsetD16;
thipMemsetD32 *hipMemsetD32;
thipMemsetD8Async *hipMemsetD8Async;
thipMemsetD16Async *hipMemsetD16Async;
thipMemsetD32Async *hipMemsetD32Async;
thipArrayCreate *hipArrayCreate;
thipArrayDestroy *hipArrayDestroy;
thipArray3DCreate *hipArray3DCreate;
thipStreamCreateWithFlags *hipStreamCreateWithFlags;
thipStreamCreateWithPriority *hipStreamCreateWithPriority;
thipStreamGetPriority *hipStreamGetPriority;
thipStreamGetFlags *hipStreamGetFlags;
thipStreamWaitEvent *hipStreamWaitEvent;
thipStreamAddCallback *hipStreamAddCallback;
thipStreamQuery *hipStreamQuery;
thipStreamSynchronize *hipStreamSynchronize;
thipStreamDestroy *hipStreamDestroy;
thipEventCreateWithFlags *hipEventCreateWithFlags;
thipEventRecord *hipEventRecord;
thipEventQuery *hipEventQuery;
thipEventSynchronize *hipEventSynchronize;
thipEventDestroy *hipEventDestroy;
thipEventElapsedTime *hipEventElapsedTime;
thipFuncGetAttribute *hipFuncGetAttribute;
thipFuncSetCacheConfig *hipFuncSetCacheConfig;
thipModuleLaunchKernel *hipModuleLaunchKernel;
thipDrvOccupancyMaxActiveBlocksPerMultiprocessor *hipDrvOccupancyMaxActiveBlocksPerMultiprocessor;
thipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags *hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
thipModuleOccupancyMaxPotentialBlockSize *hipModuleOccupancyMaxPotentialBlockSize;
thipTexRefSetArray *hipTexRefSetArray;
thipTexRefSetAddress *hipTexRefSetAddress;
thipTexRefSetAddress2D *hipTexRefSetAddress2D;
thipTexRefSetFormat *hipTexRefSetFormat;
thipTexRefSetAddressMode *hipTexRefSetAddressMode;
thipTexRefSetFilterMode *hipTexRefSetFilterMode;
thipTexRefSetFlags *hipTexRefSetFlags;
thipTexRefGetAddress *hipTexRefGetAddress;
thipTexRefGetArray *hipTexRefGetArray;
thipTexRefGetAddressMode *hipTexRefGetAddressMode;
thipTexObjectCreate *hipTexObjectCreate;
thipTexObjectDestroy *hipTexObjectDestroy;
thipDeviceCanAccessPeer *hipDeviceCanAccessPeer;
thipCtxEnablePeerAccess *hipCtxEnablePeerAccess;
thipCtxDisablePeerAccess *hipCtxDisablePeerAccess;
thipDeviceGetP2PAttribute *hipDeviceGetP2PAttribute;
thipGraphicsUnregisterResource *hipGraphicsUnregisterResource;
thipGraphicsMapResources *hipGraphicsMapResources;
thipGraphicsUnmapResources *hipGraphicsUnmapResources;
thipGraphicsResourceGetMappedPointer *hipGraphicsResourceGetMappedPointer;
thipGraphicsGLRegisterBuffer *hipGraphicsGLRegisterBuffer;
thipGLGetDevices *hipGLGetDevices;
static DynamicLibrary dynamic_library_open_find(const char **paths) {
int i = 0;
while (paths[i] != NULL) {
DynamicLibrary lib = dynamic_library_open(paths[i]);
if (lib != NULL) {
return lib;
}
++i;
}
return NULL;
}
/* Implementation function. */
static void hipewHipExit(void) {
if (hip_lib != NULL) {
/* Ignore errors. */
dynamic_library_close(hip_lib);
hip_lib = NULL;
}
}
static int hipewHipInit(void) {
/* Library paths. */
#ifdef _WIN32
/* Expected in c:/windows/system or similar, no path needed. */
const char *hip_paths[] = {"amdhip64.dll", NULL};
#elif defined(__APPLE__)
/* Default installation path. */
const char *hip_paths[] = {"", NULL};
#else
const char *hip_paths[] = {"/opt/rocm/hip/lib/libamdhip64.so", NULL};
#endif
static int initialized = 0;
static int result = 0;
int error, driver_version;
if (initialized) {
return result;
}
initialized = 1;
error = atexit(hipewHipExit);
if (error) {
result = HIPEW_ERROR_ATEXIT_FAILED;
return result;
}
/* Load library. */
hip_lib = dynamic_library_open_find(hip_paths);
if (hip_lib == NULL) {
result = HIPEW_ERROR_OPEN_FAILED;
return result;
}
/* Fetch all function pointers. */
HIP_LIBRARY_FIND_CHECKED(hipGetErrorName);
HIP_LIBRARY_FIND_CHECKED(hipInit);
HIP_LIBRARY_FIND_CHECKED(hipDriverGetVersion);
HIP_LIBRARY_FIND_CHECKED(hipGetDevice);
HIP_LIBRARY_FIND_CHECKED(hipGetDeviceCount);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetName);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetAttribute);
HIP_LIBRARY_FIND_CHECKED(hipDeviceComputeCapability);
HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxRetain);
HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxRelease);
HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxSetFlags);
HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxGetState);
HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxReset);
HIP_LIBRARY_FIND_CHECKED(hipCtxCreate);
HIP_LIBRARY_FIND_CHECKED(hipCtxDestroy);
HIP_LIBRARY_FIND_CHECKED(hipCtxPushCurrent);
HIP_LIBRARY_FIND_CHECKED(hipCtxPopCurrent);
HIP_LIBRARY_FIND_CHECKED(hipCtxSetCurrent);
HIP_LIBRARY_FIND_CHECKED(hipCtxGetCurrent);
HIP_LIBRARY_FIND_CHECKED(hipCtxGetDevice);
HIP_LIBRARY_FIND_CHECKED(hipCtxGetFlags);
HIP_LIBRARY_FIND_CHECKED(hipCtxSynchronize);
HIP_LIBRARY_FIND_CHECKED(hipDeviceSynchronize);
HIP_LIBRARY_FIND_CHECKED(hipCtxGetCacheConfig);
HIP_LIBRARY_FIND_CHECKED(hipCtxSetCacheConfig);
HIP_LIBRARY_FIND_CHECKED(hipCtxGetSharedMemConfig);
HIP_LIBRARY_FIND_CHECKED(hipCtxSetSharedMemConfig);
HIP_LIBRARY_FIND_CHECKED(hipCtxGetApiVersion);
HIP_LIBRARY_FIND_CHECKED(hipModuleLoad);
HIP_LIBRARY_FIND_CHECKED(hipModuleLoadData);
HIP_LIBRARY_FIND_CHECKED(hipModuleLoadDataEx);
HIP_LIBRARY_FIND_CHECKED(hipModuleUnload);
HIP_LIBRARY_FIND_CHECKED(hipModuleGetFunction);
HIP_LIBRARY_FIND_CHECKED(hipModuleGetGlobal);
HIP_LIBRARY_FIND_CHECKED(hipModuleGetTexRef);
HIP_LIBRARY_FIND_CHECKED(hipMemGetInfo);
HIP_LIBRARY_FIND_CHECKED(hipMalloc);
HIP_LIBRARY_FIND_CHECKED(hipMemAllocPitch);
HIP_LIBRARY_FIND_CHECKED(hipFree);
HIP_LIBRARY_FIND_CHECKED(hipMemGetAddressRange);
HIP_LIBRARY_FIND_CHECKED(hipHostMalloc);
HIP_LIBRARY_FIND_CHECKED(hipHostFree);
HIP_LIBRARY_FIND_CHECKED(hipHostGetDevicePointer);
HIP_LIBRARY_FIND_CHECKED(hipHostGetFlags);
HIP_LIBRARY_FIND_CHECKED(hipMallocManaged);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetByPCIBusId);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetPCIBusId);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyPeer);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyHtoD);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoH);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoD);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyParam2D);
HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy3D);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyHtoDAsync);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoHAsync);
HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy2DUnaligned);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyParam2DAsync);
HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy3DAsync);
HIP_LIBRARY_FIND_CHECKED(hipMemsetD8);
HIP_LIBRARY_FIND_CHECKED(hipMemsetD16);
HIP_LIBRARY_FIND_CHECKED(hipMemsetD32);
HIP_LIBRARY_FIND_CHECKED(hipMemsetD8Async);
HIP_LIBRARY_FIND_CHECKED(hipMemsetD16Async);
HIP_LIBRARY_FIND_CHECKED(hipMemsetD32Async);
HIP_LIBRARY_FIND_CHECKED(hipArrayCreate);
HIP_LIBRARY_FIND_CHECKED(hipArrayDestroy);
HIP_LIBRARY_FIND_CHECKED(hipArray3DCreate);
HIP_LIBRARY_FIND_CHECKED(hipStreamCreateWithFlags);
HIP_LIBRARY_FIND_CHECKED(hipStreamCreateWithPriority);
HIP_LIBRARY_FIND_CHECKED(hipStreamGetPriority);
HIP_LIBRARY_FIND_CHECKED(hipStreamGetFlags);
HIP_LIBRARY_FIND_CHECKED(hipStreamWaitEvent);
HIP_LIBRARY_FIND_CHECKED(hipStreamAddCallback);
HIP_LIBRARY_FIND_CHECKED(hipStreamQuery);
HIP_LIBRARY_FIND_CHECKED(hipStreamSynchronize);
HIP_LIBRARY_FIND_CHECKED(hipStreamDestroy);
HIP_LIBRARY_FIND_CHECKED(hipEventCreateWithFlags);
HIP_LIBRARY_FIND_CHECKED(hipEventRecord);
HIP_LIBRARY_FIND_CHECKED(hipEventQuery);
HIP_LIBRARY_FIND_CHECKED(hipEventSynchronize);
HIP_LIBRARY_FIND_CHECKED(hipEventDestroy);
HIP_LIBRARY_FIND_CHECKED(hipEventElapsedTime);
HIP_LIBRARY_FIND_CHECKED(hipFuncGetAttribute);
HIP_LIBRARY_FIND_CHECKED(hipFuncSetCacheConfig);
HIP_LIBRARY_FIND_CHECKED(hipModuleLaunchKernel);
HIP_LIBRARY_FIND_CHECKED(hipModuleOccupancyMaxPotentialBlockSize);
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetArray);
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetAddress);
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetAddress2D);
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetFormat);
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetAddressMode);
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetFilterMode);
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetFlags);
HIP_LIBRARY_FIND_CHECKED(hipTexRefGetAddress);
HIP_LIBRARY_FIND_CHECKED(hipTexRefGetAddressMode);
HIP_LIBRARY_FIND_CHECKED(hipTexObjectCreate);
HIP_LIBRARY_FIND_CHECKED(hipTexObjectDestroy);
HIP_LIBRARY_FIND_CHECKED(hipDeviceCanAccessPeer);
HIP_LIBRARY_FIND_CHECKED(hipCtxEnablePeerAccess);
HIP_LIBRARY_FIND_CHECKED(hipCtxDisablePeerAccess);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetP2PAttribute);
#ifdef _WIN32
HIP_LIBRARY_FIND_CHECKED(hipGraphicsUnregisterResource);
HIP_LIBRARY_FIND_CHECKED(hipGraphicsMapResources);
HIP_LIBRARY_FIND_CHECKED(hipGraphicsUnmapResources);
HIP_LIBRARY_FIND_CHECKED(hipGraphicsResourceGetMappedPointer);
HIP_LIBRARY_FIND_CHECKED(hipGraphicsGLRegisterBuffer);
HIP_LIBRARY_FIND_CHECKED(hipGLGetDevices);
#endif
result = HIPEW_SUCCESS;
return result;
}
int hipewInit(hipuint32_t flags) {
int result = HIPEW_SUCCESS;
if (flags & HIPEW_INIT_HIP) {
result = hipewHipInit();
if (result != HIPEW_SUCCESS) {
return result;
}
}
return result;
}
const char *hipewErrorString(hipError_t result) {
switch (result) {
case hipSuccess: return "No errors";
case hipErrorInvalidValue: return "Invalid value";
case hipErrorOutOfMemory: return "Out of memory";
case hipErrorNotInitialized: return "Driver not initialized";
case hipErrorDeinitialized: return "Driver deinitialized";
case hipErrorProfilerDisabled: return "Profiler disabled";
case hipErrorProfilerNotInitialized: return "Profiler not initialized";
case hipErrorProfilerAlreadyStarted: return "Profiler already started";
case hipErrorProfilerAlreadyStopped: return "Profiler already stopped";
case hipErrorNoDevice: return "No HIP-capable device available";
case hipErrorInvalidDevice: return "Invalid device";
case hipErrorInvalidImage: return "Invalid kernel image";
case hipErrorInvalidContext: return "Invalid context";
case hipErrorContextAlreadyCurrent: return "Context already current";
case hipErrorMapFailed: return "Map failed";
case hipErrorUnmapFailed: return "Unmap failed";
case hipErrorArrayIsMapped: return "Array is mapped";
case hipErrorAlreadyMapped: return "Already mapped";
case hipErrorNoBinaryForGpu: return "No binary for GPU";
case hipErrorAlreadyAcquired: return "Already acquired";
case hipErrorNotMapped: return "Not mapped";
case hipErrorNotMappedAsArray: return "Mapped resource not available for access as an array";
case hipErrorNotMappedAsPointer: return "Mapped resource not available for access as a pointer";
case hipErrorECCNotCorrectable: return "Uncorrectable ECC error detected";
case hipErrorUnsupportedLimit: return "hipLimit_t not supported by device";
case hipErrorContextAlreadyInUse: return "Context already in use";
case hipErrorPeerAccessUnsupported: return "Peer access unsupported";
case hipErrorInvalidKernelFile: return "Invalid ptx";
case hipErrorInvalidGraphicsContext: return "Invalid graphics context";
case hipErrorInvalidSource: return "Invalid source";
case hipErrorFileNotFound: return "File not found";
case hipErrorSharedObjectSymbolNotFound: return "Link to a shared object failed to resolve";
case hipErrorSharedObjectInitFailed: return "Shared object initialization failed";
case hipErrorOperatingSystem: return "Operating system";
case hipErrorInvalidHandle: return "Invalid handle";
case hipErrorNotFound: return "Not found";
case hipErrorNotReady: return "HIP not ready";
case hipErrorIllegalAddress: return "Illegal address";
case hipErrorLaunchOutOfResources: return "Launch exceeded resources";
case hipErrorLaunchTimeOut: return "Launch exceeded timeout";
case hipErrorPeerAccessAlreadyEnabled: return "Peer access already enabled";
case hipErrorPeerAccessNotEnabled: return "Peer access not enabled";
case hipErrorSetOnActiveProcess: return "Primary context active";
case hipErrorAssert: return "Assert";
case hipErrorHostMemoryAlreadyRegistered: return "Host memory already registered";
case hipErrorHostMemoryNotRegistered: return "Host memory not registered";
case hipErrorLaunchFailure: return "Launch failed";
case hipErrorCooperativeLaunchTooLarge: return "Cooperative launch too large";
case hipErrorNotSupported: return "Not supported";
case hipErrorUnknown: return "Unknown error";
default: return "Unknown HIP error value";
}
}
static void path_join(const char *path1,
const char *path2,
int maxlen,
char *result) {
#if defined(WIN32) || defined(_WIN32)
const char separator = '\\';
#else
const char separator = '/';
#endif
int n = snprintf(result, maxlen, "%s%c%s", path1, separator, path2);
if (n != -1 && n < maxlen) {
result[n] = '\0';
}
else {
result[maxlen - 1] = '\0';
}
}
static int path_exists(const char *path) {
struct stat st;
if (stat(path, &st)) {
return 0;
}
return 1;
}
const char *hipewCompilerPath(void) {
#ifdef _WIN32
const char *hipPath = getenv("HIP_ROCCLR_HOME");
const char *windowsCommand = "perl ";
const char *executable = "bin/hipcc";
static char hipcc[65536];
static char finalCommand[65536];
if(hipPath) {
path_join(hipPath, executable, sizeof(hipcc), hipcc);
if(path_exists(hipcc)) {
snprintf(finalCommand, sizeof(hipcc), "%s %s", windowsCommand, hipcc);
return finalCommand;
} else {
printf("Could not find hipcc. Make sure HIP_ROCCLR_HOME points to the directory holding /bin/hipcc");
}
}
#else
const char *hipPath = "opt/rocm/hip/bin";
const char *executable = "hipcc";
static char hipcc[65536];
if(hipPath) {
path_join(hipPath, executable, sizeof(hipcc), hipcc);
if(path_exists(hipcc)){
return hipcc;
}
}
#endif
{
#ifdef _WIN32
FILE *handle = popen("where hipcc", "r");
#else
FILE *handle = popen("which hipcc", "r");
#endif
if (handle) {
char buffer[4096] = {0};
int len = fread(buffer, 1, sizeof(buffer) - 1, handle);
buffer[len] = '\0';
pclose(handle);
if (buffer[0]) {
return "hipcc";
}
}
}
return NULL;
}
int hipewCompilerVersion(void) {
const char *path = hipewCompilerPath();
const char *marker = "Hip compilation tools, release ";
FILE *pipe;
int major, minor;
char *versionstr;
char buf[128];
char output[65536] = "\0";
char command[65536] = "\0";
if (path == NULL) {
return 0;
}
/* get --version output */
strcat(command, "\"");
strncat(command, path, sizeof(command) - 1);
strncat(command, "\" --version", sizeof(command) - strlen(path) - 1);
pipe = popen(command, "r");
if (!pipe) {
fprintf(stderr, "HIP: failed to run compiler to retrieve version");
return 0;
}
while (!feof(pipe)) {
if (fgets(buf, sizeof(buf), pipe) != NULL) {
strncat(output, buf, sizeof(output) - strlen(output) - 1);
}
}
pclose(pipe);
return 40;
}

View File

@@ -2,5 +2,4 @@ Project: TinyGLTF
URL: https://github.com/syoyo/tinygltf
License: MIT
Upstream version: 2.5.0, 19a41d20ec0
Local modifications:
* Silence "enum value not handled in switch" warnings due to JSON dependency.
Local modifications: None

View File

@@ -3201,7 +3201,6 @@ static bool ParseJsonAsValue(Value *ret, const json &o) {
val = Value(o.get<double>());
break;
case json::value_t::null:
case json::value_t::binary:
case json::value_t::discarded:
// default:
break;

View File

@@ -297,7 +297,6 @@ endif()
if(WITH_CYCLES_STANDALONE)
set(WITH_CYCLES_DEVICE_CUDA TRUE)
set(WITH_CYCLES_DEVICE_HIP TRUE)
endif()
# TODO(sergey): Consider removing it, only causes confusion in interface.
set(WITH_CYCLES_DEVICE_MULTI TRUE)

View File

@@ -64,8 +64,6 @@ if(WITH_CYCLES_STANDALONE)
cycles_standalone.cpp
cycles_xml.cpp
cycles_xml.h
oiio_output_driver.cpp
oiio_output_driver.h
)
add_executable(cycles ${SRC} ${INC} ${INC_SYS})
unset(SRC)
@@ -75,7 +73,7 @@ if(WITH_CYCLES_STANDALONE)
if(APPLE)
if(WITH_OPENCOLORIO)
set_property(TARGET cycles APPEND_STRING PROPERTY LINK_FLAGS " -framework IOKit -framework Carbon")
set_property(TARGET cycles APPEND_STRING PROPERTY LINK_FLAGS " -framework IOKit")
endif()
if(WITH_OPENIMAGEDENOISE AND "${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64")
# OpenImageDenoise uses BNNS from the Accelerate framework.

View File

@@ -36,9 +36,6 @@
#include "util/util_unique_ptr.h"
#include "util/util_version.h"
#include "app/cycles_xml.h"
#include "app/oiio_output_driver.h"
#ifdef WITH_CYCLES_STANDALONE_GUI
# include "util/util_view.h"
#endif
@@ -56,8 +53,7 @@ struct Options {
SessionParams session_params;
bool quiet;
bool show_help, interactive, pause;
string output_filepath;
string output_pass;
string output_path;
} options;
static void session_print(const string &str)
@@ -93,6 +89,30 @@ static void session_print_status()
session_print(status);
}
static bool write_render(const uchar *pixels, int w, int h, int channels)
{
string msg = string_printf("Writing image %s", options.output_path.c_str());
session_print(msg);
unique_ptr<ImageOutput> out = unique_ptr<ImageOutput>(ImageOutput::create(options.output_path));
if (!out) {
return false;
}
ImageSpec spec(w, h, channels, TypeDesc::UINT8);
if (!out->open(options.output_path, spec)) {
return false;
}
/* conversion for different top/bottom convention */
out->write_image(
TypeDesc::UINT8, pixels + (h - 1) * w * channels, AutoStride, -w * channels, AutoStride);
out->close();
return true;
}
static BufferParams &session_buffer_params()
{
static BufferParams buffer_params;
@@ -127,14 +147,9 @@ static void scene_init()
static void session_init()
{
options.output_pass = "combined";
options.session_params.write_render_cb = write_render;
options.session = new Session(options.session_params, options.scene_params);
if (!options.output_filepath.empty()) {
options.session->set_output_driver(make_unique<OIIOOutputDriver>(
options.output_filepath, options.output_pass, session_print));
}
if (options.session_params.background && !options.quiet)
options.session->progress.set_update_callback(function_bind(&session_print_status));
#ifdef WITH_CYCLES_STANDALONE_GUI
@@ -145,12 +160,7 @@ static void session_init()
/* load scene */
scene_init();
/* add pass for output. */
Pass *pass = options.scene->create_node<Pass>();
pass->set_name(ustring(options.output_pass.c_str()));
pass->set_type(PASS_COMBINED);
options.session->reset(options.session_params, session_buffer_params());
options.session->reset(session_buffer_params(), options.session_params.samples);
options.session->start();
}
@@ -212,7 +222,9 @@ static void display_info(Progress &progress)
static void display()
{
options.session->draw();
static DeviceDrawParams draw_params = DeviceDrawParams();
options.session->draw(session_buffer_params(), draw_params);
display_info(options.session->progress);
}
@@ -242,7 +254,7 @@ static void motion(int x, int y, int button)
options.session->scene->camera->need_flags_update = true;
options.session->scene->camera->need_device_update = true;
options.session->reset(options.session_params, session_buffer_params());
options.session->reset(session_buffer_params(), options.session_params.samples);
}
}
@@ -259,7 +271,7 @@ static void resize(int width, int height)
options.session->scene->camera->need_flags_update = true;
options.session->scene->camera->need_device_update = true;
options.session->reset(options.session_params, session_buffer_params());
options.session->reset(session_buffer_params(), options.session_params.samples);
}
}
@@ -271,7 +283,7 @@ static void keyboard(unsigned char key)
/* Reset */
else if (key == 'r')
options.session->reset(options.session_params, session_buffer_params());
options.session->reset(session_buffer_params(), options.session_params.samples);
/* Cancel */
else if (key == 27) // escape
@@ -308,7 +320,7 @@ static void keyboard(unsigned char key)
options.session->scene->camera->need_flags_update = true;
options.session->scene->camera->need_device_update = true;
options.session->reset(options.session_params, session_buffer_params());
options.session->reset(session_buffer_params(), options.session_params.samples);
}
/* Set Max Bounces */
@@ -334,7 +346,7 @@ static void keyboard(unsigned char key)
options.session->scene->integrator->set_max_bounce(bounce);
options.session->reset(options.session_params, session_buffer_params());
options.session->reset(session_buffer_params(), options.session_params.samples);
}
}
#endif
@@ -349,13 +361,11 @@ static int files_parse(int argc, const char *argv[])
static void options_parse(int argc, const char **argv)
{
options.width = 1024;
options.height = 512;
options.width = 0;
options.height = 0;
options.filepath = "";
options.session = NULL;
options.quiet = false;
options.session_params.use_auto_tile = false;
options.session_params.tile_size = 0;
/* device names */
string device_names = "";
@@ -401,7 +411,7 @@ static void options_parse(int argc, const char **argv)
&options.session_params.samples,
"Number of samples to render",
"--output %s",
&options.output_filepath,
&options.output_path,
"File path to write output image",
"--threads %d",
&options.session_params.threads,
@@ -412,9 +422,12 @@ static void options_parse(int argc, const char **argv)
"--height %d",
&options.height,
"Window height in pixel",
"--tile-size %d",
&options.session_params.tile_size,
"Tile size in pixels",
"--tile-width %d",
&options.session_params.tile_size.x,
"Tile width in pixels",
"--tile-height %d",
&options.session_params.tile_size.y,
"Tile height in pixels",
"--list-devices",
&list,
"List information about all available devices",
@@ -476,9 +489,8 @@ static void options_parse(int argc, const char **argv)
options.session_params.background = true;
#endif
if (options.session_params.tile_size > 0) {
options.session_params.use_auto_tile = true;
}
/* Use progressive rendering */
options.session_params.progressive = true;
/* find matching device */
DeviceType device_type = Device::type_from_string(devicename.c_str());

View File

@@ -333,7 +333,6 @@ static void xml_read_shader_graph(XMLReadState &state, Shader *shader, xml_node
}
snode = (ShaderNode *)node_type->create(node_type);
snode->set_owner(graph);
}
xml_read_node(graph_reader, snode, node);

View File

@@ -1,71 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "app/oiio_output_driver.h"
CCL_NAMESPACE_BEGIN
OIIOOutputDriver::OIIOOutputDriver(const string_view filepath,
const string_view pass,
LogFunction log)
: filepath_(filepath), pass_(pass), log_(log)
{
}
OIIOOutputDriver::~OIIOOutputDriver()
{
}
void OIIOOutputDriver::write_render_tile(const Tile &tile)
{
/* Only write the full buffer, no intermediate tiles. */
if (!(tile.size == tile.full_size)) {
return;
}
log_(string_printf("Writing image %s", filepath_.c_str()));
unique_ptr<ImageOutput> image_output(ImageOutput::create(filepath_));
if (image_output == nullptr) {
log_("Failed to create image file");
return;
}
const int width = tile.size.x;
const int height = tile.size.y;
ImageSpec spec(width, height, 4, TypeDesc::FLOAT);
if (!image_output->open(filepath_, spec)) {
log_("Failed to create image file");
return;
}
vector<float> pixels(width * height * 4);
if (!tile.get_pass_pixels(pass_, 4, pixels.data())) {
log_("Failed to read render pass pixels");
return;
}
/* Manipulate offset and stride to convert from bottom-up to top-down convention. */
image_output->write_image(TypeDesc::FLOAT,
pixels.data() + (height - 1) * width * 4,
AutoStride,
-width * 4 * sizeof(float),
AutoStride);
image_output->close();
}
CCL_NAMESPACE_END

View File

@@ -1,42 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "render/output_driver.h"
#include "util/util_function.h"
#include "util/util_image.h"
#include "util/util_string.h"
#include "util/util_unique_ptr.h"
#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
class OIIOOutputDriver : public OutputDriver {
public:
typedef function<void(const string &)> LogFunction;
OIIOOutputDriver(const string_view filepath, const string_view pass, LogFunction log);
virtual ~OIIOOutputDriver();
void write_render_tile(const Tile &tile) override;
protected:
string filepath_;
string pass_;
LogFunction log_;
};
CCL_NAMESPACE_END

View File

@@ -31,14 +31,13 @@ set(INC_SYS
set(SRC
blender_camera.cpp
blender_device.cpp
blender_display_driver.cpp
blender_image.cpp
blender_geometry.cpp
blender_gpu_display.cpp
blender_light.cpp
blender_mesh.cpp
blender_object.cpp
blender_object_cull.cpp
blender_output_driver.cpp
blender_particles.cpp
blender_curves.cpp
blender_logging.cpp
@@ -52,11 +51,10 @@ set(SRC
CCL_api.h
blender_device.h
blender_display_driver.h
blender_gpu_display.h
blender_id_map.h
blender_image.h
blender_object_cull.h
blender_output_driver.h
blender_sync.h
blender_session.h
blender_texture.h
@@ -97,9 +95,6 @@ set(ADDON_FILES
add_definitions(${GL_DEFINITIONS})
if(WITH_CYCLES_DEVICE_HIP)
add_definitions(-DWITH_HIP)
endif()
if(WITH_MOD_FLUID)
add_definitions(-DWITH_FLUID)
endif()

View File

@@ -28,7 +28,7 @@ def _configure_argument_parser():
action='store_true')
parser.add_argument("--cycles-device",
help="Set the device to use for Cycles, overriding user preferences and the scene setting."
"Valid options are 'CPU', 'CUDA', 'OPTIX', or 'HIP'"
"Valid options are 'CPU', 'CUDA' or 'OPTIX'."
"Additionally, you can append '+CPU' to any GPU type for hybrid rendering.",
default=None)
return parser

View File

@@ -111,7 +111,6 @@ enum_device_type = (
('CPU', "CPU", "CPU", 0),
('CUDA', "CUDA", "CUDA", 1),
('OPTIX', "OptiX", "OptiX", 3),
("HIP", "HIP", "HIP", 4)
)
enum_texture_limit = (
@@ -124,7 +123,7 @@ enum_texture_limit = (
('4096', "4096", "Limit texture size to 4096 pixels", 6),
('8192', "8192", "Limit texture size to 8192 pixels", 7),
)
# NOTE: Identifiers are expected to be an upper case version of identifiers from `Pass::get_type_enum()`
enum_view3d_shading_render_pass = (
('', "General", ""),
@@ -740,14 +739,14 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
use_auto_tile: BoolProperty(
name="Auto Tiles",
description="Automatically render high resolution images in tiles to reduce memory usage, using the specified tile size. Tiles are cached to disk while rendering to save memory",
description="Automatically split image into tiles",
default=True,
)
tile_size: IntProperty(
name="Tile Size",
default=2048,
description="",
min=8, max=16384,
min=0, max=16384,
)
# Various fine-tuning debug flags
@@ -1267,16 +1266,12 @@ class CyclesPreferences(bpy.types.AddonPreferences):
def get_device_types(self, context):
import _cycles
has_cuda, has_optix, has_hip = _cycles.get_device_types()
has_cuda, has_optix = _cycles.get_device_types()
list = [('NONE', "None", "Don't use compute device", 0)]
if has_cuda:
list.append(('CUDA', "CUDA", "Use CUDA for GPU acceleration", 1))
if has_optix:
list.append(('OPTIX', "OptiX", "Use OptiX for GPU acceleration", 3))
if has_hip:
list.append(('HIP', "HIP", "Use HIP for GPU acceleration", 4))
return list
compute_device_type: EnumProperty(
@@ -1301,7 +1296,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
def update_device_entries(self, device_list):
for device in device_list:
if not device[1] in {'CUDA', 'OPTIX', 'CPU', 'HIP'}:
if not device[1] in {'CUDA', 'OPTIX', 'CPU'}:
continue
# Try to find existing Device entry
entry = self.find_existing_device_entry(device)
@@ -1335,7 +1330,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
elif entry.type == 'CPU':
cpu_devices.append(entry)
# Extend all GPU devices with CPU.
if compute_device_type != 'CPU' and compute_device_type != 'HIP':
if compute_device_type != 'CPU':
devices.extend(cpu_devices)
return devices
@@ -1345,7 +1340,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
import _cycles
# Ensure `self.devices` is not re-allocated when the second call to
# get_devices_for_type is made, freeing items from the first list.
for device_type in ('CUDA', 'OPTIX', 'HIP'):
for device_type in ('CUDA', 'OPTIX', 'OPENCL'):
self.update_device_entries(_cycles.available_devices(device_type))
# Deprecated: use refresh_devices instead.


@@ -99,11 +99,6 @@ def use_cuda(context):
return (get_device_type(context) == 'CUDA' and cscene.device == 'GPU')
def use_hip(context):
cscene = context.scene.cycles
return (get_device_type(context) == 'HIP' and cscene.device == 'GPU')
def use_optix(context):
cscene = context.scene.cycles
@@ -618,8 +613,8 @@ class CYCLES_RENDER_PT_performance_threads(CyclesButtonsPanel, Panel):
sub.prop(rd, "threads")
class CYCLES_RENDER_PT_performance_memory(CyclesButtonsPanel, Panel):
bl_label = "Memory"
class CYCLES_RENDER_PT_performance_tiles(CyclesButtonsPanel, Panel):
bl_label = "Tiles"
bl_parent_id = "CYCLES_RENDER_PT_performance"
def draw(self, context):
@@ -2112,7 +2107,7 @@ classes = (
CYCLES_RENDER_PT_film_transparency,
CYCLES_RENDER_PT_performance,
CYCLES_RENDER_PT_performance_threads,
CYCLES_RENDER_PT_performance_memory,
CYCLES_RENDER_PT_performance_tiles,
CYCLES_RENDER_PT_performance_acceleration_structure,
CYCLES_RENDER_PT_performance_final_render,
CYCLES_RENDER_PT_performance_viewport,


@@ -283,13 +283,10 @@ static void ExportCurveSegments(Scene *scene, Hair *hair, ParticleCurveData *CDa
return;
Attribute *attr_intercept = NULL;
Attribute *attr_length = NULL;
Attribute *attr_random = NULL;
if (hair->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT))
attr_intercept = hair->attributes.add(ATTR_STD_CURVE_INTERCEPT);
if (hair->need_attribute(scene, ATTR_STD_CURVE_LENGTH))
attr_length = hair->attributes.add(ATTR_STD_CURVE_LENGTH);
if (hair->need_attribute(scene, ATTR_STD_CURVE_RANDOM))
attr_random = hair->attributes.add(ATTR_STD_CURVE_RANDOM);
@@ -339,10 +336,6 @@ static void ExportCurveSegments(Scene *scene, Hair *hair, ParticleCurveData *CDa
num_curve_keys++;
}
if (attr_length != NULL) {
attr_length->add(CData->curve_length[curve]);
}
if (attr_random != NULL) {
attr_random->add(hash_uint2_to_float(num_curves, 0));
}
@@ -664,15 +657,11 @@ static void export_hair_curves(Scene *scene, Hair *hair, BL::Hair b_hair)
/* Add requested attributes. */
Attribute *attr_intercept = NULL;
Attribute *attr_length = NULL;
Attribute *attr_random = NULL;
if (hair->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT)) {
attr_intercept = hair->attributes.add(ATTR_STD_CURVE_INTERCEPT);
}
if (hair->need_attribute(scene, ATTR_STD_CURVE_LENGTH)) {
attr_length = hair->attributes.add(ATTR_STD_CURVE_LENGTH);
}
if (hair->need_attribute(scene, ATTR_STD_CURVE_RANDOM)) {
attr_random = hair->attributes.add(ATTR_STD_CURVE_RANDOM);
}
@@ -725,10 +714,6 @@ static void export_hair_curves(Scene *scene, Hair *hair, BL::Hair b_hair)
}
}
if (attr_length) {
attr_length->add(length);
}
/* Random number per curve. */
if (attr_random != NULL) {
attr_random->add(hash_uint2_to_float(b_curve.index(), 0));


@@ -26,7 +26,6 @@ enum ComputeDevice {
COMPUTE_DEVICE_CPU = 0,
COMPUTE_DEVICE_CUDA = 1,
COMPUTE_DEVICE_OPTIX = 3,
COMPUTE_DEVICE_HIP = 4,
COMPUTE_DEVICE_NUM
};
@@ -82,9 +81,6 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
else if (compute_device == COMPUTE_DEVICE_OPTIX) {
mask |= DEVICE_MASK_OPTIX;
}
else if (compute_device == COMPUTE_DEVICE_HIP) {
mask |= DEVICE_MASK_HIP;
}
vector<DeviceInfo> devices = Device::available_devices(mask);
/* Match device preferences and available devices. */


@@ -80,11 +80,7 @@ Geometry *BlenderSync::sync_geometry(BL::Depsgraph &b_depsgraph,
{
/* Test if we can instance or if the object is modified. */
Geometry::Type geom_type = determine_geom_type(b_ob_info, use_particle_hair);
BL::ID b_key_id = (b_ob_info.is_real_object_data() &&
BKE_object_is_modified(b_ob_info.real_object)) ?
b_ob_info.real_object :
b_ob_info.object_data;
GeometryKey key(b_key_id.ptr.data, geom_type);
GeometryKey key(b_ob_info.object_data, geom_type);
/* Find shader indices. */
array<Node *> used_shaders = find_used_shaders(b_ob_info.iter_object);
@@ -114,7 +110,7 @@ Geometry *BlenderSync::sync_geometry(BL::Depsgraph &b_depsgraph,
}
else {
/* Test if we need to update existing geometry. */
sync = geometry_map.update(geom, b_key_id);
sync = geometry_map.update(geom, b_ob_info.object_data);
}
if (!sync) {


@@ -14,7 +14,7 @@
* limitations under the License.
*/
#include "blender/blender_display_driver.h"
#include "blender/blender_gpu_display.h"
#include "device/device.h"
#include "util/util_logging.h"
@@ -273,17 +273,17 @@ uint BlenderDisplaySpaceShader::get_shader_program()
}
/* --------------------------------------------------------------------
* BlenderDisplayDriver.
* BlenderGPUDisplay.
*/
BlenderDisplayDriver::BlenderDisplayDriver(BL::RenderEngine &b_engine, BL::Scene &b_scene)
BlenderGPUDisplay::BlenderGPUDisplay(BL::RenderEngine &b_engine, BL::Scene &b_scene)
: b_engine_(b_engine), display_shader_(BlenderDisplayShader::create(b_engine, b_scene))
{
/* Create context while on the main thread. */
gl_context_create();
}
BlenderDisplayDriver::~BlenderDisplayDriver()
BlenderGPUDisplay::~BlenderGPUDisplay()
{
gl_resources_destroy();
}
@@ -292,18 +292,19 @@ BlenderDisplayDriver::~BlenderDisplayDriver()
* Update procedure.
*/
bool BlenderDisplayDriver::update_begin(const Params &params,
bool BlenderGPUDisplay::do_update_begin(const GPUDisplayParams &params,
int texture_width,
int texture_height)
{
/* Note that it's the responsibility of BlenderDisplayDriver to ensure updating and drawing
/* Note that it's the responsibility of BlenderGPUDisplay to ensure updating and drawing
* the texture does not happen at the same time. This is achieved indirectly.
*
* When enabling the OpenGL context, it uses an internal mutex lock DST.gl_context_lock.
* This same lock is also held when do_draw() is called, which together ensure mutual
* exclusion.
*
* This locking is not performed on the Cycles side, because that would cause lock inversion. */
* This locking is not performed at the GPU display level, because that would cause lock
* inversion. */
if (!gl_context_enable()) {
return false;
}
@@ -360,7 +361,7 @@ bool BlenderDisplayDriver::update_begin(const Params &params,
return true;
}
void BlenderDisplayDriver::update_end()
void BlenderGPUDisplay::do_update_end()
{
gl_upload_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
glFlush();
@@ -368,18 +369,54 @@ void BlenderDisplayDriver::update_end()
gl_context_disable();
}
/* --------------------------------------------------------------------
* Texture update from CPU buffer.
*/
void BlenderGPUDisplay::do_copy_pixels_to_texture(
const half4 *rgba_pixels, int texture_x, int texture_y, int pixels_width, int pixels_height)
{
/* This call copies pixels to a Pixel Buffer Object (PBO), which is much cheaper in CPU time
* than copying data directly to the OpenGL texture.
*
* The possible downside of this approach is that it might require a higher peak memory when
* doing partial updates of the texture (although, in practice even partial updates might peak
* with a full-frame buffer stored on the CPU if the GPU is currently occupied). */
half4 *mapped_rgba_pixels = map_texture_buffer();
if (!mapped_rgba_pixels) {
return;
}
if (texture_x == 0 && texture_y == 0 && pixels_width == texture_.width &&
pixels_height == texture_.height) {
const size_t size_in_bytes = sizeof(half4) * texture_.width * texture_.height;
memcpy(mapped_rgba_pixels, rgba_pixels, size_in_bytes);
}
else {
const half4 *rgba_row = rgba_pixels;
half4 *mapped_rgba_row = mapped_rgba_pixels + texture_y * texture_.width + texture_x;
for (int y = 0; y < pixels_height;
++y, rgba_row += pixels_width, mapped_rgba_row += texture_.width) {
memcpy(mapped_rgba_row, rgba_row, sizeof(half4) * pixels_width);
}
}
unmap_texture_buffer();
}
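The partial-update branch above is a plain strided row copy. Below is a self-contained sketch of the same loop, with Half4 as a stand-in for Cycles' half4 type; the names are illustrative, not Cycles API.

#include <cstring>

struct Half4 {
  unsigned short h[4]; /* Stand-in for Cycles' half4 (four 16-bit values). */
};

/* Copy a pixels_width x pixels_height block into a destination buffer that is
 * dst_width texels wide, starting at texel (x, y), row by row. */
static void copy_block(Half4 *dst, int dst_width, const Half4 *src,
                       int x, int y, int pixels_width, int pixels_height)
{
  const Half4 *src_row = src;
  Half4 *dst_row = dst + y * dst_width + x;
  for (int row = 0; row < pixels_height; ++row) {
    memcpy(dst_row, src_row, sizeof(Half4) * pixels_width);
    src_row += pixels_width;
    dst_row += dst_width;
  }
}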
/* --------------------------------------------------------------------
* Texture buffer mapping.
*/
half4 *BlenderDisplayDriver::map_texture_buffer()
half4 *BlenderGPUDisplay::do_map_texture_buffer()
{
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id);
half4 *mapped_rgba_pixels = reinterpret_cast<half4 *>(
glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY));
if (!mapped_rgba_pixels) {
LOG(ERROR) << "Error mapping BlenderDisplayDriver pixel buffer object.";
LOG(ERROR) << "Error mapping BlenderGPUDisplay pixel buffer object.";
}
if (texture_.need_clear) {
@@ -394,7 +431,7 @@ half4 *BlenderDisplayDriver::map_texture_buffer()
return mapped_rgba_pixels;
}
void BlenderDisplayDriver::unmap_texture_buffer()
void BlenderGPUDisplay::do_unmap_texture_buffer()
{
glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
@@ -405,9 +442,9 @@ void BlenderDisplayDriver::unmap_texture_buffer()
* Graphics interoperability.
*/
BlenderDisplayDriver::GraphicsInterop BlenderDisplayDriver::graphics_interop_get()
DeviceGraphicsInteropDestination BlenderGPUDisplay::do_graphics_interop_get()
{
GraphicsInterop interop_dst;
DeviceGraphicsInteropDestination interop_dst;
interop_dst.buffer_width = texture_.buffer_width;
interop_dst.buffer_height = texture_.buffer_height;
@@ -419,12 +456,12 @@ BlenderDisplayDriver::GraphicsInterop BlenderDisplayDriver::graphics_interop_get
return interop_dst;
}
void BlenderDisplayDriver::graphics_interop_activate()
void BlenderGPUDisplay::graphics_interop_activate()
{
gl_context_enable();
}
void BlenderDisplayDriver::graphics_interop_deactivate()
void BlenderGPUDisplay::graphics_interop_deactivate()
{
gl_context_disable();
}
@@ -433,21 +470,27 @@ void BlenderDisplayDriver::graphics_interop_deactivate()
* Drawing.
*/
void BlenderDisplayDriver::clear()
void BlenderGPUDisplay::clear()
{
texture_.need_clear = true;
}
void BlenderDisplayDriver::set_zoom(float zoom_x, float zoom_y)
void BlenderGPUDisplay::set_zoom(float zoom_x, float zoom_y)
{
zoom_ = make_float2(zoom_x, zoom_y);
}
void BlenderDisplayDriver::draw(const Params &params)
void BlenderGPUDisplay::do_draw(const GPUDisplayParams &params)
{
/* See do_update_begin() for why no locking is required here. */
const bool transparent = true; // TODO(sergey): Derive this from Film.
if (texture_.need_clear) {
/* Texture is requested to be cleared and was not yet cleared.
* Do early return which should be equivalent of drawing all-zero texture. */
return;
}
if (!gl_draw_resources_ensure()) {
return;
}
@@ -456,16 +499,6 @@ void BlenderDisplayDriver::draw(const Params &params)
gl_context_mutex_.lock();
}
if (texture_.need_clear) {
/* Texture is requested to be cleared and was not yet cleared.
*
* Do early return which should be equivalent of drawing all-zero texture.
* Watch out for the lock though so that the clear happening during update is properly
* synchronized here. */
gl_context_mutex_.unlock();
return;
}
if (gl_upload_sync_) {
glWaitSync((GLsync)gl_upload_sync_, 0, GL_TIMEOUT_IGNORED);
}
@@ -491,7 +524,7 @@ void BlenderDisplayDriver::draw(const Params &params)
const float zoomed_width = params.size.x * zoom_.x;
const float zoomed_height = params.size.y * zoom_.y;
if (texture_.width != params.size.x || texture_.height != params.size.y) {
/* Resolution divider is different from 1, force nearest interpolation. */
/* Resolution divider is different from 1, force nearest interpolation. */
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
}
else if (zoomed_width - params.size.x > 0.5f || zoomed_height - params.size.y > 0.5f) {
@@ -547,7 +580,7 @@ void BlenderDisplayDriver::draw(const Params &params)
}
}
void BlenderDisplayDriver::gl_context_create()
void BlenderGPUDisplay::gl_context_create()
{
/* When rendering in the viewport there is no render context available via the engine.
* Check whether our own context is to be created here.
@@ -576,7 +609,7 @@ void BlenderDisplayDriver::gl_context_create()
}
}
bool BlenderDisplayDriver::gl_context_enable()
bool BlenderGPUDisplay::gl_context_enable()
{
if (use_gl_context_) {
if (!gl_context_) {
@@ -591,7 +624,7 @@ bool BlenderDisplayDriver::gl_context_enable()
return true;
}
void BlenderDisplayDriver::gl_context_disable()
void BlenderGPUDisplay::gl_context_disable()
{
if (use_gl_context_) {
if (gl_context_) {
@@ -604,7 +637,7 @@ void BlenderDisplayDriver::gl_context_disable()
RE_engine_render_context_disable(reinterpret_cast<RenderEngine *>(b_engine_.ptr.data));
}
void BlenderDisplayDriver::gl_context_dispose()
void BlenderGPUDisplay::gl_context_dispose()
{
if (gl_context_) {
const bool drw_state = DRW_opengl_context_release();
@@ -616,7 +649,7 @@ void BlenderDisplayDriver::gl_context_dispose()
}
}
bool BlenderDisplayDriver::gl_draw_resources_ensure()
bool BlenderGPUDisplay::gl_draw_resources_ensure()
{
if (!texture_.gl_id) {
/* If there is no texture allocated, there is nothing to draw. Inform the draw call that it can
@@ -643,7 +676,7 @@ bool BlenderDisplayDriver::gl_draw_resources_ensure()
return true;
}
void BlenderDisplayDriver::gl_resources_destroy()
void BlenderGPUDisplay::gl_resources_destroy()
{
gl_context_enable();
@@ -666,7 +699,7 @@ void BlenderDisplayDriver::gl_resources_destroy()
gl_context_dispose();
}
bool BlenderDisplayDriver::gl_texture_resources_ensure()
bool BlenderGPUDisplay::gl_texture_resources_ensure()
{
if (texture_.creation_attempted) {
return texture_.is_created;
@@ -703,7 +736,7 @@ bool BlenderDisplayDriver::gl_texture_resources_ensure()
return true;
}
void BlenderDisplayDriver::texture_update_if_needed()
void BlenderGPUDisplay::texture_update_if_needed()
{
if (!texture_.need_update) {
return;
@@ -717,7 +750,7 @@ void BlenderDisplayDriver::texture_update_if_needed()
texture_.need_update = false;
}
void BlenderDisplayDriver::vertex_buffer_update(const Params &params)
void BlenderGPUDisplay::vertex_buffer_update(const GPUDisplayParams &params)
{
/* Invalidate old contents - avoids stalling if the buffer is still waiting in queue to be
* rendered. */
@@ -730,23 +763,23 @@ void BlenderDisplayDriver::vertex_buffer_update(const Params &params)
vpointer[0] = 0.0f;
vpointer[1] = 0.0f;
vpointer[2] = params.full_offset.x;
vpointer[3] = params.full_offset.y;
vpointer[2] = params.offset.x;
vpointer[3] = params.offset.y;
vpointer[4] = 1.0f;
vpointer[5] = 0.0f;
vpointer[6] = (float)params.size.x + params.full_offset.x;
vpointer[7] = params.full_offset.y;
vpointer[6] = (float)params.size.x + params.offset.x;
vpointer[7] = params.offset.y;
vpointer[8] = 1.0f;
vpointer[9] = 1.0f;
vpointer[10] = (float)params.size.x + params.full_offset.x;
vpointer[11] = (float)params.size.y + params.full_offset.y;
vpointer[10] = (float)params.size.x + params.offset.x;
vpointer[11] = (float)params.size.y + params.offset.y;
vpointer[12] = 0.0f;
vpointer[13] = 1.0f;
vpointer[14] = params.full_offset.x;
vpointer[15] = (float)params.size.y + params.full_offset.y;
vpointer[14] = params.offset.x;
vpointer[15] = (float)params.size.y + params.offset.y;
glUnmapBuffer(GL_ARRAY_BUFFER);
}
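The sixteen floats written above describe one textured quad: four vertices, each packed as (u, v, x, y). A hedged restatement with a named vertex type (QuadVertex is illustrative, not a Cycles type):

struct QuadVertex {
  float u, v; /* Texture coordinate. */
  float x, y; /* Position in region space. */
};

/* Fill the same quad as vertex_buffer_update() above: a size_x by size_y
 * rectangle placed at (off_x, off_y). */
static void fill_quad(QuadVertex out[4],
                      float off_x, float off_y, float size_x, float size_y)
{
  out[0] = {0.0f, 0.0f, off_x, off_y};
  out[1] = {1.0f, 0.0f, off_x + size_x, off_y};
  out[2] = {1.0f, 1.0f, off_x + size_x, off_y + size_y};
  out[3] = {0.0f, 1.0f, off_x, off_y + size_y};
}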


@@ -22,14 +22,12 @@
#include "RNA_blender_cpp.h"
#include "render/display_driver.h"
#include "util/util_thread.h"
#include "render/gpu_display.h"
#include "util/util_unique_ptr.h"
CCL_NAMESPACE_BEGIN
/* Base class of shader used for display driver rendering. */
/* Base class of shader used for GPU display rendering. */
class BlenderDisplayShader {
public:
static constexpr const char *position_attribute_name = "pos";
@@ -98,11 +96,11 @@ class BlenderDisplaySpaceShader : public BlenderDisplayShader {
uint shader_program_ = 0;
};
/* Display driver implementation which is specific for Blender viewport integration. */
class BlenderDisplayDriver : public DisplayDriver {
/* GPU display implementation which is specific for Blender viewport integration. */
class BlenderGPUDisplay : public GPUDisplay {
public:
BlenderDisplayDriver(BL::RenderEngine &b_engine, BL::Scene &b_scene);
~BlenderDisplayDriver();
BlenderGPUDisplay(BL::RenderEngine &b_engine, BL::Scene &b_scene);
~BlenderGPUDisplay();
virtual void graphics_interop_activate() override;
virtual void graphics_interop_deactivate() override;
@@ -112,15 +110,22 @@ class BlenderDisplayDriver : public DisplayDriver {
void set_zoom(float zoom_x, float zoom_y);
protected:
virtual bool update_begin(const Params &params, int texture_width, int texture_height) override;
virtual void update_end() override;
virtual bool do_update_begin(const GPUDisplayParams &params,
int texture_width,
int texture_height) override;
virtual void do_update_end() override;
virtual half4 *map_texture_buffer() override;
virtual void unmap_texture_buffer() override;
virtual void do_copy_pixels_to_texture(const half4 *rgba_pixels,
int texture_x,
int texture_y,
int pixels_width,
int pixels_height) override;
virtual void do_draw(const GPUDisplayParams &params) override;
virtual GraphicsInterop graphics_interop_get() override;
virtual half4 *do_map_texture_buffer() override;
virtual void do_unmap_texture_buffer() override;
virtual void draw(const Params &params) override;
virtual DeviceGraphicsInteropDestination do_graphics_interop_get() override;
/* Helper function which allocates new GPU context. */
void gl_context_create();
@@ -147,13 +152,13 @@ class BlenderDisplayDriver : public DisplayDriver {
* This buffer is used to render texture in the viewport.
*
* NOTE: The buffer needs to be bound. */
void vertex_buffer_update(const Params &params);
void vertex_buffer_update(const GPUDisplayParams &params);
BL::RenderEngine b_engine_;
/* OpenGL context which is used when the render engine doesn't have its own. */
void *gl_context_ = nullptr;
/* Set when the Blender RenderEngine side context is not available and the DisplayDriver is to create
/* Set when the Blender RenderEngine side context is not available and the GPUDisplay is to create
* its own context. */
bool use_gl_context_ = false;
/* Mutex used to guard the `gl_context_`. */


@@ -1,127 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "blender/blender_output_driver.h"
CCL_NAMESPACE_BEGIN
BlenderOutputDriver::BlenderOutputDriver(BL::RenderEngine &b_engine) : b_engine_(b_engine)
{
}
BlenderOutputDriver::~BlenderOutputDriver()
{
}
bool BlenderOutputDriver::read_render_tile(const Tile &tile)
{
/* Get render result. */
BL::RenderResult b_rr = b_engine_.begin_result(tile.offset.x,
tile.offset.y,
tile.size.x,
tile.size.y,
tile.layer.c_str(),
tile.view.c_str());
/* Can happen if the intersected rectangle gives 0 width or height. */
if (b_rr.ptr.data == NULL) {
return false;
}
BL::RenderResult::layers_iterator b_single_rlay;
b_rr.layers.begin(b_single_rlay);
/* layer will be missing if it was disabled in the UI */
if (b_single_rlay == b_rr.layers.end()) {
return false;
}
BL::RenderLayer b_rlay = *b_single_rlay;
vector<float> pixels(tile.size.x * tile.size.y * 4);
/* Copy each pass.
* TODO: copy only the required ones for better performance? */
for (BL::RenderPass &b_pass : b_rlay.passes) {
tile.set_pass_pixels(b_pass.name(), b_pass.channels(), (float *)b_pass.rect());
}
b_engine_.end_result(b_rr, false, false, false);
return true;
}
bool BlenderOutputDriver::update_render_tile(const Tile &tile)
{
/* Use final write for preview renders, otherwise render result wouldn't be updated
* quickly on Blender side. For all other cases we use the display driver. */
if (b_engine_.is_preview()) {
write_render_tile(tile);
return true;
}
else {
/* Don't highlight full-frame tile. */
if (!(tile.size == tile.full_size)) {
b_engine_.tile_highlight_clear_all();
b_engine_.tile_highlight_set(tile.offset.x, tile.offset.y, tile.size.x, tile.size.y, true);
}
return false;
}
}
void BlenderOutputDriver::write_render_tile(const Tile &tile)
{
b_engine_.tile_highlight_clear_all();
/* Get render result. */
BL::RenderResult b_rr = b_engine_.begin_result(tile.offset.x,
tile.offset.y,
tile.size.x,
tile.size.y,
tile.layer.c_str(),
tile.view.c_str());
/* Can happen if the intersected rectangle gives 0 width or height. */
if (b_rr.ptr.data == NULL) {
return;
}
BL::RenderResult::layers_iterator b_single_rlay;
b_rr.layers.begin(b_single_rlay);
/* Layer will be missing if it was disabled in the UI. */
if (b_single_rlay == b_rr.layers.end()) {
return;
}
BL::RenderLayer b_rlay = *b_single_rlay;
vector<float> pixels(tile.size.x * tile.size.y * 4);
/* Copy each pass. */
for (BL::RenderPass &b_pass : b_rlay.passes) {
if (!tile.get_pass_pixels(b_pass.name(), b_pass.channels(), &pixels[0])) {
memset(&pixels[0], 0, pixels.size() * sizeof(float));
}
b_pass.rect(&pixels[0]);
}
b_engine_.end_result(b_rr, true, false, true);
}
CCL_NAMESPACE_END
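The tile functions in this removed output driver all size their staging buffer as width * height * 4 floats (one RGBA quadruple per pixel) and zero-fill any pass that cannot be read, so Blender always receives valid memory. A stand-alone sketch of that fallback, where read_pass stands in for the tile's get_pass_pixels() call:

#include <cstring>
#include <functional>
#include <vector>

static std::vector<float> pass_pixels_or_black(
    int width, int height, const std::function<bool(float *)> &read_pass)
{
  std::vector<float> pixels(size_t(width) * height * 4); /* RGBA floats. */
  if (!read_pass(pixels.data())) {
    memset(pixels.data(), 0, pixels.size() * sizeof(float));
  }
  return pixels;
}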


@@ -1,40 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "MEM_guardedalloc.h"
#include "RNA_blender_cpp.h"
#include "render/output_driver.h"
CCL_NAMESPACE_BEGIN
class BlenderOutputDriver : public OutputDriver {
public:
BlenderOutputDriver(BL::RenderEngine &b_engine);
~BlenderOutputDriver();
virtual void write_render_tile(const Tile &tile) override;
virtual bool update_render_tile(const Tile &tile) override;
virtual bool read_render_tile(const Tile &tile) override;
protected:
BL::RenderEngine b_engine_;
};
CCL_NAMESPACE_END


@@ -911,16 +911,14 @@ static PyObject *enable_print_stats_func(PyObject * /*self*/, PyObject * /*args*
static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/)
{
vector<DeviceType> device_types = Device::available_types();
bool has_cuda = false, has_optix = false, has_hip = false;
bool has_cuda = false, has_optix = false;
foreach (DeviceType device_type, device_types) {
has_cuda |= (device_type == DEVICE_CUDA);
has_optix |= (device_type == DEVICE_OPTIX);
has_hip |= (device_type == DEVICE_HIP);
}
PyObject *list = PyTuple_New(3);
PyObject *list = PyTuple_New(2);
PyTuple_SET_ITEM(list, 0, PyBool_FromLong(has_cuda));
PyTuple_SET_ITEM(list, 1, PyBool_FromLong(has_optix));
PyTuple_SET_ITEM(list, 2, PyBool_FromLong(has_hip));
return list;
}
@@ -946,9 +944,6 @@ static PyObject *set_device_override_func(PyObject * /*self*/, PyObject *arg)
else if (override == "OPTIX") {
BlenderSession::device_override = DEVICE_MASK_OPTIX;
}
else if (override == "HIP") {
BlenderSession::device_override = DEVICE_MASK_HIP;
}
else {
printf("\nError: %s is not a valid Cycles device.\n", override.c_str());
Py_RETURN_FALSE;


@@ -42,8 +42,7 @@
#include "util/util_progress.h"
#include "util/util_time.h"
#include "blender/blender_display_driver.h"
#include "blender/blender_output_driver.h"
#include "blender/blender_gpu_display.h"
#include "blender/blender_session.h"
#include "blender/blender_sync.h"
#include "blender/blender_util.h"
@@ -72,8 +71,7 @@ BlenderSession::BlenderSession(BL::RenderEngine &b_engine,
width(0),
height(0),
preview_osl(preview_osl),
python_thread_state(NULL),
use_developer_ui(false)
python_thread_state(NULL)
{
/* offline render */
background = true;
@@ -158,13 +156,11 @@ void BlenderSession::create_session()
b_v3d, b_rv3d, scene->camera, width, height);
session->reset(session_params, buffer_params);
/* Create GPU display.
* TODO(sergey): Investigate whether DisplayDriver can be used for the preview as well. */
/* Create GPU display. */
if (!b_engine.is_preview() && !headless) {
unique_ptr<BlenderDisplayDriver> display_driver = make_unique<BlenderDisplayDriver>(b_engine,
b_scene);
display_driver_ = display_driver.get();
session->set_display_driver(move(display_driver));
unique_ptr<BlenderGPUDisplay> gpu_display = make_unique<BlenderGPUDisplay>(b_engine, b_scene);
gpu_display_ = gpu_display.get();
session->set_gpu_display(move(gpu_display));
}
/* Viewport and preview (as in, material preview) do not do tiled rendering, so can inform
@@ -281,6 +277,94 @@ void BlenderSession::free_session()
session = nullptr;
}
void BlenderSession::read_render_tile()
{
const int2 tile_offset = session->get_render_tile_offset();
const int2 tile_size = session->get_render_tile_size();
/* get render result */
BL::RenderResult b_rr = b_engine.begin_result(tile_offset.x,
tile_offset.y,
tile_size.x,
tile_size.y,
b_rlay_name.c_str(),
b_rview_name.c_str());
/* can happen if the intersected rectangle gives 0 width or height */
if (b_rr.ptr.data == NULL) {
return;
}
BL::RenderResult::layers_iterator b_single_rlay;
b_rr.layers.begin(b_single_rlay);
/* layer will be missing if it was disabled in the UI */
if (b_single_rlay == b_rr.layers.end())
return;
BL::RenderLayer b_rlay = *b_single_rlay;
vector<float> pixels(tile_size.x * tile_size.y * 4);
/* Copy each pass.
* TODO: copy only the required ones for better performance? */
for (BL::RenderPass &b_pass : b_rlay.passes) {
session->set_render_tile_pixels(b_pass.name(), b_pass.channels(), (float *)b_pass.rect());
}
}
void BlenderSession::write_render_tile()
{
const int2 tile_offset = session->get_render_tile_offset();
const int2 tile_size = session->get_render_tile_size();
const string_view render_layer_name = session->get_render_tile_layer();
const string_view render_view_name = session->get_render_tile_view();
b_engine.tile_highlight_clear_all();
/* get render result */
BL::RenderResult b_rr = b_engine.begin_result(tile_offset.x,
tile_offset.y,
tile_size.x,
tile_size.y,
render_layer_name.c_str(),
render_view_name.c_str());
/* can happen if the intersected rectangle gives 0 width or height */
if (b_rr.ptr.data == NULL) {
return;
}
BL::RenderResult::layers_iterator b_single_rlay;
b_rr.layers.begin(b_single_rlay);
/* layer will be missing if it was disabled in the UI */
if (b_single_rlay == b_rr.layers.end()) {
return;
}
BL::RenderLayer b_rlay = *b_single_rlay;
write_render_result(b_rlay);
b_engine.end_result(b_rr, true, false, true);
}
void BlenderSession::update_render_tile()
{
if (!session->has_multiple_render_tiles()) {
/* Don't highlight full-frame tile. */
return;
}
const int2 tile_offset = session->get_render_tile_offset();
const int2 tile_size = session->get_render_tile_size();
b_engine.tile_highlight_clear_all();
b_engine.tile_highlight_set(tile_offset.x, tile_offset.y, tile_size.x, tile_size.y, true);
}
void BlenderSession::full_buffer_written(string_view filename)
{
full_buffer_files_.emplace_back(filename);
@@ -354,8 +438,18 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
return;
}
/* Create driver to write out render results. */
session->set_output_driver(make_unique<BlenderOutputDriver>(b_engine));
/* set callback to write out render results */
session->write_render_tile_cb = [&]() { write_render_tile(); };
/* Use final write for preview renders, otherwise render result wouldn't be updated on Blender
* side. */
/* TODO(sergey): Investigate whether GPUDisplay can be used for the preview as well. */
if (b_engine.is_preview()) {
session->update_render_tile_cb = [&]() { write_render_tile(); };
}
else {
session->update_render_tile_cb = [&]() { update_render_tile(); };
}
session->full_buffer_written_cb = [&](string_view filename) { full_buffer_written(filename); };
@@ -463,11 +557,6 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
/* free result without merging */
b_engine.end_result(b_rr, true, false, false);
/* When tiled rendering is used there will be no "write" done for the tile. Forcefully clear
* highlighted tiles now, so that the highlight will be removed while processing full frame from
* file. */
b_engine.tile_highlight_clear_all();
double total_time, render_time;
session->progress.get_time(total_time, render_time);
VLOG(1) << "Total render time: " << total_time;
@@ -492,17 +581,12 @@ void BlenderSession::render_frame_finish()
for (string_view filename : full_buffer_files_) {
session->process_full_buffer_from_disk(filename);
if (check_and_report_session_error()) {
break;
}
}
for (string_view filename : full_buffer_files_) {
path_remove(filename);
}
/* Clear driver. */
session->set_output_driver(nullptr);
/* clear callback */
session->write_render_tile_cb = function_null;
session->update_render_tile_cb = function_null;
session->full_buffer_written_cb = function_null;
}
@@ -608,8 +692,9 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
pass->set_type(bake_type_to_pass(bake_type, bake_filter));
pass->set_include_albedo((bake_filter & BL::BakeSettings::pass_filter_COLOR));
session->set_display_driver(nullptr);
session->set_output_driver(make_unique<BlenderOutputDriver>(b_engine));
session->read_render_tile_cb = [&]() { read_render_tile(); };
session->write_render_tile_cb = [&]() { write_render_tile(); };
session->set_gpu_display(nullptr);
if (!session->progress.get_cancel()) {
/* Sync scene. */
@@ -652,7 +737,43 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
session->wait();
}
session->set_output_driver(nullptr);
session->read_render_tile_cb = function_null;
session->write_render_tile_cb = function_null;
}
void BlenderSession::write_render_result(BL::RenderLayer &b_rlay)
{
if (!session->copy_render_tile_from_device()) {
return;
}
const int2 tile_size = session->get_render_tile_size();
vector<float> pixels(tile_size.x * tile_size.y * 4);
/* Copy each pass. */
for (BL::RenderPass &b_pass : b_rlay.passes) {
if (!session->get_render_tile_pixels(b_pass.name(), b_pass.channels(), &pixels[0])) {
memset(&pixels[0], 0, pixels.size() * sizeof(float));
}
b_pass.rect(&pixels[0]);
}
}
void BlenderSession::update_render_result(BL::RenderLayer &b_rlay)
{
if (!session->copy_render_tile_from_device()) {
return;
}
const int2 tile_size = session->get_render_tile_size();
vector<float> pixels(tile_size.x * tile_size.y * 4);
/* Copy combined pass. */
BL::RenderPass b_combined_pass(b_rlay.passes.find_by_name("Combined", b_rview_name.c_str()));
if (session->get_render_tile_pixels("Combined", b_combined_pass.channels(), &pixels[0])) {
b_combined_pass.rect(&pixels[0]);
}
}
void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
@@ -760,7 +881,7 @@ void BlenderSession::draw(BL::SpaceImageEditor &space_image)
}
BL::Array<float, 2> zoom = space_image.zoom();
display_driver_->set_zoom(zoom[0], zoom[1]);
gpu_display_->set_zoom(zoom[0], zoom[1]);
session->draw();
}
@@ -867,9 +988,8 @@ void BlenderSession::update_status_progress()
get_status(status, substatus);
get_progress(progress, total_time, render_time);
if (progress > 0) {
remaining_time = session->get_estimated_remaining_time();
}
if (progress > 0)
remaining_time = (1.0 - (double)progress) * (render_time / (double)progress);
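This estimate is a linear extrapolation: if a fraction progress of the samples took render_time seconds, the remaining (1 - progress) is assumed to proceed at the same rate. For example, 25% done after 30 seconds gives (1 - 0.25) * (30 / 0.25) = 90 seconds remaining. As a guarded helper (a sketch, not Cycles code):

static double estimated_remaining_time(double progress, double render_time)
{
  /* Guard against division by zero before any progress has been reported. */
  return (progress > 0.0) ? (1.0 - progress) * (render_time / progress) : 0.0;
}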
if (background) {
if (scene)
@@ -907,27 +1027,20 @@ void BlenderSession::update_status_progress()
last_progress = progress;
}
check_and_report_session_error();
}
bool BlenderSession::check_and_report_session_error()
{
if (!session->progress.get_error()) {
return false;
if (session->progress.get_error()) {
string error = session->progress.get_error_message();
if (error != last_error) {
/* TODO(sergey): Currently the C++ RNA API doesn't let us
* use a mnemonic name for the variable. Would be nice to
* have this figured out.
*
* Until then, 1 << 5 means RPT_ERROR.
*/
b_engine.report(1 << 5, error.c_str());
b_engine.error_set(error.c_str());
last_error = error;
}
}
const string error = session->progress.get_error_message();
if (error != last_error) {
/* TODO(sergey): Currently the C++ RNA API doesn't let us use a mnemonic name for the
* variable. Would be nice to have this figured out.
*
* Until then, 1 << 5 means RPT_ERROR. */
b_engine.report(1 << 5, error.c_str());
b_engine.error_set(error.c_str());
last_error = error;
}
return true;
}
void BlenderSession::tag_update()


@@ -29,7 +29,7 @@
CCL_NAMESPACE_BEGIN
class BlenderDisplayDriver;
class BlenderGPUDisplay;
class BlenderSync;
class ImageMetaData;
class Scene;
@@ -70,7 +70,20 @@ class BlenderSession {
const int bake_width,
const int bake_height);
void write_render_result(BL::RenderLayer &b_rlay);
void write_render_tile();
void update_render_tile();
void full_buffer_written(string_view filename);
/* Update functions are used to update the display buffer only after a sample was
* rendered; only needed for better visual feedback. */
void update_render_result(BL::RenderLayer &b_rlay);
/* read functions for baking input */
void read_render_tile();
/* interactive updates */
void synchronize(BL::Depsgraph &b_depsgraph);
@@ -97,7 +110,8 @@ class BlenderSession {
BL::RenderSettings b_render;
BL::Depsgraph b_depsgraph;
/* NOTE: Blender's scene might become invalid after call
* #free_blender_memory_if_possible(). */
* free_blender_memory_if_possible().
*/
BL::Scene b_scene;
BL::SpaceView3D b_v3d;
BL::RegionView3D b_rv3d;
@@ -133,11 +147,6 @@ class BlenderSession {
protected:
void stamp_view_layer_metadata(Scene *scene, const string &view_layer_name);
/* Check whether session error happened.
* If so, it is reported to the render engine and true is returned.
* Otherwise false is returned. */
bool check_and_report_session_error();
void builtin_images_load();
/* Is used after each render layer synchronization is done with the goal
@@ -151,8 +160,8 @@ class BlenderSession {
int last_pass_index = -1;
} draw_state_;
/* NOTE: The BlenderSession references the display driver. */
BlenderDisplayDriver *display_driver_ = nullptr;
/* NOTE: The BlenderSession references the GPU display. */
BlenderGPUDisplay *gpu_display_ = nullptr;
vector<string> full_buffer_files_;
};


@@ -279,7 +279,7 @@ static ShaderNode *add_node(Scene *scene,
array<float3> curve_mapping_curves;
float min_x, max_x;
curvemapping_color_to_array(mapping, curve_mapping_curves, RAMP_TABLE_SIZE, true);
curvemapping_minmax(mapping, 4, &min_x, &max_x);
curvemapping_minmax(mapping, true, &min_x, &max_x);
curves->set_min_x(min_x);
curves->set_max_x(max_x);
curves->set_curves(curve_mapping_curves);
@@ -292,25 +292,12 @@ static ShaderNode *add_node(Scene *scene,
array<float3> curve_mapping_curves;
float min_x, max_x;
curvemapping_color_to_array(mapping, curve_mapping_curves, RAMP_TABLE_SIZE, false);
curvemapping_minmax(mapping, 3, &min_x, &max_x);
curvemapping_minmax(mapping, false, &min_x, &max_x);
curves->set_min_x(min_x);
curves->set_max_x(max_x);
curves->set_curves(curve_mapping_curves);
node = curves;
}
else if (b_node.is_a(&RNA_ShaderNodeFloatCurve)) {
BL::ShaderNodeFloatCurve b_curve_node(b_node);
BL::CurveMapping mapping(b_curve_node.mapping());
FloatCurveNode *curve = graph->create_node<FloatCurveNode>();
array<float> curve_mapping_curve;
float min_x, max_x;
curvemapping_float_to_array(mapping, curve_mapping_curve, RAMP_TABLE_SIZE);
curvemapping_minmax(mapping, 1, &min_x, &max_x);
curve->set_min_x(min_x);
curve->set_max_x(max_x);
curve->set_curve(curve_mapping_curve);
node = curve;
}
else if (b_node.is_a(&RNA_ShaderNodeValToRGB)) {
RGBRampNode *ramp = graph->create_node<RGBRampNode>();
BL::ShaderNodeValToRGB b_ramp_node(b_node);


@@ -855,7 +855,7 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
if (background) {
params.use_auto_tile = RNA_boolean_get(&cscene, "use_auto_tile");
params.tile_size = max(get_int(cscene, "tile_size"), 8);
params.tile_size = get_int(cscene, "tile_size");
}
else {
params.use_auto_tile = false;


@@ -90,27 +90,26 @@ static inline BL::Mesh object_to_mesh(BL::BlendData & /*data*/,
}
#endif
BL::Mesh mesh = (b_ob_info.object_data.is_a(&RNA_Mesh)) ? BL::Mesh(b_ob_info.object_data) :
BL::Mesh(PointerRNA_NULL);
BL::Mesh mesh(PointerRNA_NULL);
if (b_ob_info.object_data.is_a(&RNA_Mesh)) {
/* TODO: calc_undeformed is not used. */
mesh = BL::Mesh(b_ob_info.object_data);
if (b_ob_info.is_real_object_data()) {
if (mesh) {
/* Make a copy to split faces if we use autosmooth, otherwise not needed.
* Also in edit mode we need to make a copy, to ensure data layers like
* UV are not empty. */
if (mesh.is_editmode() ||
(mesh.use_auto_smooth() && subdivision_type == Mesh::SUBDIVISION_NONE)) {
BL::Depsgraph depsgraph(PointerRNA_NULL);
mesh = b_ob_info.real_object.to_mesh(false, depsgraph);
}
}
else {
/* Make a copy to split faces if we use autosmooth, otherwise not needed.
* Also in edit mode we need to make a copy, to ensure data layers like
* UV are not empty. */
if (mesh.is_editmode() ||
(mesh.use_auto_smooth() && subdivision_type == Mesh::SUBDIVISION_NONE)) {
BL::Depsgraph depsgraph(PointerRNA_NULL);
assert(b_ob_info.is_real_object_data());
mesh = b_ob_info.real_object.to_mesh(false, depsgraph);
}
}
else {
/* TODO: what to do about non-mesh geometry instances? */
BL::Depsgraph depsgraph(PointerRNA_NULL);
if (b_ob_info.is_real_object_data()) {
mesh = b_ob_info.real_object.to_mesh(false, depsgraph);
}
}
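Both branches above reduce to a single predicate for when a temporary evaluated mesh copy is required. A hedged restatement (needs_mesh_copy is illustrative, not a Cycles helper):

/* A copy is needed in edit mode (to get valid data layers such as UVs), or
 * when autosmooth must split faces and no subdivision will run instead. */
static bool needs_mesh_copy(bool is_editmode, bool use_auto_smooth, bool will_subdivide)
{
  return is_editmode || (use_auto_smooth && !will_subdivide);
}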
#if 0
@@ -171,11 +170,12 @@ static inline void curvemap_minmax_curve(/*const*/ BL::CurveMap &curve, float *m
}
static inline void curvemapping_minmax(/*const*/ BL::CurveMapping &cumap,
int num_curves,
bool rgb_curve,
float *min_x,
float *max_x)
{
// const int num_curves = cumap.curves.length(); /* Gives linking error so far. */
const int num_curves = rgb_curve ? 4 : 3;
*min_x = FLT_MAX;
*max_x = -FLT_MAX;
for (int i = 0; i < num_curves; ++i) {
@@ -195,28 +195,6 @@ static inline void curvemapping_to_array(BL::CurveMapping &cumap, array<float> &
}
}
static inline void curvemapping_float_to_array(BL::CurveMapping &cumap,
array<float> &data,
int size)
{
float min = 0.0f, max = 1.0f;
curvemapping_minmax(cumap, 1, &min, &max);
const float range = max - min;
cumap.update();
BL::CurveMap map = cumap.curves[0];
data.resize(size);
for (int i = 0; i < size; i++) {
float t = min + (float)i / (float)(size - 1) * range;
data[i] = cumap.evaluate(map, t);
}
}
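The removed helper above bakes a curve into a lookup table by sampling it uniformly, t = min + i / (size - 1) * range, so both endpoints are hit exactly. The same pattern generalized to any 1D function (a sketch assuming size >= 2):

#include <functional>
#include <vector>

static std::vector<float> bake_lut(const std::function<float(float)> &f,
                                   float min_x, float max_x, int size)
{
  std::vector<float> data(size);
  const float range = max_x - min_x;
  for (int i = 0; i < size; i++) {
    const float t = min_x + (float)i / (float)(size - 1) * range;
    data[i] = f(t);
  }
  return data;
}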
static inline void curvemapping_color_to_array(BL::CurveMapping &cumap,
array<float3> &data,
int size,
@@ -235,8 +213,7 @@ static inline void curvemapping_color_to_array(BL::CurveMapping &cumap,
*
* There might be some better estimations here though.
*/
const int num_curves = rgb_curve ? 4 : 3;
curvemapping_minmax(cumap, num_curves, &min_x, &max_x);
curvemapping_minmax(cumap, rgb_curve, &min_x, &max_x);
const float range_x = max_x - min_x;


@@ -213,7 +213,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
if (ctx->num_hits < ctx->max_hits) {
Intersection current_isect;
kernel_embree_convert_hit(kg, ray, hit, &current_isect);
for (size_t i = 0; i < ctx->num_hits; ++i) {
for (size_t i = 0; i < ctx->max_hits; ++i) {
if (current_isect.object == ctx->isect_s[i].object &&
current_isect.prim == ctx->isect_s[i].prim && current_isect.t == ctx->isect_s[i].t) {
/* This intersection was already recorded, skip it. */


@@ -532,13 +532,4 @@ if(WITH_CYCLES_CUDA_BINARIES OR NOT WITH_CUDA_DYNLOAD)
endif()
endif()
###########################################################################
# HIP
###########################################################################
if(NOT WITH_HIP_DYNLOAD)
set(WITH_HIP_DYNLOAD ON)
endif()
unset(_cycles_lib_dir)


@@ -156,16 +156,10 @@ macro(cycles_target_link_libraries target)
${PLATFORM_LINKLIBS}
)
if(WITH_CYCLES_DEVICE_CUDA OR WITH_CYCLES_DEVICE_OPTIX)
if(WITH_CUDA_DYNLOAD)
target_link_libraries(${target} extern_cuew)
else()
target_link_libraries(${target} ${CUDA_CUDA_LIBRARY})
endif()
endif()
if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
target_link_libraries(${target} extern_hipew)
if(WITH_CUDA_DYNLOAD)
target_link_libraries(${target} extern_cuew)
else()
target_link_libraries(${target} ${CUDA_CUDA_LIBRARY})
endif()
if(CYCLES_STANDALONE_REPOSITORY)


@@ -22,25 +22,16 @@ set(INC_SYS
../../../extern/clew/include
)
if(WITH_CYCLES_DEVICE_OPTIX OR WITH_CYCLES_DEVICE_CUDA)
if(WITH_CUDA_DYNLOAD)
list(APPEND INC
../../../extern/cuew/include
)
add_definitions(-DWITH_CUDA_DYNLOAD)
else()
list(APPEND INC_SYS
${CUDA_TOOLKIT_INCLUDE}
)
add_definitions(-DCYCLES_CUDA_NVCC_EXECUTABLE="${CUDA_NVCC_EXECUTABLE}")
endif()
endif()
if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
if(WITH_CUDA_DYNLOAD)
list(APPEND INC
../../../extern/hipew/include
../../../extern/cuew/include
)
add_definitions(-DWITH_HIP_DYNLOAD)
add_definitions(-DWITH_CUDA_DYNLOAD)
else()
list(APPEND INC_SYS
${CUDA_TOOLKIT_INCLUDE}
)
add_definitions(-DCYCLES_CUDA_NVCC_EXECUTABLE="${CUDA_NVCC_EXECUTABLE}")
endif()
set(SRC
@@ -79,21 +70,6 @@ set(SRC_CUDA
cuda/util.h
)
set(SRC_HIP
hip/device.cpp
hip/device.h
hip/device_impl.cpp
hip/device_impl.h
hip/graphics_interop.cpp
hip/graphics_interop.h
hip/kernel.cpp
hip/kernel.h
hip/queue.cpp
hip/queue.h
hip/util.cpp
hip/util.h
)
set(SRC_DUMMY
dummy/device.cpp
dummy/device.h
@@ -129,21 +105,13 @@ set(LIB
${CYCLES_GL_LIBRARIES}
)
if(WITH_CYCLES_DEVICE_OPTIX OR WITH_CYCLES_DEVICE_CUDA)
if(WITH_CUDA_DYNLOAD)
list(APPEND LIB
extern_cuew
)
else()
list(APPEND LIB
${CUDA_CUDA_LIBRARY}
)
endif()
endif()
if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
if(WITH_CUDA_DYNLOAD)
list(APPEND LIB
extern_hipew
extern_cuew
)
else()
list(APPEND LIB
${CUDA_CUDA_LIBRARY}
)
endif()
@@ -152,9 +120,6 @@ add_definitions(${GL_DEFINITIONS})
if(WITH_CYCLES_DEVICE_CUDA)
add_definitions(-DWITH_CUDA)
endif()
if(WITH_CYCLES_DEVICE_HIP)
add_definitions(-DWITH_HIP)
endif()
if(WITH_CYCLES_DEVICE_OPTIX)
add_definitions(-DWITH_OPTIX)
endif()
@@ -175,7 +140,6 @@ cycles_add_library(cycles_device "${LIB}"
${SRC}
${SRC_CPU}
${SRC_CUDA}
${SRC_HIP}
${SRC_DUMMY}
${SRC_MULTI}
${SRC_OPTIX}


@@ -54,6 +54,7 @@
#include "util/util_function.h"
#include "util/util_logging.h"
#include "util/util_map.h"
#include "util/util_opengl.h"
#include "util/util_openimagedenoise.h"
#include "util/util_optimization.h"
#include "util/util_progress.h"
@@ -169,7 +170,7 @@ void CPUDevice::mem_copy_to(device_memory &mem)
}
void CPUDevice::mem_copy_from(
device_memory & /*mem*/, size_t /*y*/, size_t /*w*/, size_t /*h*/, size_t /*elem*/)
device_memory & /*mem*/, int /*y*/, int /*w*/, int /*h*/, int /*elem*/)
{
/* no-op */
}
@@ -203,7 +204,7 @@ void CPUDevice::mem_free(device_memory &mem)
}
}
device_ptr CPUDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/)
device_ptr CPUDevice::mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/)
{
return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
}
@@ -297,6 +298,154 @@ void CPUDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
Device::build_bvh(bvh, progress, refit);
}
#if 0
void CPUDevice::render(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
{
const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE;
scoped_timer timer(&tile.buffers->render_time);
Coverage coverage(kg, tile);
if (use_coverage) {
coverage.init_path_trace();
}
float *render_buffer = (float *)tile.buffer;
int start_sample = tile.start_sample;
int end_sample = tile.start_sample + tile.num_samples;
/* Needed for Embree. */
SIMD_SET_FLUSH_TO_ZERO;
for (int sample = start_sample; sample < end_sample; sample++) {
if (task.get_cancel() || TaskPool::canceled()) {
if (task.need_finish_queue == false)
break;
}
if (tile.stealing_state == RenderTile::CAN_BE_STOLEN && task.get_tile_stolen()) {
tile.stealing_state = RenderTile::WAS_STOLEN;
break;
}
if (tile.task == RenderTile::PATH_TRACE) {
for (int y = tile.y; y < tile.y + tile.h; y++) {
for (int x = tile.x; x < tile.x + tile.w; x++) {
if (use_coverage) {
coverage.init_pixel(x, y);
}
kernels.path_trace(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
}
}
}
else {
for (int y = tile.y; y < tile.y + tile.h; y++) {
for (int x = tile.x; x < tile.x + tile.w; x++) {
kernels.bake(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
}
}
}
tile.sample = sample + 1;
if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(sample)) {
const bool stop = adaptive_sampling_filter(kg, tile, sample);
if (stop) {
const int num_progress_samples = end_sample - sample;
tile.sample = end_sample;
task.update_progress(&tile, tile.w * tile.h * num_progress_samples);
break;
}
}
task.update_progress(&tile, tile.w * tile.h);
}
if (use_coverage) {
coverage.finalize();
}
if (task.adaptive_sampling.use && (tile.stealing_state != RenderTile::WAS_STOLEN)) {
adaptive_sampling_post(tile, kg);
}
}
void CPUDevice::thread_render(DeviceTask &task)
{
if (TaskPool::canceled()) {
if (task.need_finish_queue == false)
return;
}
/* allocate buffer for kernel globals */
CPUKernelThreadGlobals kg(kernel_globals, get_cpu_osl_memory());
profiler.add_state(&kg.profiler);
/* NLM denoiser. */
DenoisingTask *denoising = NULL;
/* OpenImageDenoise: we can only denoise with one thread at a time, so to
* avoid waiting with mutex locks in the denoiser, we let only a single
* thread acquire denoising tiles. */
uint tile_types = task.tile_types;
bool hold_denoise_lock = false;
if ((tile_types & RenderTile::DENOISE) && task.denoising.type == DENOISER_OPENIMAGEDENOISE) {
if (!oidn_task_lock.try_lock()) {
tile_types &= ~RenderTile::DENOISE;
hold_denoise_lock = true;
}
}
RenderTile tile;
while (task.acquire_tile(this, tile, tile_types)) {
if (tile.task == RenderTile::PATH_TRACE) {
render(task, tile, &kg);
}
else if (tile.task == RenderTile::BAKE) {
render(task, tile, &kg);
}
else if (tile.task == RenderTile::DENOISE) {
denoise_openimagedenoise(task, tile);
task.update_progress(&tile, tile.w * tile.h);
}
task.release_tile(tile);
if (TaskPool::canceled()) {
if (task.need_finish_queue == false)
break;
}
}
if (hold_denoise_lock) {
oidn_task_lock.unlock();
}
profiler.remove_state(&kg.profiler);
delete denoising;
}
void CPUDevice::thread_denoise(DeviceTask &task)
{
RenderTile tile;
tile.x = task.x;
tile.y = task.y;
tile.w = task.w;
tile.h = task.h;
tile.buffer = task.buffer;
tile.sample = task.sample + task.num_samples;
tile.num_samples = task.num_samples;
tile.start_sample = task.sample;
tile.offset = task.offset;
tile.stride = task.stride;
tile.buffers = task.buffers;
denoise_openimagedenoise(task, tile);
task.update_progress(&tile, tile.w * tile.h);
}
#endif
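The OpenImageDenoise gate in thread_render() above uses try_lock so that only one worker takes denoising tiles while the others skip that tile type instead of blocking. A minimal sketch of the same single-consumer pattern (names and mask values are illustrative, not the Cycles ones):

#include <mutex>

enum TileTypes : unsigned int { TILE_PATH_TRACE = 1 << 0, TILE_DENOISE = 1 << 1 };

static std::mutex oidn_lock;

/* If another worker already denoises, drop DENOISE from this worker's tile
 * mask instead of blocking on the mutex; the winner must unlock when done. */
static bool acquire_denoise_role(unsigned int &tile_types)
{
  if (oidn_lock.try_lock()) {
    return true;
  }
  tile_types &= ~TILE_DENOISE;
  return false;
}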
const CPUKernels *CPUDevice::get_cpu_kernels() const
{
return &kernels;


@@ -72,13 +72,10 @@ class CPUDevice : public Device {
virtual void mem_alloc(device_memory &mem) override;
virtual void mem_copy_to(device_memory &mem) override;
virtual void mem_copy_from(
device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
virtual void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override;
virtual void mem_zero(device_memory &mem) override;
virtual void mem_free(device_memory &mem) override;
virtual device_ptr mem_alloc_sub_ptr(device_memory &mem,
size_t offset,
size_t /*size*/) override;
virtual device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/) override;
virtual void const_copy_to(const char *name, void *host, size_t size) override;


@@ -31,6 +31,7 @@
# include "util/util_logging.h"
# include "util/util_map.h"
# include "util/util_md5.h"
# include "util/util_opengl.h"
# include "util/util_path.h"
# include "util/util_string.h"
# include "util/util_system.h"
@@ -836,7 +837,7 @@ void CUDADevice::mem_copy_to(device_memory &mem)
}
}
void CUDADevice::mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem)
void CUDADevice::mem_copy_from(device_memory &mem, int y, int w, int h, int elem)
{
if (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) {
assert(!"mem_copy_from not supported for textures.");
@@ -890,7 +891,7 @@ void CUDADevice::mem_free(device_memory &mem)
}
}
device_ptr CUDADevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/)
device_ptr CUDADevice::mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/)
{
return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
}
@@ -1168,6 +1169,141 @@ void CUDADevice::tex_free(device_texture &mem)
}
}
# if 0
void CUDADevice::render(DeviceTask &task,
RenderTile &rtile,
device_vector<KernelWorkTile> &work_tiles)
{
scoped_timer timer(&rtile.buffers->render_time);
if (have_error())
return;
CUDAContextScope scope(this);
CUfunction cuRender;
/* Get kernel function. */
if (rtile.task == RenderTile::BAKE) {
cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_bake"));
}
else {
cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_path_trace"));
}
if (have_error()) {
return;
}
cuda_assert(cuFuncSetCacheConfig(cuRender, CU_FUNC_CACHE_PREFER_L1));
/* Allocate work tile. */
work_tiles.alloc(1);
KernelWorkTile *wtile = work_tiles.data();
wtile->x = rtile.x;
wtile->y = rtile.y;
wtile->w = rtile.w;
wtile->h = rtile.h;
wtile->offset = rtile.offset;
wtile->stride = rtile.stride;
wtile->buffer = (float *)(CUdeviceptr)rtile.buffer;
/* Prepare work size. More step samples render faster, but for now we
* remain conservative for GPUs connected to a display to avoid driver
* timeouts and display freezing. */
int min_blocks, num_threads_per_block;
cuda_assert(
cuOccupancyMaxPotentialBlockSize(&min_blocks, &num_threads_per_block, cuRender, NULL, 0, 0));
if (!info.display_device) {
min_blocks *= 8;
}
uint step_samples = divide_up(min_blocks * num_threads_per_block, wtile->w * wtile->h);
/* Render all samples. */
uint start_sample = rtile.start_sample;
uint end_sample = rtile.start_sample + rtile.num_samples;
for (int sample = start_sample; sample < end_sample;) {
/* Setup and copy work tile to device. */
wtile->start_sample = sample;
wtile->num_samples = step_samples;
if (task.adaptive_sampling.use) {
wtile->num_samples = task.adaptive_sampling.align_samples(sample, step_samples);
}
wtile->num_samples = min(wtile->num_samples, end_sample - sample);
work_tiles.copy_to_device();
CUdeviceptr d_work_tiles = (CUdeviceptr)work_tiles.device_pointer;
uint total_work_size = wtile->w * wtile->h * wtile->num_samples;
uint num_blocks = divide_up(total_work_size, num_threads_per_block);
/* Launch kernel. */
void *args[] = {&d_work_tiles, &total_work_size};
cuda_assert(
cuLaunchKernel(cuRender, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
/* Run the adaptive sampling kernels at selected samples aligned to step samples. */
uint filter_sample = sample + wtile->num_samples - 1;
if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(filter_sample)) {
adaptive_sampling_filter(filter_sample, wtile, d_work_tiles);
}
cuda_assert(cuCtxSynchronize());
/* Update progress. */
sample += wtile->num_samples;
rtile.sample = sample;
task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples);
if (task.get_cancel()) {
if (task.need_finish_queue == false)
break;
}
}
/* Finalize adaptive sampling. */
if (task.adaptive_sampling.use) {
CUdeviceptr d_work_tiles = (CUdeviceptr)work_tiles.device_pointer;
adaptive_sampling_post(rtile, wtile, d_work_tiles);
cuda_assert(cuCtxSynchronize());
task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples);
}
}
void CUDADevice::thread_run(DeviceTask &task)
{
CUDAContextScope scope(this);
if (task.type == DeviceTask::RENDER) {
device_vector<KernelWorkTile> work_tiles(this, "work_tiles", MEM_READ_ONLY);
/* keep rendering tiles until done */
RenderTile tile;
DenoisingTask denoising(this, task);
while (task.acquire_tile(this, tile, task.tile_types)) {
if (tile.task == RenderTile::PATH_TRACE) {
render(task, tile, work_tiles);
}
else if (tile.task == RenderTile::BAKE) {
render(task, tile, work_tiles);
}
task.release_tile(tile);
if (task.get_cancel()) {
if (task.need_finish_queue == false)
break;
}
}
work_tiles.free();
}
}
# endif
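The step-sample sizing in the disabled render() above relies on ceiling division: divide_up(min_blocks * num_threads_per_block, w * h) launches just enough samples per kernel call to cover the occupancy-suggested block count. Assuming divide_up is the usual ceiling division (a sketch; Cycles ships its own helper), min_blocks = 40 with 128 threads per block and a 64x64 tile gives divide_up(5120, 4096) = 2 samples per launch.

static unsigned int divide_up(unsigned int x, unsigned int y)
{
  return (x + y - 1) / y; /* Ceiling of x / y for y > 0. */
}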
unique_ptr<DeviceQueue> CUDADevice::gpu_queue_create()
{
return make_unique<CUDADeviceQueue>(this);


@@ -26,6 +26,7 @@
# ifdef WITH_CUDA_DYNLOAD
# include "cuew.h"
# else
# include "util/util_opengl.h"
# include <cuda.h>
# include <cudaGL.h>
# endif
@@ -119,13 +120,13 @@ class CUDADevice : public Device {
void mem_copy_to(device_memory &mem) override;
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override;
void mem_zero(device_memory &mem) override;
void mem_free(device_memory &mem) override;
device_ptr mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/) override;
device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/) override;
virtual void const_copy_to(const char *name, void *host, size_t size) override;


@@ -37,15 +37,14 @@ CUDADeviceGraphicsInterop::~CUDADeviceGraphicsInterop()
}
}
void CUDADeviceGraphicsInterop::set_display_interop(
const DisplayDriver::GraphicsInterop &display_interop)
void CUDADeviceGraphicsInterop::set_destination(
const DeviceGraphicsInteropDestination &destination)
{
const int64_t new_buffer_area = int64_t(display_interop.buffer_width) *
display_interop.buffer_height;
const int64_t new_buffer_area = int64_t(destination.buffer_width) * destination.buffer_height;
need_clear_ = display_interop.need_clear;
need_clear_ = destination.need_clear;
if (opengl_pbo_id_ == display_interop.opengl_pbo_id && buffer_area_ == new_buffer_area) {
if (opengl_pbo_id_ == destination.opengl_pbo_id && buffer_area_ == new_buffer_area) {
return;
}
@@ -56,12 +55,12 @@ void CUDADeviceGraphicsInterop::set_display_interop(
}
const CUresult result = cuGraphicsGLRegisterBuffer(
&cu_graphics_resource_, display_interop.opengl_pbo_id, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
&cu_graphics_resource_, destination.opengl_pbo_id, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
if (result != CUDA_SUCCESS) {
LOG(ERROR) << "Error registering OpenGL buffer: " << cuewErrorString(result);
}
opengl_pbo_id_ = display_interop.opengl_pbo_id;
opengl_pbo_id_ = destination.opengl_pbo_id;
buffer_area_ = new_buffer_area;
}
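set_destination() above re-registers the OpenGL buffer with CUDA only when the (PBO id, buffer area) pair actually changes, since registration is comparatively expensive. The caching test in isolation (an illustrative struct, not the Cycles class):

#include <cstdint>

struct InteropKey {
  unsigned int pbo_id = 0;
  int64_t buffer_area = 0;

  /* True when the existing registration can be kept as-is. */
  bool matches(unsigned int new_pbo_id, int64_t new_area) const
  {
    return pbo_id == new_pbo_id && buffer_area == new_area;
  }
};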


@@ -41,7 +41,7 @@ class CUDADeviceGraphicsInterop : public DeviceGraphicsInterop {
CUDADeviceGraphicsInterop &operator=(const CUDADeviceGraphicsInterop &other) = delete;
CUDADeviceGraphicsInterop &operator=(CUDADeviceGraphicsInterop &&other) = delete;
virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) override;
virtual void set_destination(const DeviceGraphicsInteropDestination &destination) override;
virtual device_ptr map() override;
virtual void unmap() override;


@@ -116,18 +116,18 @@ bool CUDADeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *ar
}
/* Launch kernel. */
assert_success(cuLaunchKernel(cuda_kernel.function,
num_blocks,
1,
1,
num_threads_per_block,
1,
1,
shared_mem_bytes,
cuda_stream_,
args,
0),
"enqueue");
cuda_device_assert(cuda_device_,
cuLaunchKernel(cuda_kernel.function,
num_blocks,
1,
1,
num_threads_per_block,
1,
1,
shared_mem_bytes,
cuda_stream_,
args,
0));
return !(cuda_device_->have_error());
}
@@ -139,8 +139,7 @@ bool CUDADeviceQueue::synchronize()
}
const CUDAContextScope scope(cuda_device_);
assert_success(cuStreamSynchronize(cuda_stream_), "synchronize");
cuda_device_assert(cuda_device_, cuStreamSynchronize(cuda_stream_));
debug_synchronize();
return !(cuda_device_->have_error());
@@ -163,9 +162,9 @@ void CUDADeviceQueue::zero_to_device(device_memory &mem)
assert(mem.device_pointer != 0);
const CUDAContextScope scope(cuda_device_);
assert_success(
cuMemsetD8Async((CUdeviceptr)mem.device_pointer, 0, mem.memory_size(), cuda_stream_),
"zero_to_device");
cuda_device_assert(
cuda_device_,
cuMemsetD8Async((CUdeviceptr)mem.device_pointer, 0, mem.memory_size(), cuda_stream_));
}
void CUDADeviceQueue::copy_to_device(device_memory &mem)
@@ -186,10 +185,10 @@ void CUDADeviceQueue::copy_to_device(device_memory &mem)
/* Copy memory to device. */
const CUDAContextScope scope(cuda_device_);
assert_success(
cuda_device_assert(
cuda_device_,
cuMemcpyHtoDAsync(
(CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size(), cuda_stream_),
"copy_to_device");
(CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size(), cuda_stream_));
}
void CUDADeviceQueue::copy_from_device(device_memory &mem)
@@ -205,19 +204,10 @@ void CUDADeviceQueue::copy_from_device(device_memory &mem)
/* Copy memory from device. */
const CUDAContextScope scope(cuda_device_);
assert_success(
cuda_device_assert(
cuda_device_,
cuMemcpyDtoHAsync(
mem.host_pointer, (CUdeviceptr)mem.device_pointer, mem.memory_size(), cuda_stream_),
"copy_from_device");
}
void CUDADeviceQueue::assert_success(CUresult result, const char *operation)
{
if (result != CUDA_SUCCESS) {
const char *name = cuewErrorString(result);
cuda_device_->set_error(string_printf(
"%s in CUDA queue %s (%s)", name, operation, debug_active_kernels().c_str()));
}
mem.host_pointer, (CUdeviceptr)mem.device_pointer, mem.memory_size(), cuda_stream_));
}
unique_ptr<DeviceGraphicsInterop> CUDADeviceQueue::graphics_interop_create()


@@ -60,8 +60,6 @@ class CUDADeviceQueue : public DeviceQueue {
protected:
CUDADevice *cuda_device_;
CUstream cuda_stream_;
void assert_success(CUresult result, const char *operation);
};
CCL_NAMESPACE_END


@@ -25,7 +25,6 @@
#include "device/cpu/device.h"
#include "device/cuda/device.h"
#include "device/dummy/device.h"
#include "device/hip/device.h"
#include "device/multi/device.h"
#include "device/optix/device.h"
@@ -33,6 +32,7 @@
#include "util/util_half.h"
#include "util/util_logging.h"
#include "util/util_math.h"
#include "util/util_opengl.h"
#include "util/util_string.h"
#include "util/util_system.h"
#include "util/util_time.h"
@@ -47,7 +47,6 @@ thread_mutex Device::device_mutex;
vector<DeviceInfo> Device::cuda_devices;
vector<DeviceInfo> Device::optix_devices;
vector<DeviceInfo> Device::cpu_devices;
vector<DeviceInfo> Device::hip_devices;
uint Device::devices_initialized_mask = 0;
/* Device */
@@ -98,14 +97,6 @@ Device *Device::create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
device = device_optix_create(info, stats, profiler);
break;
#endif
#ifdef WITH_HIP
case DEVICE_HIP:
if (device_hip_init())
device = device_hip_create(info, stats, profiler);
break;
#endif
default:
break;
}
@@ -127,8 +118,6 @@ DeviceType Device::type_from_string(const char *name)
return DEVICE_OPTIX;
else if (strcmp(name, "MULTI") == 0)
return DEVICE_MULTI;
else if (strcmp(name, "HIP") == 0)
return DEVICE_HIP;
return DEVICE_NONE;
}
@@ -143,8 +132,6 @@ string Device::string_from_type(DeviceType type)
return "OPTIX";
else if (type == DEVICE_MULTI)
return "MULTI";
else if (type == DEVICE_HIP)
return "HIP";
return "";
}
@@ -159,10 +146,6 @@ vector<DeviceType> Device::available_types()
#ifdef WITH_OPTIX
types.push_back(DEVICE_OPTIX);
#endif
#ifdef WITH_HIP
types.push_back(DEVICE_HIP);
#endif
return types;
}
@@ -204,20 +187,6 @@ vector<DeviceInfo> Device::available_devices(uint mask)
}
#endif
#ifdef WITH_HIP
if (mask & DEVICE_MASK_HIP) {
if (!(devices_initialized_mask & DEVICE_MASK_HIP)) {
if (device_hip_init()) {
device_hip_info(hip_devices);
}
devices_initialized_mask |= DEVICE_MASK_HIP;
}
foreach (DeviceInfo &info, hip_devices) {
devices.push_back(info);
}
}
#endif
if (mask & DEVICE_MASK_CPU) {
if (!(devices_initialized_mask & DEVICE_MASK_CPU)) {
device_cpu_info(cpu_devices);
@@ -258,15 +227,6 @@ string Device::device_capabilities(uint mask)
}
#endif
#ifdef WITH_HIP
if (mask & DEVICE_MASK_HIP) {
if (device_hip_init()) {
capabilities += "\nHIP device capabilities:\n";
capabilities += device_hip_capabilities();
}
}
#endif
return capabilities;
}
@@ -355,7 +315,6 @@ void Device::free_memory()
devices_initialized_mask = 0;
cuda_devices.free_memory();
optix_devices.free_memory();
hip_devices.free_memory();
cpu_devices.free_memory();
}


@@ -51,7 +51,6 @@ enum DeviceType {
DEVICE_CUDA,
DEVICE_MULTI,
DEVICE_OPTIX,
DEVICE_HIP,
DEVICE_DUMMY,
};
@@ -59,7 +58,6 @@ enum DeviceTypeMask {
DEVICE_MASK_CPU = (1 << DEVICE_CPU),
DEVICE_MASK_CUDA = (1 << DEVICE_CUDA),
DEVICE_MASK_OPTIX = (1 << DEVICE_OPTIX),
DEVICE_MASK_HIP = (1 << DEVICE_HIP),
DEVICE_MASK_ALL = ~0
};
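
A short usage sketch, assuming the DeviceType and DeviceTypeMask enums above are in scope: since each mask bit is (1 << type), membership tests are single bitwise ANDs, and DEVICE_MASK_ALL (~0) passes every test.

const unsigned int mask = DEVICE_MASK_CPU | DEVICE_MASK_OPTIX;
if (mask & DEVICE_MASK_OPTIX) {
  /* Initialize and enumerate OptiX devices only when they were asked for. */
}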
@@ -121,7 +119,7 @@ class Device {
string error_msg;
virtual device_ptr mem_alloc_sub_ptr(device_memory & /*mem*/, size_t /*offset*/, size_t /*size*/)
virtual device_ptr mem_alloc_sub_ptr(device_memory & /*mem*/, int /*offset*/, int /*size*/)
{
/* Only required for devices that implement denoising. */
assert(false);
@@ -275,7 +273,7 @@ class Device {
virtual void mem_alloc(device_memory &mem) = 0;
virtual void mem_copy_to(device_memory &mem) = 0;
virtual void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) = 0;
virtual void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) = 0;
virtual void mem_zero(device_memory &mem) = 0;
virtual void mem_free(device_memory &mem) = 0;
@@ -286,7 +284,6 @@ class Device {
static vector<DeviceInfo> cuda_devices;
static vector<DeviceInfo> optix_devices;
static vector<DeviceInfo> cpu_devices;
static vector<DeviceInfo> hip_devices;
static uint devices_initialized_mask;
};


@@ -16,12 +16,25 @@
#pragma once
#include "render/display_driver.h"
#include "util/util_types.h"
CCL_NAMESPACE_BEGIN
/* Information about the interoperability destination.
* Provided by the GPUDisplay. */
class DeviceGraphicsInteropDestination {
public:
/* Dimensions of the buffer, in pixels. */
int buffer_width = 0;
int buffer_height = 0;
/* OpenGL pixel buffer object. */
int opengl_pbo_id = 0;
/* Clear the entire destination before doing partial write to it. */
bool need_clear = false;
};
/* Device-side graphics interoperability support.
*
* Takes care of holding all the handles needed by the device to implement interoperability with
@@ -33,7 +46,7 @@ class DeviceGraphicsInterop {
/* Update this device-side graphics interoperability object with the given destination resource
* information. */
virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) = 0;
virtual void set_destination(const DeviceGraphicsInteropDestination &destination) = 0;
virtual device_ptr map() = 0;
virtual void unmap() = 0;
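
A hedged usage sketch for the destination struct above: the display side fills it once per redraw and hands it to the device interop object before mapping. Field and method names are from the diff; texture_width, texture_height, pbo_id and buffer_was_resized are illustrative locals.

DeviceGraphicsInteropDestination destination;
destination.buffer_width = texture_width;
destination.buffer_height = texture_height;
destination.opengl_pbo_id = pbo_id;          /* OpenGL pixel buffer object name. */
destination.need_clear = buffer_was_resized; /* Request a full clear before partial writes. */
interop->set_destination(destination);
device_ptr pixels = interop->map();
/* ... device writes pixels ... */
interop->unmap();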


@@ -136,7 +136,7 @@ void device_memory::device_copy_to()
}
}
void device_memory::device_copy_from(size_t y, size_t w, size_t h, size_t elem)
void device_memory::device_copy_from(int y, int w, int h, int elem)
{
assert(type != MEM_TEXTURE && type != MEM_READ_ONLY && type != MEM_GLOBAL);
device->mem_copy_from(*this, y, w, h, elem);
@@ -181,7 +181,7 @@ bool device_memory::is_resident(Device *sub_device) const
/* Device Sub Ptr */
device_sub_ptr::device_sub_ptr(device_memory &mem, size_t offset, size_t size) : device(mem.device)
device_sub_ptr::device_sub_ptr(device_memory &mem, int offset, int size) : device(mem.device)
{
ptr = device->mem_alloc_sub_ptr(mem, offset, size);
}


@@ -81,154 +81,154 @@ static constexpr size_t datatype_size(DataType datatype)
template<typename T> struct device_type_traits {
static const DataType data_type = TYPE_UNKNOWN;
static const size_t num_elements_cpu = sizeof(T);
static const size_t num_elements_gpu = sizeof(T);
static const int num_elements_cpu = sizeof(T);
static const int num_elements_gpu = sizeof(T);
};
template<> struct device_type_traits<uchar> {
static const DataType data_type = TYPE_UCHAR;
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static const int num_elements_cpu = 1;
static const int num_elements_gpu = 1;
static_assert(sizeof(uchar) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uchar2> {
static const DataType data_type = TYPE_UCHAR;
static const size_t num_elements_cpu = 2;
static const size_t num_elements_gpu = 2;
static const int num_elements_cpu = 2;
static const int num_elements_gpu = 2;
static_assert(sizeof(uchar2) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uchar3> {
static const DataType data_type = TYPE_UCHAR;
static const size_t num_elements_cpu = 3;
static const size_t num_elements_gpu = 3;
static const int num_elements_cpu = 3;
static const int num_elements_gpu = 3;
static_assert(sizeof(uchar3) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uchar4> {
static const DataType data_type = TYPE_UCHAR;
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 4;
static const int num_elements_cpu = 4;
static const int num_elements_gpu = 4;
static_assert(sizeof(uchar4) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uint> {
static const DataType data_type = TYPE_UINT;
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static const int num_elements_cpu = 1;
static const int num_elements_gpu = 1;
static_assert(sizeof(uint) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uint2> {
static const DataType data_type = TYPE_UINT;
static const size_t num_elements_cpu = 2;
static const size_t num_elements_gpu = 2;
static const int num_elements_cpu = 2;
static const int num_elements_gpu = 2;
static_assert(sizeof(uint2) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uint3> {
static const DataType data_type = TYPE_UINT;
static const size_t num_elements_cpu = 3;
static const size_t num_elements_gpu = 3;
static const int num_elements_cpu = 3;
static const int num_elements_gpu = 3;
static_assert(sizeof(uint3) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uint4> {
static const DataType data_type = TYPE_UINT;
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 4;
static const int num_elements_cpu = 4;
static const int num_elements_gpu = 4;
static_assert(sizeof(uint4) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<int> {
static const DataType data_type = TYPE_INT;
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static const int num_elements_cpu = 1;
static const int num_elements_gpu = 1;
static_assert(sizeof(int) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<int2> {
static const DataType data_type = TYPE_INT;
static const size_t num_elements_cpu = 2;
static const size_t num_elements_gpu = 2;
static const int num_elements_cpu = 2;
static const int num_elements_gpu = 2;
static_assert(sizeof(int2) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<int3> {
static const DataType data_type = TYPE_INT;
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 3;
static const int num_elements_cpu = 4;
static const int num_elements_gpu = 3;
static_assert(sizeof(int3) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<int4> {
static const DataType data_type = TYPE_INT;
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 4;
static const int num_elements_cpu = 4;
static const int num_elements_gpu = 4;
static_assert(sizeof(int4) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<float> {
static const DataType data_type = TYPE_FLOAT;
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static const int num_elements_cpu = 1;
static const int num_elements_gpu = 1;
static_assert(sizeof(float) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<float2> {
static const DataType data_type = TYPE_FLOAT;
static const size_t num_elements_cpu = 2;
static const size_t num_elements_gpu = 2;
static const int num_elements_cpu = 2;
static const int num_elements_gpu = 2;
static_assert(sizeof(float2) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<float3> {
static const DataType data_type = TYPE_FLOAT;
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 3;
static const int num_elements_cpu = 4;
static const int num_elements_gpu = 3;
static_assert(sizeof(float3) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<float4> {
static const DataType data_type = TYPE_FLOAT;
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 4;
static const int num_elements_cpu = 4;
static const int num_elements_gpu = 4;
static_assert(sizeof(float4) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<half> {
static const DataType data_type = TYPE_HALF;
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static const int num_elements_cpu = 1;
static const int num_elements_gpu = 1;
static_assert(sizeof(half) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<ushort4> {
static const DataType data_type = TYPE_UINT16;
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 4;
static const int num_elements_cpu = 4;
static const int num_elements_gpu = 4;
static_assert(sizeof(ushort4) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uint16_t> {
static const DataType data_type = TYPE_UINT16;
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static const int num_elements_cpu = 1;
static const int num_elements_gpu = 1;
static_assert(sizeof(uint16_t) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<half4> {
static const DataType data_type = TYPE_HALF;
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 4;
static const int num_elements_cpu = 4;
static const int num_elements_gpu = 4;
static_assert(sizeof(half4) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uint64_t> {
static const DataType data_type = TYPE_UINT64;
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static const int num_elements_cpu = 1;
static const int num_elements_gpu = 1;
static_assert(sizeof(uint64_t) == num_elements_cpu * datatype_size(data_type));
};
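
The practical consequence of the CPU/GPU split in these traits is that buffer sizes must be computed with the element count for the right target: the 3-component vector types are padded to 4 elements on the host but packed on the GPU. A sketch, assuming the traits and datatype_size() above are in scope (buffer_bytes itself is illustrative):

template<typename T> size_t buffer_bytes(size_t n, bool on_gpu)
{
  const size_t per_elem = on_gpu ? device_type_traits<T>::num_elements_gpu :
                                   device_type_traits<T>::num_elements_cpu;
  return n * per_elem * datatype_size(device_type_traits<T>::data_type);
}

/* buffer_bytes<float3>(1, false) == 16 (4 floats, one padding lane on the
 * CPU), while buffer_bytes<float3>(1, true) == 12 (packed on the GPU). */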
@@ -277,7 +277,6 @@ class device_memory {
protected:
friend class CUDADevice;
friend class OptiXDevice;
friend class HIPDevice;
/* Only create through subclasses. */
device_memory(Device *device, const char *name, MemoryType type);
@@ -297,7 +296,7 @@ class device_memory {
void device_alloc();
void device_free();
void device_copy_to();
void device_copy_from(size_t y, size_t w, size_t h, size_t elem);
void device_copy_from(int y, int w, int h, int elem);
void device_zero();
bool device_is_cpu();
@@ -566,7 +565,7 @@ template<typename T> class device_vector : public device_memory {
device_copy_from(0, data_width, (data_height == 0) ? 1 : data_height, sizeof(T));
}
void copy_from_device(size_t y, size_t w, size_t h)
void copy_from_device(int y, int w, int h)
{
device_copy_from(y, w, h, sizeof(T));
}
@@ -602,7 +601,7 @@ template<typename T> class device_vector : public device_memory {
class device_sub_ptr {
public:
device_sub_ptr(device_memory &mem, size_t offset, size_t size);
device_sub_ptr(device_memory &mem, int offset, int size);
~device_sub_ptr();
device_ptr operator*() const


@@ -57,9 +57,8 @@ void DeviceQueue::debug_init_execution()
{
if (VLOG_IS_ON(3)) {
last_sync_time_ = time_dt();
last_kernels_enqueued_ = 0;
}
last_kernels_enqueued_ = 0;
}
void DeviceQueue::debug_enqueue(DeviceKernel kernel, const int work_size)
@@ -67,9 +66,8 @@ void DeviceQueue::debug_enqueue(DeviceKernel kernel, const int work_size)
if (VLOG_IS_ON(3)) {
VLOG(4) << "GPU queue launch " << device_kernel_as_string(kernel) << ", work_size "
<< work_size;
last_kernels_enqueued_ |= (uint64_t(1) << (uint64_t)kernel);
}
last_kernels_enqueued_ |= (uint64_t(1) << (uint64_t)kernel);
}
void DeviceQueue::debug_synchronize()
@@ -82,14 +80,8 @@ void DeviceQueue::debug_synchronize()
stats_kernel_time_[last_kernels_enqueued_] += elapsed_time;
last_sync_time_ = new_time;
last_kernels_enqueued_ = 0;
}
last_kernels_enqueued_ = 0;
}
string DeviceQueue::debug_active_kernels()
{
return device_kernel_mask_as_string(last_kernels_enqueued_);
}
CCL_NAMESPACE_END
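
The debug functions above implement a per-batch timing scheme: every enqueued kernel sets its bit in a 64-bit mask, and the next synchronize charges the elapsed wall time to that exact mask, so repeated batches of the same kernel combination aggregate into one counter. A standalone sketch of the idea; time values would come from time_dt() in the real code, everything else here is illustrative:

#include <cstdint>
#include <map>

static uint64_t last_kernels_enqueued = 0;
static double last_sync_time = 0.0;
static std::map<uint64_t, double> stats_kernel_time;

void on_enqueue(int kernel_index)
{
  last_kernels_enqueued |= uint64_t(1) << uint64_t(kernel_index);
}

void on_synchronize(double now)
{
  stats_kernel_time[last_kernels_enqueued] += now - last_sync_time;
  last_sync_time = now;
  last_kernels_enqueued = 0; /* Start a fresh batch. */
}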


@@ -21,7 +21,6 @@
#include "device/device_graphics_interop.h"
#include "util/util_logging.h"
#include "util/util_map.h"
#include "util/util_string.h"
#include "util/util_unique_ptr.h"
CCL_NAMESPACE_BEGIN
@@ -102,7 +101,6 @@ class DeviceQueue {
void debug_init_execution();
void debug_enqueue(DeviceKernel kernel, const int work_size);
void debug_synchronize();
string debug_active_kernels();
/* Combination of kernels enqueued together since the last synchronize. */
DeviceKernelMask last_kernels_enqueued_;


@@ -48,7 +48,7 @@ class DummyDevice : public Device {
{
}
virtual void mem_copy_from(device_memory &, size_t, size_t, size_t, size_t) override
virtual void mem_copy_from(device_memory &, int, int, int, int) override
{
}


@@ -1,276 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "device/hip/device.h"
#include "util/util_logging.h"
#ifdef WITH_HIP
# include "device/device.h"
# include "device/hip/device_impl.h"
# include "util/util_string.h"
# include "util/util_windows.h"
#endif /* WITH_HIP */
CCL_NAMESPACE_BEGIN
bool device_hip_init()
{
#if !defined(WITH_HIP)
return false;
#elif defined(WITH_HIP_DYNLOAD)
static bool initialized = false;
static bool result = false;
if (initialized)
return result;
initialized = true;
int hipew_result = hipewInit(HIPEW_INIT_HIP);
if (hipew_result == HIPEW_SUCCESS) {
VLOG(1) << "HIPEW initialization succeeded";
if (HIPDevice::have_precompiled_kernels()) {
VLOG(1) << "Found precompiled kernels";
result = true;
}
else if (hipewCompilerPath() != NULL) {
VLOG(1) << "Found HIPCC " << hipewCompilerPath();
result = true;
}
else {
VLOG(1) << "Neither precompiled kernels nor HIPCC was found,"
<< " unable to use HIP";
}
}
else {
VLOG(1) << "HIPEW initialization failed: "
<< ((hipew_result == HIPEW_ERROR_ATEXIT_FAILED) ? "Error setting up atexit() handler" :
"Error opening the library");
}
return result;
#else /* WITH_HIP_DYNLOAD */
return true;
#endif /* WITH_HIP_DYNLOAD */
}
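
device_hip_init() above memoizes an expensive loader probe behind two static booleans, so only the first call pays for hipewInit and the compiler/kernel checks. The pattern in isolation, with probe_driver() as a hypothetical stand-in for that work:

bool probe_driver(); /* Hypothetical: load the driver, check for kernels. */

bool backend_init()
{
  static bool initialized = false;
  static bool result = false;
  if (initialized) {
    return result; /* Replay the cached outcome on later calls. */
  }
  initialized = true;
  result = probe_driver();
  return result;
}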
Device *device_hip_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
{
#ifdef WITH_HIP
return new HIPDevice(info, stats, profiler);
#else
(void)info;
(void)stats;
(void)profiler;
LOG(FATAL) << "Request to create HIP device without compiled-in support. Should never happen.";
return nullptr;
#endif
}
#ifdef WITH_HIP
static hipError_t device_hip_safe_init()
{
# ifdef _WIN32
__try {
return hipInit(0);
}
__except (EXCEPTION_EXECUTE_HANDLER) {
/* Ignore crashes inside the HIP driver and hope we can
* survive even with corrupted HIP installs. */
fprintf(stderr, "Cycles HIP: driver crashed, continuing without HIP.\n");
}
return hipErrorNoDevice;
# else
return hipInit(0);
# endif
}
#endif /* WITH_HIP */
void device_hip_info(vector<DeviceInfo> &devices)
{
#ifdef WITH_HIP
hipError_t result = device_hip_safe_init();
if (result != hipSuccess) {
if (result != hipErrorNoDevice)
fprintf(stderr, "HIP hipInit: %s\n", hipewErrorString(result));
return;
}
int count = 0;
result = hipGetDeviceCount(&count);
if (result != hipSuccess) {
fprintf(stderr, "HIP hipGetDeviceCount: %s\n", hipewErrorString(result));
return;
}
vector<DeviceInfo> display_devices;
for (int num = 0; num < count; num++) {
char name[256];
result = hipDeviceGetName(name, 256, num);
if (result != hipSuccess) {
fprintf(stderr, "HIP :hipDeviceGetName: %s\n", hipewErrorString(result));
continue;
}
int major;
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, num);
// TODO : (Arya) What is the last major version we are supporting?
DeviceInfo info;
info.type = DEVICE_HIP;
info.description = string(name);
info.num = num;
info.has_half_images = (major >= 3);
info.has_nanovdb = true;
info.denoisers = 0;
info.has_gpu_queue = true;
/* Check if the device has P2P access to any other device in the system. */
for (int peer_num = 0; peer_num < count && !info.has_peer_memory; peer_num++) {
if (num != peer_num) {
int can_access = 0;
hipDeviceCanAccessPeer(&can_access, num, peer_num);
info.has_peer_memory = (can_access != 0);
}
}
int pci_location[3] = {0, 0, 0};
hipDeviceGetAttribute(&pci_location[0], hipDeviceAttributePciDomainID, num);
hipDeviceGetAttribute(&pci_location[1], hipDeviceAttributePciBusId, num);
hipDeviceGetAttribute(&pci_location[2], hipDeviceAttributePciDeviceId, num);
info.id = string_printf("HIP_%s_%04x:%02x:%02x",
name,
(unsigned int)pci_location[0],
(unsigned int)pci_location[1],
(unsigned int)pci_location[2]);
/* If device has a kernel timeout and no compute preemption, we assume
* it is connected to a display and will freeze the display while doing
* computations. */
int timeout_attr = 0, preempt_attr = 0;
hipDeviceGetAttribute(&timeout_attr, hipDeviceAttributeKernelExecTimeout, num);
if (timeout_attr && !preempt_attr) {
VLOG(1) << "Device is recognized as display.";
info.description += " (Display)";
info.display_device = true;
display_devices.push_back(info);
}
else {
VLOG(1) << "Device has compute preemption or is not used for display.";
devices.push_back(info);
}
VLOG(1) << "Added device \"" << name << "\" with id \"" << info.id << "\".";
}
if (!display_devices.empty())
devices.insert(devices.end(), display_devices.begin(), display_devices.end());
#else /* WITH_HIP */
(void)devices;
#endif /* WITH_HIP */
}
string device_hip_capabilities()
{
#ifdef WITH_HIP
hipError_t result = device_hip_safe_init();
if (result != hipSuccess) {
if (result != hipErrorNoDevice) {
return string("Error initializing HIP: ") + hipewErrorString(result);
}
return "No HIP device found\n";
}
int count;
result = hipGetDeviceCount(&count);
if (result != hipSuccess) {
return string("Error getting devices: ") + hipewErrorString(result);
}
string capabilities = "";
for (int num = 0; num < count; num++) {
char name[256];
if (hipDeviceGetName(name, 256, num) != hipSuccess) {
continue;
}
capabilities += string("\t") + name + "\n";
int value;
# define GET_ATTR(attr) \
{ \
if (hipDeviceGetAttribute(&value, hipDeviceAttribute##attr, num) == hipSuccess) { \
capabilities += string_printf("\t\thipDeviceAttribute" #attr "\t\t\t%d\n", value); \
} \
} \
(void)0
/* TODO(sergey): Strip all attributes which are not useful for us
* or do not depend on the driver.
*/
GET_ATTR(MaxThreadsPerBlock);
GET_ATTR(MaxBlockDimX);
GET_ATTR(MaxBlockDimY);
GET_ATTR(MaxBlockDimZ);
GET_ATTR(MaxGridDimX);
GET_ATTR(MaxGridDimY);
GET_ATTR(MaxGridDimZ);
GET_ATTR(MaxSharedMemoryPerBlock);
GET_ATTR(TotalConstantMemory);
GET_ATTR(WarpSize);
GET_ATTR(MaxPitch);
GET_ATTR(MaxRegistersPerBlock);
GET_ATTR(ClockRate);
GET_ATTR(TextureAlignment);
GET_ATTR(MultiprocessorCount);
GET_ATTR(KernelExecTimeout);
GET_ATTR(Integrated);
GET_ATTR(CanMapHostMemory);
GET_ATTR(ComputeMode);
GET_ATTR(MaxTexture1DWidth);
GET_ATTR(MaxTexture2DWidth);
GET_ATTR(MaxTexture2DHeight);
GET_ATTR(MaxTexture3DWidth);
GET_ATTR(MaxTexture3DHeight);
GET_ATTR(MaxTexture3DDepth);
GET_ATTR(ConcurrentKernels);
GET_ATTR(EccEnabled);
GET_ATTR(MemoryClockRate);
GET_ATTR(MemoryBusWidth);
GET_ATTR(L2CacheSize);
GET_ATTR(MaxThreadsPerMultiProcessor);
GET_ATTR(ComputeCapabilityMajor);
GET_ATTR(ComputeCapabilityMinor);
GET_ATTR(MaxSharedMemoryPerMultiprocessor);
GET_ATTR(ManagedMemory);
GET_ATTR(IsMultiGpuBoard);
# undef GET_ATTR
capabilities += "\n";
}
return capabilities;
#else /* WITH_HIP */
return "";
#endif /* WITH_HIP */
}
CCL_NAMESPACE_END
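
The GET_ATTR macro above is a query-and-append table: one line per attribute, the attribute name stringized with #, and attributes the driver cannot report silently skipped. A reduced sketch of the same pattern, where query() and the ATTR_* ids are hypothetical stand-ins for hipDeviceGetAttribute and the hipDeviceAttribute* values:

#include <cstdio>
#include <string>

enum Attr { ATTR_WarpSize, ATTR_ClockRate };
bool query(int *value, Attr attr); /* Hypothetical driver query. */

#define APPEND_ATTR(out, attr) \
  { \
    int value = 0; \
    if (query(&value, ATTR_##attr)) { \
      char line[128]; \
      snprintf(line, sizeof(line), "\t\t" #attr "\t%d\n", value); \
      out += line; \
    } \
  } \
  (void)0

void report(std::string &capabilities)
{
  APPEND_ATTR(capabilities, WarpSize);
  APPEND_ATTR(capabilities, ClockRate);
}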


@@ -1,37 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "util/util_string.h"
#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
class Device;
class DeviceInfo;
class Profiler;
class Stats;
bool device_hip_init();
Device *device_hip_create(const DeviceInfo &info, Stats &stats, Profiler &profiler);
void device_hip_info(vector<DeviceInfo> &devices);
string device_hip_capabilities();
CCL_NAMESPACE_END

File diff suppressed because it is too large.


@@ -1,153 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_HIP
# include "device/device.h"
# include "device/hip/kernel.h"
# include "device/hip/queue.h"
# include "device/hip/util.h"
# include "util/util_map.h"
# ifdef WITH_HIP_DYNLOAD
# include "hipew.h"
# else
# include "util/util_opengl.h"
# endif
CCL_NAMESPACE_BEGIN
class DeviceQueue;
class HIPDevice : public Device {
friend class HIPContextScope;
public:
hipDevice_t hipDevice;
hipCtx_t hipContext;
hipModule_t hipModule;
size_t device_texture_headroom;
size_t device_working_headroom;
bool move_texture_to_host;
size_t map_host_used;
size_t map_host_limit;
int can_map_host;
int pitch_alignment;
int hipDevId;
int hipDevArchitecture;
bool first_error;
struct HIPMem {
HIPMem() : texobject(0), array(0), use_mapped_host(false)
{
}
hipTextureObject_t texobject;
hArray array;
/* If true, a mapped host memory in shared_pointer is being used. */
bool use_mapped_host;
};
typedef map<device_memory *, HIPMem> HIPMemMap;
HIPMemMap hip_mem_map;
thread_mutex hip_mem_map_mutex;
/* Bindless Textures */
device_vector<TextureInfo> texture_info;
bool need_texture_info;
HIPDeviceKernels kernels;
static bool have_precompiled_kernels();
virtual bool show_samples() const override;
virtual BVHLayoutMask get_bvh_layout_mask() const override;
void set_error(const string &error) override;
HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
virtual ~HIPDevice();
bool support_device(const uint /*kernel_features*/);
bool check_peer_access(Device *peer_device) override;
bool use_adaptive_compilation();
virtual string compile_kernel_get_common_cflags(const uint kernel_features);
string compile_kernel(const uint kernel_features,
const char *name,
const char *base = "hip",
bool force_ptx = false);
virtual bool load_kernels(const uint kernel_features) override;
void reserve_local_memory(const uint kernel_features);
void init_host_memory();
void load_texture_info();
void move_textures_to_host(size_t size, bool for_texture);
HIPMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
void generic_copy_to(device_memory &mem);
void generic_free(device_memory &mem);
void mem_alloc(device_memory &mem) override;
void mem_copy_to(device_memory &mem) override;
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
void mem_zero(device_memory &mem) override;
void mem_free(device_memory &mem) override;
device_ptr mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/) override;
virtual void const_copy_to(const char *name, void *host, size_t size) override;
void global_alloc(device_memory &mem);
void global_free(device_memory &mem);
void tex_alloc(device_texture &mem);
void tex_free(device_texture &mem);
/* Graphics resources interoperability. */
virtual bool should_use_graphics_interop() override;
virtual unique_ptr<DeviceQueue> gpu_queue_create() override;
int get_num_multiprocessors();
int get_max_num_threads_per_multiprocessor();
protected:
bool get_device_attribute(hipDeviceAttribute_t attribute, int *value);
int get_device_default_attribute(hipDeviceAttribute_t attribute, int default_value);
};
CCL_NAMESPACE_END
#endif


@@ -1,105 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_HIP
# include "device/hip/graphics_interop.h"
# include "device/hip/device_impl.h"
# include "device/hip/util.h"
CCL_NAMESPACE_BEGIN
HIPDeviceGraphicsInterop::HIPDeviceGraphicsInterop(HIPDeviceQueue *queue)
: queue_(queue), device_(static_cast<HIPDevice *>(queue->device))
{
}
HIPDeviceGraphicsInterop::~HIPDeviceGraphicsInterop()
{
HIPContextScope scope(device_);
if (hip_graphics_resource_) {
hip_device_assert(device_, hipGraphicsUnregisterResource(hip_graphics_resource_));
}
}
void HIPDeviceGraphicsInterop::set_display_interop(
const DisplayDriver::GraphicsInterop &display_interop)
{
const int64_t new_buffer_area = int64_t(display_interop.buffer_width) *
display_interop.buffer_height;
need_clear_ = display_interop.need_clear;
if (opengl_pbo_id_ == display_interop.opengl_pbo_id && buffer_area_ == new_buffer_area) {
return;
}
HIPContextScope scope(device_);
if (hip_graphics_resource_) {
hip_device_assert(device_, hipGraphicsUnregisterResource(hip_graphics_resource_));
}
const hipError_t result = hipGraphicsGLRegisterBuffer(
&hip_graphics_resource_, display_interop.opengl_pbo_id, hipGraphicsRegisterFlagsNone);
if (result != hipSuccess) {
LOG(ERROR) << "Error registering OpenGL buffer: " << hipewErrorString(result);
}
opengl_pbo_id_ = display_interop.opengl_pbo_id;
buffer_area_ = new_buffer_area;
}
device_ptr HIPDeviceGraphicsInterop::map()
{
if (!hip_graphics_resource_) {
return 0;
}
HIPContextScope scope(device_);
hipDeviceptr_t hip_buffer;
size_t bytes;
hip_device_assert(device_,
hipGraphicsMapResources(1, &hip_graphics_resource_, queue_->stream()));
hip_device_assert(
device_, hipGraphicsResourceGetMappedPointer(&hip_buffer, &bytes, hip_graphics_resource_));
if (need_clear_) {
hip_device_assert(
device_,
hipMemsetD8Async(static_cast<hipDeviceptr_t>(hip_buffer), 0, bytes, queue_->stream()));
need_clear_ = false;
}
return static_cast<device_ptr>(hip_buffer);
}
void HIPDeviceGraphicsInterop::unmap()
{
HIPContextScope scope(device_);
hip_device_assert(device_,
hipGraphicsUnmapResources(1, &hip_graphics_resource_, queue_->stream()));
}
CCL_NAMESPACE_END
#endif


@@ -1,64 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_HIP
# include "device/device_graphics_interop.h"
# ifdef WITH_HIP_DYNLOAD
# include "hipew.h"
# endif
CCL_NAMESPACE_BEGIN
class HIPDevice;
class HIPDeviceQueue;
class HIPDeviceGraphicsInterop : public DeviceGraphicsInterop {
public:
explicit HIPDeviceGraphicsInterop(HIPDeviceQueue *queue);
HIPDeviceGraphicsInterop(const HIPDeviceGraphicsInterop &other) = delete;
HIPDeviceGraphicsInterop(HIPDeviceGraphicsInterop &&other) noexcept = delete;
~HIPDeviceGraphicsInterop();
HIPDeviceGraphicsInterop &operator=(const HIPDeviceGraphicsInterop &other) = delete;
HIPDeviceGraphicsInterop &operator=(HIPDeviceGraphicsInterop &&other) = delete;
virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) override;
virtual device_ptr map() override;
virtual void unmap() override;
protected:
HIPDeviceQueue *queue_ = nullptr;
HIPDevice *device_ = nullptr;
/* OpenGL PBO which is currently registered as the destination for the HIP buffer. */
uint opengl_pbo_id_ = 0;
/* Buffer area in pixels of the corresponding PBO. */
int64_t buffer_area_ = 0;
/* The destination was requested to be cleared. */
bool need_clear_ = false;
hipGraphicsResource hip_graphics_resource_ = nullptr;
};
CCL_NAMESPACE_END
#endif


@@ -1,69 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_HIP
# include "device/hip/kernel.h"
# include "device/hip/device_impl.h"
CCL_NAMESPACE_BEGIN
void HIPDeviceKernels::load(HIPDevice *device)
{
hipModule_t hipModule = device->hipModule;
for (int i = 0; i < (int)DEVICE_KERNEL_NUM; i++) {
HIPDeviceKernel &kernel = kernels_[i];
/* No mega-kernel used for GPU. */
if (i == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
continue;
}
const std::string function_name = std::string("kernel_gpu_") +
device_kernel_as_string((DeviceKernel)i);
hip_device_assert(device,
hipModuleGetFunction(&kernel.function, hipModule, function_name.c_str()));
if (kernel.function) {
hip_device_assert(device, hipFuncSetCacheConfig(kernel.function, hipFuncCachePreferL1));
hip_device_assert(
device,
hipModuleOccupancyMaxPotentialBlockSize(
&kernel.min_blocks, &kernel.num_threads_per_block, kernel.function, 0, 0));
}
else {
LOG(ERROR) << "Unable to load kernel " << function_name;
}
}
loaded = true;
}
const HIPDeviceKernel &HIPDeviceKernels::get(DeviceKernel kernel) const
{
return kernels_[(int)kernel];
}
bool HIPDeviceKernels::available(DeviceKernel kernel) const
{
return kernels_[(int)kernel].function != nullptr;
}
CCL_NAMESPACE_END
#endif /* WITH_HIP */


@@ -1,54 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifdef WITH_HIP
# include "device/device_kernel.h"
# ifdef WITH_HIP_DYNLOAD
# include "hipew.h"
# endif
CCL_NAMESPACE_BEGIN
class HIPDevice;
/* HIP kernel and associated occupancy information. */
class HIPDeviceKernel {
public:
hipFunction_t function = nullptr;
int num_threads_per_block = 0;
int min_blocks = 0;
};
/* Cache of HIP kernels for each DeviceKernel. */
class HIPDeviceKernels {
public:
void load(HIPDevice *device);
const HIPDeviceKernel &get(DeviceKernel kernel) const;
bool available(DeviceKernel kernel) const;
protected:
HIPDeviceKernel kernels_[DEVICE_KERNEL_NUM];
bool loaded = false;
};
CCL_NAMESPACE_END
#endif /* WITH_HIP */


@@ -1,209 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_HIP
# include "device/hip/queue.h"
# include "device/hip/device_impl.h"
# include "device/hip/graphics_interop.h"
# include "device/hip/kernel.h"
CCL_NAMESPACE_BEGIN
/* HIPDeviceQueue */
HIPDeviceQueue::HIPDeviceQueue(HIPDevice *device)
: DeviceQueue(device), hip_device_(device), hip_stream_(nullptr)
{
const HIPContextScope scope(hip_device_);
hip_device_assert(hip_device_, hipStreamCreateWithFlags(&hip_stream_, hipStreamNonBlocking));
}
HIPDeviceQueue::~HIPDeviceQueue()
{
const HIPContextScope scope(hip_device_);
hipStreamDestroy(hip_stream_);
}
int HIPDeviceQueue::num_concurrent_states(const size_t /*state_size*/) const
{
/* TODO: compute automatically. */
/* TODO: must have at least num_threads_per_block. */
return 14416128;
}
int HIPDeviceQueue::num_concurrent_busy_states() const
{
const int max_num_threads = hip_device_->get_num_multiprocessors() *
hip_device_->get_max_num_threads_per_multiprocessor();
if (max_num_threads == 0) {
return 65536;
}
return 4 * max_num_threads;
}
void HIPDeviceQueue::init_execution()
{
/* Synchronize all textures and memory copies before executing task. */
HIPContextScope scope(hip_device_);
hip_device_->load_texture_info();
hip_device_assert(hip_device_, hipDeviceSynchronize());
debug_init_execution();
}
bool HIPDeviceQueue::kernel_available(DeviceKernel kernel) const
{
return hip_device_->kernels.available(kernel);
}
bool HIPDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[])
{
if (hip_device_->have_error()) {
return false;
}
debug_enqueue(kernel, work_size);
const HIPContextScope scope(hip_device_);
const HIPDeviceKernel &hip_kernel = hip_device_->kernels.get(kernel);
/* Compute kernel launch parameters. */
const int num_threads_per_block = hip_kernel.num_threads_per_block;
const int num_blocks = divide_up(work_size, num_threads_per_block);
int shared_mem_bytes = 0;
switch (kernel) {
case DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY:
case DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY:
case DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY:
case DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY:
case DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY:
case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY:
/* See parallel_active_index.h for why this amount of shared memory is needed. */
shared_mem_bytes = (num_threads_per_block + 1) * sizeof(int);
break;
default:
break;
}
/* Launch kernel. */
hip_device_assert(hip_device_,
hipModuleLaunchKernel(hip_kernel.function,
num_blocks,
1,
1,
num_threads_per_block,
1,
1,
shared_mem_bytes,
hip_stream_,
args,
0));
return !(hip_device_->have_error());
}
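
The shared-memory budget in this function follows from the comment above: per parallel_active_index.h, the *_PATHS_ARRAY kernels need one int per thread plus one extra. The arithmetic for an illustrative block size:

const int num_threads_per_block = 256; /* Illustrative; the real value comes from occupancy. */
const int shared_mem_bytes = (num_threads_per_block + 1) * (int)sizeof(int);
/* 257 * 4 = 1028 bytes of dynamic shared memory per block. */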
bool HIPDeviceQueue::synchronize()
{
if (hip_device_->have_error()) {
return false;
}
const HIPContextScope scope(hip_device_);
hip_device_assert(hip_device_, hipStreamSynchronize(hip_stream_));
debug_synchronize();
return !(hip_device_->have_error());
}
void HIPDeviceQueue::zero_to_device(device_memory &mem)
{
assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
if (mem.memory_size() == 0) {
return;
}
/* Allocate on demand. */
if (mem.device_pointer == 0) {
hip_device_->mem_alloc(mem);
}
/* Zero memory on device. */
assert(mem.device_pointer != 0);
const HIPContextScope scope(hip_device_);
hip_device_assert(
hip_device_,
hipMemsetD8Async((hipDeviceptr_t)mem.device_pointer, 0, mem.memory_size(), hip_stream_));
}
void HIPDeviceQueue::copy_to_device(device_memory &mem)
{
assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
if (mem.memory_size() == 0) {
return;
}
/* Allocate on demand. */
if (mem.device_pointer == 0) {
hip_device_->mem_alloc(mem);
}
assert(mem.device_pointer != 0);
assert(mem.host_pointer != nullptr);
/* Copy memory to device. */
const HIPContextScope scope(hip_device_);
hip_device_assert(
hip_device_,
hipMemcpyHtoDAsync(
(hipDeviceptr_t)mem.device_pointer, mem.host_pointer, mem.memory_size(), hip_stream_));
}
void HIPDeviceQueue::copy_from_device(device_memory &mem)
{
assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
if (mem.memory_size() == 0) {
return;
}
assert(mem.device_pointer != 0);
assert(mem.host_pointer != nullptr);
/* Copy memory from device. */
const HIPContextScope scope(hip_device_);
hip_device_assert(
hip_device_,
hipMemcpyDtoHAsync(
mem.host_pointer, (hipDeviceptr_t)mem.device_pointer, mem.memory_size(), hip_stream_));
}
// TODO : (Arya) Enable this after stabilizing dev branch
unique_ptr<DeviceGraphicsInterop> HIPDeviceQueue::graphics_interop_create()
{
return make_unique<HIPDeviceGraphicsInterop>(this);
}
CCL_NAMESPACE_END
#endif /* WITH_HIP */


@@ -1,68 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifdef WITH_HIP
# include "device/device_kernel.h"
# include "device/device_memory.h"
# include "device/device_queue.h"
# include "device/hip/util.h"
CCL_NAMESPACE_BEGIN
class HIPDevice;
class device_memory;
/* Base class for HIP queues. */
class HIPDeviceQueue : public DeviceQueue {
public:
HIPDeviceQueue(HIPDevice *device);
~HIPDeviceQueue();
virtual int num_concurrent_states(const size_t state_size) const override;
virtual int num_concurrent_busy_states() const override;
virtual void init_execution() override;
virtual bool kernel_available(DeviceKernel kernel) const override;
virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) override;
virtual bool synchronize() override;
virtual void zero_to_device(device_memory &mem) override;
virtual void copy_to_device(device_memory &mem) override;
virtual void copy_from_device(device_memory &mem) override;
virtual hipStream_t stream()
{
return hip_stream_;
}
// TODO : (Arya) Enable this after stabilizing the dev branch
virtual unique_ptr<DeviceGraphicsInterop> graphics_interop_create() override;
protected:
HIPDevice *hip_device_;
hipStream_t hip_stream_;
};
CCL_NAMESPACE_END
#endif /* WITH_HIP */


@@ -1,61 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_HIP
# include "device/hip/util.h"
# include "device/hip/device_impl.h"
CCL_NAMESPACE_BEGIN
HIPContextScope::HIPContextScope(HIPDevice *device) : device(device)
{
hip_device_assert(device, hipCtxPushCurrent(device->hipContext));
}
HIPContextScope::~HIPContextScope()
{
hip_device_assert(device, hipCtxPopCurrent(NULL));
}
# ifndef WITH_HIP_DYNLOAD
const char *hipewErrorString(hipError_t result)
{
/* We can only give the error code here without major code duplication; that
* should be enough, since dynamic loading is only disabled by folks
* who know what they're doing anyway.
*
* NOTE: Avoid call from several threads.
*/
static string error;
error = string_printf("%d", result);
return error.c_str();
}
const char *hipewCompilerPath()
{
return CYCLES_HIP_HIPCC_EXECUTABLE;
}
int hipewCompilerVersion()
{
return (HIP_VERSION / 100) + (HIP_VERSION % 100 / 10);
}
# endif
CCL_NAMESPACE_END
#endif /* WITH_HIP */


@@ -1,63 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifdef WITH_HIP
# ifdef WITH_HIP_DYNLOAD
# include "hipew.h"
# endif
CCL_NAMESPACE_BEGIN
class HIPDevice;
/* Utility to push/pop HIP context. */
class HIPContextScope {
public:
HIPContextScope(HIPDevice *device);
~HIPContextScope();
private:
HIPDevice *device;
};
/* Utility for checking return values of HIP function calls. */
# define hip_device_assert(hip_device, stmt) \
{ \
hipError_t result = stmt; \
if (result != hipSuccess) { \
const char *name = hipewErrorString(result); \
hip_device->set_error( \
string_printf("%s in %s (%s:%d)", name, #stmt, __FILE__, __LINE__)); \
} \
} \
(void)0
# define hip_assert(stmt) hip_device_assert(this, stmt)
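
The trailing (void)0 in hip_device_assert is the usual statement-macro hygiene trick: the expansion ends in an expression, so the call site must supply the semicolon and the macro stays safe inside unbraced if/else chains. A usage sketch (the condition is illustrative; the HIP calls appear elsewhere in this diff):

if (push_context)
  hip_assert(hipCtxPushCurrent(device->hipContext));
else
  hip_assert(hipCtxPopCurrent(NULL));

/* Without the (void)0 idiom, the block's closing brace plus the caller's
 * semicolon would form two statements and break the unbraced if/else. */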
# ifndef WITH_HIP_DYNLOAD
/* Transparently implement some functions, so the majority of the file does not need
* to worry about the difference between dynamically loaded and linked HIP at all. */
const char *hipewErrorString(hipError_t result);
const char *hipewCompilerPath();
int hipewCompilerVersion();
# endif /* WITH_HIP_DYNLOAD */
CCL_NAMESPACE_END
#endif /* WITH_HIP */


@@ -315,14 +315,14 @@ class MultiDevice : public Device {
stats.mem_alloc(mem.device_size - existing_size);
}
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override
void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override
{
device_ptr key = mem.device_pointer;
size_t i = 0, sub_h = h / devices.size();
int i = 0, sub_h = h / devices.size();
foreach (SubDevice &sub, devices) {
size_t sy = y + i * sub_h;
size_t sh = (i == (size_t)devices.size() - 1) ? h - sub_h * i : sub_h;
int sy = y + i * sub_h;
int sh = (i == (int)devices.size() - 1) ? h - sub_h * i : sub_h;
SubDevice *owner_sub = find_matching_mem_device(key, sub);
mem.device = owner_sub->device;


@@ -315,11 +315,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
group_descs[PG_HITS].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
group_descs[PG_HITS].hitgroup.moduleAH = optix_module;
group_descs[PG_HITS].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_shadow_all_hit";
group_descs[PG_HITV].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
group_descs[PG_HITV].hitgroup.moduleCH = optix_module;
group_descs[PG_HITV].hitgroup.entryFunctionNameCH = "__closesthit__kernel_optix_hit";
group_descs[PG_HITV].hitgroup.moduleAH = optix_module;
group_descs[PG_HITV].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_volume_test";
if (kernel_features & KERNEL_FEATURE_HAIR) {
if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
@@ -402,7 +397,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH);
trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH);
trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH);
trace_css = std::max(trace_css, stack_size[PG_HITV].cssIS + stack_size[PG_HITV].cssAH);
trace_css = std::max(trace_css,
stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH);
trace_css = std::max(trace_css,
@@ -427,7 +421,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
pipeline_groups.push_back(groups[PG_HITD]);
pipeline_groups.push_back(groups[PG_HITS]);
pipeline_groups.push_back(groups[PG_HITL]);
pipeline_groups.push_back(groups[PG_HITV]);
if (motion_blur) {
pipeline_groups.push_back(groups[PG_HITD_MOTION]);
pipeline_groups.push_back(groups[PG_HITS_MOTION]);
@@ -466,7 +459,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
pipeline_groups.push_back(groups[PG_HITD]);
pipeline_groups.push_back(groups[PG_HITS]);
pipeline_groups.push_back(groups[PG_HITL]);
pipeline_groups.push_back(groups[PG_HITV]);
if (motion_blur) {
pipeline_groups.push_back(groups[PG_HITD_MOTION]);
pipeline_groups.push_back(groups[PG_HITS_MOTION]);
@@ -1398,33 +1390,25 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
/* Set user instance ID to object index (but leave low bit blank). */
instance.instanceId = ob->get_device_index() << 1;
/* Add some of the object visibility bits to the mask.
* __prim_visibility contains the combined visibility bits of all instances, so is not
* reliable if they differ between instances. But the OptiX visibility mask can only contain
* 8 bits, so have to trade-off here and select just a few important ones.
*/
instance.visibilityMask = ob->visibility_for_tracing() & 0xFF;
/* Have to have at least one bit in the mask, or else instance would always be culled. */
if (0 == instance.visibilityMask) {
instance.visibilityMask = 0xFF;
instance.visibilityMask = 1;
if (ob->get_geometry()->has_volume) {
/* Volumes have a special bit set in the visibility mask so a trace can mask only volumes.
*/
instance.visibilityMask |= 2;
}
if (ob->get_geometry()->geometry_type == Geometry::HAIR &&
static_cast<const Hair *>(ob->get_geometry())->curve_shape == CURVE_THICK) {
if (motion_blur && ob->get_geometry()->has_motion_blur()) {
if (ob->get_geometry()->geometry_type == Geometry::HAIR) {
/* Same applies to curves (so they can be skipped in local trace calls). */
instance.visibilityMask |= 4;
if (motion_blur && ob->get_geometry()->has_motion_blur() &&
static_cast<const Hair *>(ob->get_geometry())->curve_shape == CURVE_THICK) {
/* Select between motion blur and non-motion blur built-in intersection module. */
instance.sbtOffset = PG_HITD_MOTION - PG_HITD;
}
}
else {
/* Can disable __anyhit__kernel_optix_visibility_test by default (except for thick curves,
* since it needs to filter out end-caps there).
* It is enabled where necessary (visibility mask exceeds 8 bits or the other any-hit
* programs like __anyhit__kernel_optix_shadow_all_hit) via OPTIX_RAY_FLAG_ENFORCE_ANYHIT.
*/
instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT;
}
/* Insert motion traversable if object has motion. */
if (motion_blur && ob->use_motion()) {
@@ -1490,7 +1474,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
delete[] reinterpret_cast<uint8_t *>(&motion_transform);
/* Disable instance transform if object uses motion transform already. */
instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
/* Get traversable handle to motion transform. */
optixConvertPointerToTraversableHandle(context,
@@ -1507,7 +1491,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
}
else {
/* Disable instance transform if geometry already has it applied to vertex data. */
instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
/* Non-instanced objects read ID from 'prim_object', so distinguish
* them from instanced objects with the low bit set. */
instance.instanceId |= 1;
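
A sketch of the visibility-mask layout used on one side of this hunk: OptiX provides 8 mask bits per instance, and an instance is tested only when (instance mask & ray mask) != 0, so packing geometry classes into the low bits lets specialized trace calls select whole classes. Constants are illustrative:

enum : unsigned int {
  VISIBILITY_DEFAULT = 1 << 0, /* Set on every instance so none is culled. */
  VISIBILITY_VOLUME = 1 << 1,  /* Geometry that contains volumes. */
  VISIBILITY_CURVE = 1 << 2,   /* Hair, so local trace calls can skip it. */
};

/* A trace restricted to volumes passes ray mask VISIBILITY_VOLUME: volume
 * instances carry VISIBILITY_DEFAULT | VISIBILITY_VOLUME and match, while
 * surface-only instances carry just VISIBILITY_DEFAULT and are skipped. */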


@@ -40,7 +40,6 @@ enum {
PG_HITD, /* Default hit group. */
PG_HITS, /* __SHADOW_RECORD_ALL__ hit group. */
PG_HITL, /* __BVH_LOCAL__ hit group (only used for triangles). */
PG_HITV, /* __VOLUME__ hit group. */
PG_HITD_MOTION,
PG_HITS_MOTION,
PG_CALL_SVM_AO,
@@ -52,7 +51,7 @@ enum {
static const int MISS_PROGRAM_GROUP_OFFSET = PG_MISS;
static const int NUM_MIS_PROGRAM_GROUPS = 1;
static const int HIT_PROGAM_GROUP_OFFSET = PG_HITD;
static const int NUM_HIT_PROGRAM_GROUPS = 6;
static const int NUM_HIT_PROGRAM_GROUPS = 5;
static const int CALLABLE_PROGRAM_GROUPS_BASE = PG_CALL_SVM_AO;
static const int NUM_CALLABLE_PROGRAM_GROUPS = 3;


@@ -27,8 +27,6 @@ set(SRC
pass_accessor.cpp
pass_accessor_cpu.cpp
pass_accessor_gpu.cpp
path_trace_display.cpp
path_trace_tile.cpp
path_trace_work.cpp
path_trace_work_cpu.cpp
path_trace_work_gpu.cpp
@@ -49,8 +47,6 @@ set(SRC_HEADERS
pass_accessor.h
pass_accessor_cpu.h
pass_accessor_gpu.h
path_trace_display.h
path_trace_tile.h
path_trace_work.h
path_trace_work_cpu.h
path_trace_work_gpu.h


@@ -19,9 +19,8 @@
#include "device/cpu/device.h"
#include "device/device.h"
#include "integrator/pass_accessor.h"
#include "integrator/path_trace_display.h"
#include "integrator/path_trace_tile.h"
#include "integrator/render_scheduler.h"
#include "render/gpu_display.h"
#include "render/pass.h"
#include "render/scene.h"
#include "render/tile.h"
@@ -68,11 +67,11 @@ PathTrace::PathTrace(Device *device,
PathTrace::~PathTrace()
{
/* Destroy any GPU resource which was used for graphics interop.
* Need to have access to the PathTraceDisplay as it is the only source of drawing context which
* is used for interop. */
if (display_) {
* Need to have access to the GPUDisplay as it is the only source of drawing context which is
* used for interop. */
if (gpu_display_) {
for (auto &&path_trace_work : path_trace_works_) {
path_trace_work->destroy_gpu_resources(display_.get());
path_trace_work->destroy_gpu_resources(gpu_display_.get());
}
}
}
@@ -95,7 +94,7 @@ bool PathTrace::ready_to_reset()
{
/* The logic here is optimized for the best feedback in the viewport, which implies having a GPU
* display. If there is no such display, the logic here will break.
DCHECK(display_);
DCHECK(gpu_display_);
/* The logic here tries to provide the behavior which feels most interactive to artists.
* The general idea is to be able to reset as quickly as possible, while still providing interactive
@@ -127,8 +126,8 @@ void PathTrace::reset(const BufferParams &full_params, const BufferParams &big_t
/* NOTE: GPU display checks for buffer modification and avoids unnecessary re-allocation.
* It is required to be informed about a reset whenever it happens, so that the redraw state tracking is
* properly updated. */
if (display_) {
display_->reset(full_params);
if (gpu_display_) {
gpu_display_->reset(full_params);
}
render_state_.has_denoised_result = false;
@@ -245,7 +244,7 @@ static void foreach_sliced_buffer_params(const vector<unique_ptr<PathTraceWork>>
const int slice_height = max(lround(height * weight), 1);
/* Disallow negative values to deal with situations when there are more compute devices than
* scan-lines. */
* scanlines. */
const int remaining_height = max(0, height - current_y);
BufferParams slide_params = buffer_params;
@@ -536,35 +535,25 @@ void PathTrace::denoise(const RenderWork &render_work)
render_scheduler_.report_denoise_time(render_work, time_dt() - start_time);
}
void PathTrace::set_output_driver(unique_ptr<OutputDriver> driver)
void PathTrace::set_gpu_display(unique_ptr<GPUDisplay> gpu_display)
{
output_driver_ = move(driver);
gpu_display_ = move(gpu_display);
}
void PathTrace::set_display_driver(unique_ptr<DisplayDriver> driver)
void PathTrace::clear_gpu_display()
{
if (driver) {
display_ = make_unique<PathTraceDisplay>(move(driver));
}
else {
display_ = nullptr;
}
}
void PathTrace::clear_display()
{
if (display_) {
display_->clear();
if (gpu_display_) {
gpu_display_->clear();
}
}
void PathTrace::draw()
{
if (!display_) {
if (!gpu_display_) {
return;
}
did_draw_after_reset_ |= display_->draw();
did_draw_after_reset_ |= gpu_display_->draw();
}
void PathTrace::update_display(const RenderWork &render_work)
@@ -573,32 +562,31 @@ void PathTrace::update_display(const RenderWork &render_work)
return;
}
if (!display_ && !output_driver_) {
if (!gpu_display_ && !tile_buffer_update_cb) {
VLOG(3) << "Ignore display update.";
return;
}
if (full_params_.width == 0 || full_params_.height == 0) {
VLOG(3) << "Skipping PathTraceDisplay update due to 0 size of the render buffer.";
VLOG(3) << "Skipping GPUDisplay update due to 0 size of the render buffer.";
return;
}
const double start_time = time_dt();
if (output_driver_) {
if (tile_buffer_update_cb) {
VLOG(3) << "Invoke buffer update callback.";
PathTraceTile tile(*this);
output_driver_->update_render_tile(tile);
tile_buffer_update_cb();
}
if (display_) {
if (gpu_display_) {
VLOG(3) << "Perform copy to GPUDisplay work.";
const int resolution_divider = render_work.resolution_divider;
const int texture_width = max(1, full_params_.width / resolution_divider);
const int texture_height = max(1, full_params_.height / resolution_divider);
if (!display_->update_begin(texture_width, texture_height)) {
if (!gpu_display_->update_begin(texture_width, texture_height)) {
LOG(ERROR) << "Error beginning GPUDisplay update.";
return;
}
@@ -612,10 +600,10 @@ void PathTrace::update_display(const RenderWork &render_work)
* all works in parallel. */
const int num_samples = get_num_samples_in_buffer();
for (auto &&path_trace_work : path_trace_works_) {
path_trace_work->copy_to_display(display_.get(), pass_mode, num_samples);
path_trace_work->copy_to_gpu_display(gpu_display_.get(), pass_mode, num_samples);
}
display_->update_end();
gpu_display_->update_end();
}
render_scheduler_.report_display_update_time(render_work, time_dt() - start_time);
@@ -765,26 +753,20 @@ bool PathTrace::is_cancel_requested()
void PathTrace::tile_buffer_write()
{
if (!output_driver_) {
if (!tile_buffer_write_cb) {
return;
}
PathTraceTile tile(*this);
output_driver_->write_render_tile(tile);
tile_buffer_write_cb();
}
void PathTrace::tile_buffer_read()
{
if (!device_scene_->data.bake.use) {
if (!tile_buffer_read_cb) {
return;
}
if (!output_driver_) {
return;
}
PathTraceTile tile(*this);
if (output_driver_->read_render_tile(tile)) {
if (tile_buffer_read_cb()) {
tbb::parallel_for_each(path_trace_works_, [](unique_ptr<PathTraceWork> &path_trace_work) {
path_trace_work->copy_render_buffers_to_device();
});
@@ -819,7 +801,7 @@ void PathTrace::tile_buffer_write_to_disk()
}
if (!tile_manager_.write_tile(*buffers)) {
device_->set_error("Error writing tile to file");
LOG(ERROR) << "Error writing tile to file.";
}
}
@@ -912,14 +894,7 @@ void PathTrace::process_full_buffer_from_disk(string_view filename)
DenoiseParams denoise_params;
if (!tile_manager_.read_full_buffer_from_disk(filename, &full_frame_buffers, &denoise_params)) {
const string error_message = "Error reading tiles from file";
if (progress_) {
progress_->set_error(error_message);
progress_->set_cancel(error_message);
}
else {
LOG(ERROR) << error_message;
}
LOG(ERROR) << "Error reading tiles from file.";
return;
}
@@ -1023,11 +998,6 @@ int2 PathTrace::get_render_tile_offset() const
return make_int2(tile.x, tile.y);
}
int2 PathTrace::get_render_size() const
{
return tile_manager_.get_size();
}
const BufferParams &PathTrace::get_render_tile_params() const
{
if (full_frame_state_.render_buffers) {
@@ -1058,8 +1028,6 @@ static const char *device_type_for_description(const DeviceType type)
return "CUDA";
case DEVICE_OPTIX:
return "OptiX";
case DEVICE_HIP:
return "HIP";
case DEVICE_DUMMY:
return "Dummy";
case DEVICE_MULTI:
@@ -31,14 +31,12 @@ CCL_NAMESPACE_BEGIN
class AdaptiveSampling;
class Device;
class DeviceScene;
class DisplayDriver;
class Film;
class RenderBuffers;
class RenderScheduler;
class RenderWork;
class PathTraceDisplay;
class OutputDriver;
class Progress;
class GPUDisplay;
class TileManager;
/* PathTrace class takes care of kernel graph and scheduling on a (multi)device. It takes care of
@@ -100,16 +98,13 @@ class PathTrace {
* Use this to configure the adaptive sampler before rendering any samples. */
void set_adaptive_sampling(const AdaptiveSampling &adaptive_sampling);
/* Sets output driver for render buffer output. */
void set_output_driver(unique_ptr<OutputDriver> driver);
/* Set GPU display which takes care of drawing the render result. */
void set_gpu_display(unique_ptr<GPUDisplay> gpu_display);
/* Set display driver for interactive render buffer display. */
void set_display_driver(unique_ptr<DisplayDriver> driver);
/* Clear the GPU display by filling it in with all zeroes. */
void clear_gpu_display();
/* Clear the display buffer by filling it in with all zeroes. */
void clear_display();
/* Perform drawing of the current state of the DisplayDriver. */
/* Perform drawing of the current state of the GPUDisplay. */
void draw();
/* Cancel rendering process as soon as possible, without waiting for full tile to be sampled.
@@ -162,7 +157,6 @@ class PathTrace {
* instead. */
int2 get_render_tile_size() const;
int2 get_render_tile_offset() const;
int2 get_render_size() const;
/* Get buffer parameters of the current tile.
*
@@ -174,6 +168,18 @@ class PathTrace {
* times, and so on. */
string full_report() const;
/* Callback which communicates an updated state of the render buffer of the current big tile.
 * Is called during path tracing to communicate the work-in-progress state of the final buffer. */
function<void(void)> tile_buffer_update_cb;
/* Callback which communicates the final rendered buffer. Is called after path-tracing is done. */
function<void(void)> tile_buffer_write_cb;
/* Callback which initializes the rendered buffer. Is called before path-tracing starts.
*
* This is used for baking. */
function<bool(void)> tile_buffer_read_cb;
/* Callback which is called to report current rendering progress.
*
* It is supposed to be cheaper than buffer update/write, hence can be called more often.
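A hedged host-side sketch of how these three callbacks could be wired up; the application helpers are hypothetical, only the callback members come from the header above:

path_trace->tile_buffer_update_cb = [&]() {
  /* Work-in-progress big tile: push it to the application (hypothetical helper). */
  application_show_progress_tile();
};
path_trace->tile_buffer_write_cb = [&]() {
  /* Final buffer: persist it once path tracing is done (hypothetical helper). */
  application_write_final_tile();
};
path_trace->tile_buffer_read_cb = [&]() -> bool {
  /* Baking: seed the render buffer before tracing starts (hypothetical helper). */
  return application_read_baking_target();
};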
@@ -246,11 +252,7 @@ class PathTrace {
RenderScheduler &render_scheduler_;
TileManager &tile_manager_;
/* Display driver for interactive render buffer display. */
unique_ptr<PathTraceDisplay> display_;
/* Output driver to write render buffer to. */
unique_ptr<OutputDriver> output_driver_;
unique_ptr<GPUDisplay> gpu_display_;
/* Per-compute device descriptors of work which is responsible for path tracing on its configured
* device. */
@@ -284,7 +286,7 @@ class PathTrace {
/* Parameters of the big tile with the current resolution divider applied. */
BufferParams effective_big_tile_params;
/* Denoiser was run and there are denoised versions of the passes in the render buffers. */
/* Denosier was run and there are denoised versions of the passes in the render buffers. */
bool has_denoised_result = false;
/* Current tile has been written (to either disk or callback.
@@ -1,107 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "integrator/path_trace_tile.h"
#include "integrator/pass_accessor_cpu.h"
#include "integrator/path_trace.h"
#include "render/buffers.h"
#include "render/film.h"
#include "render/pass.h"
#include "render/scene.h"
CCL_NAMESPACE_BEGIN
PathTraceTile::PathTraceTile(PathTrace &path_trace)
: OutputDriver::Tile(path_trace.get_render_tile_offset(),
path_trace.get_render_tile_size(),
path_trace.get_render_size(),
path_trace.get_render_tile_params().layer,
path_trace.get_render_tile_params().view),
path_trace_(path_trace),
copied_from_device_(false)
{
}
bool PathTraceTile::get_pass_pixels(const string_view pass_name,
const int num_channels,
float *pixels) const
{
/* NOTE: The code relies on the fact that the session is fully updated and no scene/buffer
 * modification is happening while this function runs. */
if (!copied_from_device_) {
/* Copy from device on demand. */
path_trace_.copy_render_tile_from_device();
const_cast<PathTraceTile *>(this)->copied_from_device_ = true;
}
const BufferParams &buffer_params = path_trace_.get_render_tile_params();
const BufferPass *pass = buffer_params.find_pass(pass_name);
if (pass == nullptr) {
return false;
}
const bool has_denoised_result = path_trace_.has_denoised_result();
if (pass->mode == PassMode::DENOISED && !has_denoised_result) {
pass = buffer_params.find_pass(pass->type);
if (pass == nullptr) {
/* Happens when denoised result pass is requested but is never written by the kernel. */
return false;
}
}
pass = buffer_params.get_actual_display_pass(pass);
const float exposure = buffer_params.exposure;
const int num_samples = path_trace_.get_num_render_tile_samples();
PassAccessor::PassAccessInfo pass_access_info(*pass);
pass_access_info.use_approximate_shadow_catcher = buffer_params.use_approximate_shadow_catcher;
pass_access_info.use_approximate_shadow_catcher_background =
pass_access_info.use_approximate_shadow_catcher && !buffer_params.use_transparent_background;
const PassAccessorCPU pass_accessor(pass_access_info, exposure, num_samples);
const PassAccessor::Destination destination(pixels, num_channels);
return path_trace_.get_render_tile_pixels(pass_accessor, destination);
}
bool PathTraceTile::set_pass_pixels(const string_view pass_name,
const int num_channels,
const float *pixels) const
{
/* NOTE: The code relies on the fact that the session is fully updated and no scene/buffer
 * modification is happening while this function runs. */
const BufferParams &buffer_params = path_trace_.get_render_tile_params();
const BufferPass *pass = buffer_params.find_pass(pass_name);
if (!pass) {
return false;
}
const float exposure = buffer_params.exposure;
const int num_samples = 1;
const PassAccessor::PassAccessInfo pass_access_info(*pass);
PassAccessorCPU pass_accessor(pass_access_info, exposure, num_samples);
PassAccessor::Source source(pixels, num_channels);
return path_trace_.set_render_tile_pixels(pass_accessor, source);
}
CCL_NAMESPACE_END
@@ -1,43 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "render/output_driver.h"
CCL_NAMESPACE_BEGIN
/* PathTraceTile
*
* Implementation of OutputDriver::Tile interface for path tracer. */
class PathTrace;
class PathTraceTile : public OutputDriver::Tile {
public:
PathTraceTile(PathTrace &path_trace);
bool get_pass_pixels(const string_view pass_name, const int num_channels, float *pixels) const;
bool set_pass_pixels(const string_view pass_name,
const int num_channels,
const float *pixels) const;
private:
PathTrace &path_trace_;
bool copied_from_device_;
};
CCL_NAMESPACE_END
@@ -16,12 +16,12 @@
#include "device/device.h"
#include "integrator/path_trace_display.h"
#include "integrator/path_trace_work.h"
#include "integrator/path_trace_work_cpu.h"
#include "integrator/path_trace_work_gpu.h"
#include "render/buffers.h"
#include "render/film.h"
#include "render/gpu_display.h"
#include "render/scene.h"
#include "kernel/kernel_types.h"
@@ -185,12 +185,12 @@ PassAccessor::PassAccessInfo PathTraceWork::get_display_pass_access_info(PassMod
return pass_access_info;
}
PassAccessor::Destination PathTraceWork::get_display_destination_template(
const PathTraceDisplay *display) const
PassAccessor::Destination PathTraceWork::get_gpu_display_destination_template(
const GPUDisplay *gpu_display) const
{
PassAccessor::Destination destination(film_->get_display_pass());
const int2 display_texture_size = display->get_texture_size();
const int2 display_texture_size = gpu_display->get_texture_size();
const int texture_x = effective_buffer_params_.full_x - effective_full_params_.full_x;
const int texture_y = effective_buffer_params_.full_y - effective_full_params_.full_y;
@@ -28,7 +28,7 @@ class BufferParams;
class Device;
class DeviceScene;
class Film;
class PathTraceDisplay;
class GPUDisplay;
class RenderBuffers;
class PathTraceWork {
@@ -83,9 +83,11 @@ class PathTraceWork {
* noisy pass mode will be passed here when it is known that the buffer does not have denoised
* passes yet (because denoiser did not run). If the denoised pass is requested and denoiser is
* not used then this function will fall back to the noisy pass instead. */
virtual void copy_to_display(PathTraceDisplay *display, PassMode pass_mode, int num_samples) = 0;
virtual void copy_to_gpu_display(GPUDisplay *gpu_display,
PassMode pass_mode,
int num_samples) = 0;
virtual void destroy_gpu_resources(PathTraceDisplay *display) = 0;
virtual void destroy_gpu_resources(GPUDisplay *gpu_display) = 0;
/* Copy data from/to given render buffers.
* Will copy pixels from a corresponding place (from multi-device point of view) of the render
@@ -102,7 +104,7 @@ class PathTraceWork {
* - Copies work's render buffer to its device. */
void copy_from_render_buffers(const RenderBuffers *render_buffers);
/* Special version of the `copy_from_render_buffers()` which only copies denoised passes from the
/* Special version of the `copy_from_render_buffers()` which only copies denosied passes from the
* given render buffers, leaving rest of the passes.
*
* Same notes about device copying applies to this call as well. */
@@ -160,8 +162,8 @@ class PathTraceWork {
/* Get destination which offset and stride are configured so that writing to it will write to a
* proper location of GPU display texture, taking current tile and device slice into account. */
PassAccessor::Destination get_display_destination_template(
const PathTraceDisplay *display) const;
PassAccessor::Destination get_gpu_display_destination_template(
const GPUDisplay *gpu_display) const;
/* Device which will be used for path tracing.
* Note that it is an actual render device (and never is a multi-device). */
@@ -19,12 +19,10 @@
#include "device/cpu/kernel.h"
#include "device/device.h"
#include "kernel/kernel_path_state.h"
#include "integrator/pass_accessor_cpu.h"
#include "integrator/path_trace_display.h"
#include "render/buffers.h"
#include "render/gpu_display.h"
#include "render/scene.h"
#include "util/util_atomic.h"
@@ -118,17 +116,13 @@ void PathTraceWorkCPU::render_samples_full_pipeline(KernelGlobals *kernel_global
const KernelWorkTile &work_tile,
const int samples_num)
{
const bool has_shadow_catcher = device_scene_->data.integrator.has_shadow_catcher;
const bool has_bake = device_scene_->data.bake.use;
IntegratorStateCPU integrator_states[2];
IntegratorStateCPU integrator_states[2] = {};
IntegratorStateCPU *state = &integrator_states[0];
IntegratorStateCPU *shadow_catcher_state = nullptr;
if (device_scene_->data.integrator.has_shadow_catcher) {
shadow_catcher_state = &integrator_states[1];
path_state_init_queues(kernel_globals, shadow_catcher_state);
}
IntegratorStateCPU *shadow_catcher_state = &integrator_states[1];
KernelWorkTile sample_work_tile = work_tile;
float *render_buffer = buffers_->buffer.data();
@@ -153,7 +147,7 @@ void PathTraceWorkCPU::render_samples_full_pipeline(KernelGlobals *kernel_global
kernels_.integrator_megakernel(kernel_globals, state, render_buffer);
if (shadow_catcher_state) {
if (has_shadow_catcher) {
kernels_.integrator_megakernel(kernel_globals, shadow_catcher_state, render_buffer);
}
@@ -161,14 +155,14 @@ void PathTraceWorkCPU::render_samples_full_pipeline(KernelGlobals *kernel_global
}
}
void PathTraceWorkCPU::copy_to_display(PathTraceDisplay *display,
PassMode pass_mode,
int num_samples)
void PathTraceWorkCPU::copy_to_gpu_display(GPUDisplay *gpu_display,
PassMode pass_mode,
int num_samples)
{
half4 *rgba_half = display->map_texture_buffer();
half4 *rgba_half = gpu_display->map_texture_buffer();
if (!rgba_half) {
/* TODO(sergey): Look into using copy_to_display() if mapping failed. Might be needed for
* some implementations of PathTraceDisplay which can not map memory? */
/* TODO(sergey): Look into using copy_to_gpu_display() if mapping failed. Might be needed for
* some implementations of GPUDisplay which can not map memory? */
return;
}
@@ -178,7 +172,7 @@ void PathTraceWorkCPU::copy_to_display(PathTraceDisplay *display,
const PassAccessorCPU pass_accessor(pass_access_info, kfilm.exposure, num_samples);
PassAccessor::Destination destination = get_display_destination_template(display);
PassAccessor::Destination destination = get_gpu_display_destination_template(gpu_display);
destination.pixels_half_rgba = rgba_half;
tbb::task_arena local_arena = local_tbb_arena_create(device_);
@@ -186,10 +180,10 @@ void PathTraceWorkCPU::copy_to_display(PathTraceDisplay *display,
pass_accessor.get_render_tile_pixels(buffers_.get(), effective_buffer_params_, destination);
});
display->unmap_texture_buffer();
gpu_display->unmap_texture_buffer();
}
void PathTraceWorkCPU::destroy_gpu_resources(PathTraceDisplay * /*display*/)
void PathTraceWorkCPU::destroy_gpu_resources(GPUDisplay * /*gpu_display*/)
{
}
@@ -50,10 +50,10 @@ class PathTraceWorkCPU : public PathTraceWork {
int start_sample,
int samples_num) override;
virtual void copy_to_display(PathTraceDisplay *display,
PassMode pass_mode,
int num_samples) override;
virtual void destroy_gpu_resources(PathTraceDisplay *display) override;
virtual void copy_to_gpu_display(GPUDisplay *gpu_display,
PassMode pass_mode,
int num_samples) override;
virtual void destroy_gpu_resources(GPUDisplay *gpu_display) override;
virtual bool copy_render_buffers_from_device() override;
virtual bool copy_render_buffers_to_device() override;
@@ -15,12 +15,12 @@
*/
#include "integrator/path_trace_work_gpu.h"
#include "integrator/path_trace_display.h"
#include "device/device.h"
#include "integrator/pass_accessor_gpu.h"
#include "render/buffers.h"
#include "render/gpu_display.h"
#include "render/scene.h"
#include "util/util_logging.h"
#include "util/util_tbb.h"
@@ -46,7 +46,7 @@ PathTraceWorkGPU::PathTraceWorkGPU(Device *device,
queued_paths_(device, "queued_paths", MEM_READ_WRITE),
num_queued_paths_(device, "num_queued_paths", MEM_READ_WRITE),
work_tiles_(device, "work_tiles", MEM_READ_WRITE),
display_rgba_half_(device, "display buffer half", MEM_READ_WRITE),
gpu_display_rgba_half_(device, "display buffer half", MEM_READ_WRITE),
max_num_paths_(queue_->num_concurrent_states(sizeof(IntegratorStateCPU))),
min_num_active_paths_(queue_->num_concurrent_busy_states()),
max_active_path_index_(0)
@@ -95,8 +95,8 @@ void PathTraceWorkGPU::alloc_integrator_soa()
#define KERNEL_STRUCT_END(name) \
break; \
}
#define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \
if (array_index == gpu_array_size - 1) { \
#define KERNEL_STRUCT_END_ARRAY(name, array_size) \
if (array_index == array_size - 1) { \
break; \
} \
}
@@ -652,7 +652,7 @@ int PathTraceWorkGPU::get_num_active_paths()
bool PathTraceWorkGPU::should_use_graphics_interop()
{
/* There are a few complications with the graphics interop when using multiple devices, caused by the fact
* that the PathTraceDisplay has a single texture:
* that the GPUDisplay has a single texture:
*
* CUDA will return `CUDA_ERROR_NOT_SUPPORTED` from `cuGraphicsGLRegisterBuffer()` when
* attempting to register OpenGL PBO which has been mapped. Which makes sense, because
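A minimal sketch, assuming a cached interop_use_ flag and a per-device capability query, of the decision this comment motivates; interop support is probed once and the answer is reused, since the registration failure described above is stable for a given device set:

bool PathTraceWorkGPU::should_use_graphics_interop()
{
  if (!interop_use_checked_) {
    /* Sketch only: the capability query and its exact name are assumptions. */
    interop_use_ = device_->should_use_graphics_interop();
    interop_use_checked_ = true;
  }
  return interop_use_;
}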
@@ -678,9 +678,9 @@ bool PathTraceWorkGPU::should_use_graphics_interop()
return interop_use_;
}
void PathTraceWorkGPU::copy_to_display(PathTraceDisplay *display,
PassMode pass_mode,
int num_samples)
void PathTraceWorkGPU::copy_to_gpu_display(GPUDisplay *gpu_display,
PassMode pass_mode,
int num_samples)
{
if (device_->have_error()) {
/* Don't attempt to update GPU display if the device has errors: the error state will make
@@ -694,7 +694,7 @@ void PathTraceWorkGPU::copy_to_display(PathTraceDisplay *display,
}
if (should_use_graphics_interop()) {
if (copy_to_display_interop(display, pass_mode, num_samples)) {
if (copy_to_gpu_display_interop(gpu_display, pass_mode, num_samples)) {
return;
}
@@ -703,12 +703,12 @@ void PathTraceWorkGPU::copy_to_display(PathTraceDisplay *display,
interop_use_ = false;
}
copy_to_display_naive(display, pass_mode, num_samples);
copy_to_gpu_display_naive(gpu_display, pass_mode, num_samples);
}
void PathTraceWorkGPU::copy_to_display_naive(PathTraceDisplay *display,
PassMode pass_mode,
int num_samples)
void PathTraceWorkGPU::copy_to_gpu_display_naive(GPUDisplay *gpu_display,
PassMode pass_mode,
int num_samples)
{
const int full_x = effective_buffer_params_.full_x;
const int full_y = effective_buffer_params_.full_y;
@@ -725,42 +725,43 @@ void PathTraceWorkGPU::copy_to_display_naive(PathTraceDisplay *display,
* NOTE: allocation happens at the final resolution so that no re-allocation happens on every
* change of the resolution divider. However, if the display becomes smaller, shrink the
* allocated memory as well. */
if (display_rgba_half_.data_width != final_width ||
display_rgba_half_.data_height != final_height) {
display_rgba_half_.alloc(final_width, final_height);
if (gpu_display_rgba_half_.data_width != final_width ||
gpu_display_rgba_half_.data_height != final_height) {
gpu_display_rgba_half_.alloc(final_width, final_height);
/* TODO(sergey): There should be a way to make sure device-side memory is allocated without
* transferring zeroes to the device. */
queue_->zero_to_device(display_rgba_half_);
queue_->zero_to_device(gpu_display_rgba_half_);
}
PassAccessor::Destination destination(film_->get_display_pass());
destination.d_pixels_half_rgba = display_rgba_half_.device_pointer;
destination.d_pixels_half_rgba = gpu_display_rgba_half_.device_pointer;
get_render_tile_film_pixels(destination, pass_mode, num_samples);
queue_->copy_from_device(display_rgba_half_);
queue_->synchronize();
gpu_display_rgba_half_.copy_from_device();
display->copy_pixels_to_texture(display_rgba_half_.data(), texture_x, texture_y, width, height);
gpu_display->copy_pixels_to_texture(
gpu_display_rgba_half_.data(), texture_x, texture_y, width, height);
}
bool PathTraceWorkGPU::copy_to_display_interop(PathTraceDisplay *display,
PassMode pass_mode,
int num_samples)
bool PathTraceWorkGPU::copy_to_gpu_display_interop(GPUDisplay *gpu_display,
PassMode pass_mode,
int num_samples)
{
if (!device_graphics_interop_) {
device_graphics_interop_ = queue_->graphics_interop_create();
}
const DisplayDriver::GraphicsInterop graphics_interop_dst = display->graphics_interop_get();
device_graphics_interop_->set_display_interop(graphics_interop_dst);
const DeviceGraphicsInteropDestination graphics_interop_dst =
gpu_display->graphics_interop_get();
device_graphics_interop_->set_destination(graphics_interop_dst);
const device_ptr d_rgba_half = device_graphics_interop_->map();
if (!d_rgba_half) {
return false;
}
PassAccessor::Destination destination = get_display_destination_template(display);
PassAccessor::Destination destination = get_gpu_display_destination_template(gpu_display);
destination.d_pixels_half_rgba = d_rgba_half;
get_render_tile_film_pixels(destination, pass_mode, num_samples);
@@ -770,14 +771,14 @@ bool PathTraceWorkGPU::copy_to_display_interop(PathTraceDisplay *display,
return true;
}
void PathTraceWorkGPU::destroy_gpu_resources(PathTraceDisplay *display)
void PathTraceWorkGPU::destroy_gpu_resources(GPUDisplay *gpu_display)
{
if (!device_graphics_interop_) {
return;
}
display->graphics_interop_activate();
gpu_display->graphics_interop_activate();
device_graphics_interop_ = nullptr;
display->graphics_interop_deactivate();
gpu_display->graphics_interop_deactivate();
}
void PathTraceWorkGPU::get_render_tile_film_pixels(const PassAccessor::Destination &destination,
@@ -48,10 +48,10 @@ class PathTraceWorkGPU : public PathTraceWork {
int start_sample,
int samples_num) override;
virtual void copy_to_display(PathTraceDisplay *display,
PassMode pass_mode,
int num_samples) override;
virtual void destroy_gpu_resources(PathTraceDisplay *display) override;
virtual void copy_to_gpu_display(GPUDisplay *gpu_display,
PassMode pass_mode,
int num_samples) override;
virtual void destroy_gpu_resources(GPUDisplay *gpu_display) override;
virtual bool copy_render_buffers_from_device() override;
virtual bool copy_render_buffers_to_device() override;
@@ -88,16 +88,16 @@ class PathTraceWorkGPU : public PathTraceWork {
int get_num_active_paths();
/* Check whether graphics interop can be used for the PathTraceDisplay update. */
/* Check whether graphics interop can be used for the GPUDisplay update. */
bool should_use_graphics_interop();
/* Naive implementation of the `copy_to_display()` which performs film conversion on the
* device, then copies pixels to the host and pushes them to the `display`. */
void copy_to_display_naive(PathTraceDisplay *display, PassMode pass_mode, int num_samples);
/* Naive implementation of the `copy_to_gpu_display()` which performs film conversion on the
* device, then copies pixels to the host and pushes them to the `gpu_display`. */
void copy_to_gpu_display_naive(GPUDisplay *gpu_display, PassMode pass_mode, int num_samples);
/* Implementation of `copy_to_display()` which uses driver's OpenGL/GPU interoperability
/* Implementation of `copy_to_gpu_display()` which uses driver's OpenGL/GPU interoperability
* functionality, avoiding copy of pixels to the host. */
bool copy_to_display_interop(PathTraceDisplay *display, PassMode pass_mode, int num_samples);
bool copy_to_gpu_display_interop(GPUDisplay *gpu_display, PassMode pass_mode, int num_samples);
/* Synchronously run film conversion kernel and store display result in the given destination. */
void get_render_tile_film_pixels(const PassAccessor::Destination &destination,
@@ -139,9 +139,9 @@ class PathTraceWorkGPU : public PathTraceWork {
/* Temporary buffer for passing work tiles to kernel. */
device_vector<KernelWorkTile> work_tiles_;
/* Temporary buffer used by the copy_to_display() whenever graphics interoperability is not
/* Temporary buffer used by the copy_to_gpu_display() whenever graphics interoperability is not
* available. Is allocated on-demand. */
device_vector<half4> display_rgba_half_;
device_vector<half4> gpu_display_rgba_half_;
unique_ptr<DeviceGraphicsInterop> device_graphics_interop_;
@@ -384,7 +384,7 @@ bool RenderScheduler::set_postprocess_render_work(RenderWork *render_work)
}
if (denoiser_params_.use && !state_.last_work_tile_was_denoised) {
render_work->tile.denoise = !tile_manager_.has_multiple_tiles();
render_work->tile.denoise = true;
any_scheduled = true;
}
@@ -903,12 +903,6 @@ bool RenderScheduler::work_need_denoise(bool &delayed, bool &ready_to_display)
return false;
}
/* When multiple tiles are used the full frame will be denoised.
* Avoid per-tile denoising to save render time. */
if (tile_manager_.has_multiple_tiles()) {
return false;
}
if (done()) {
/* Always denoise at the last sample. */
return true;
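A condensed sketch, simplified from the full heuristic, of the per-tile denoising rule described above; the helper name is illustrative:

bool need_denoise_tile(const bool has_multiple_tiles, const bool is_last_sample)
{
  /* With multiple tiles the full frame is denoised at the end instead, so
   * per-tile denoising would only add render time. */
  if (has_multiple_tiles) {
    return false;
  }
  /* Always denoise at the last sample. */
  return is_last_sample;
}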
@@ -31,7 +31,7 @@ class RenderWork {
int resolution_divider = 1;
/* Initialize render buffers.
* Includes steps like zeroing the buffer on the device, and optional reading of pixels from the
* Includes steps like zero-ing the buffer on the device, and optional reading of pixels from the
* baking target. */
bool init_render_buffers = false;
@@ -344,7 +344,7 @@ class RenderScheduler {
/* Number of rendered samples on top of the start sample. */
int num_rendered_samples = 0;
/* Point in time the latest PathTraceDisplay work has been scheduled. */
/* Point in time the latest GPUDisplay work has been scheduled. */
double last_display_update_time = 0.0;
/* Value of -1 means display was never updated. */
int last_display_update_sample = -1;
@@ -149,14 +149,14 @@ bool ShaderEval::eval_gpu(Device *device,
/* Execute work on GPU in chunks, so we can cancel.
 * TODO: query appropriate size from device. */
const int64_t chunk_size = 65536;
const int chunk_size = 65536;
const int64_t work_size = output.size();
const int work_size = output.size();
void *d_input = (void *)input.device_pointer;
void *d_output = (void *)output.device_pointer;
for (int64_t d_offset = 0; d_offset < work_size; d_offset += chunk_size) {
int64_t d_work_size = std::min(chunk_size, work_size - d_offset);
for (int d_offset = 0; d_offset < work_size; d_offset += chunk_size) {
int d_work_size = min(chunk_size, work_size - d_offset);
void *args[] = {&d_input, &d_output, &d_offset, &d_work_size};
queue->enqueue(kernel, d_work_size, args);
@@ -35,10 +35,6 @@ set(SRC_DEVICE_CUDA
device/cuda/kernel.cu
)
set(SRC_DEVICE_HIP
device/hip/kernel.cpp
)
set(SRC_DEVICE_OPTIX
device/optix/kernel.cu
device/optix/kernel_shader_raytrace.cu
@@ -110,12 +106,6 @@ set(SRC_DEVICE_CUDA_HEADERS
device/cuda/globals.h
)
set(SRC_DEVICE_HIP_HEADERS
device/hip/compat.h
device/hip/config.h
device/hip/globals.h
)
set(SRC_DEVICE_OPTIX_HEADERS
device/optix/compat.h
device/optix/globals.h
@@ -468,104 +458,6 @@ if(WITH_CYCLES_CUDA_BINARIES)
cycles_set_solution_folder(cycles_kernel_cuda)
endif()
####################################################### START
# HIP module
if(WITH_CYCLES_HIP_BINARIES)
# 64 bit only
set(HIP_BITS 64)
# HIP version
execute_process(COMMAND ${HIP_HIPCC_EXECUTABLE} "--version" OUTPUT_VARIABLE HIPCC_OUT)
string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" HIP_VERSION_MAJOR "${HIPCC_OUT}")
string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" HIP_VERSION_MINOR "${HIPCC_OUT}")
set(HIP_VERSION "${HIP_VERSION_MAJOR}${HIP_VERSION_MINOR}")
message(WARNING
"HIP version ${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR} detected")
# build for each arch
set(hip_sources device/hip/kernel.cpp
${SRC_HEADERS}
${SRC_DEVICE_HIP_HEADERS}
${SRC_BVH_HEADERS}
${SRC_SVM_HEADERS}
${SRC_GEOM_HEADERS}
${SRC_INTEGRATOR_HEADERS}
${SRC_CLOSURE_HEADERS}
${SRC_UTIL_HEADERS}
)
set(hip_fatbins)
macro(CYCLES_HIP_KERNEL_ADD arch prev_arch name flags sources experimental)
if(${arch} MATCHES "compute_.*")
set(format "ptx")
else()
set(format "fatbin")
endif()
set(hip_file ${name}_${arch}.${format})
set(kernel_sources ${sources})
if(NOT ${prev_arch} STREQUAL "none")
if(${prev_arch} MATCHES "compute_.*")
set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.ptx)
else()
set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.fatbin)
endif()
endif()
set(hip_kernel_src "/device/hip/${name}.cpp")
set(hip_flags ${flags}
-D CCL_NAMESPACE_BEGIN=
-D CCL_NAMESPACE_END=
-D HIPCC
-m ${HIP_BITS}
-I ${CMAKE_CURRENT_SOURCE_DIR}/..
-I ${CMAKE_CURRENT_SOURCE_DIR}/device/hip
--use_fast_math
-o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file})
if(${experimental})
set(hip_flags ${hip_flags} -D __KERNEL_EXPERIMENTAL__)
set(name ${name}_experimental)
endif()
if(WITH_CYCLES_DEBUG)
set(hip_flags ${hip_flags} -D __KERNEL_DEBUG__)
endif()
if(WITH_NANOVDB)
set(hip_flags ${hip_flags}
-D WITH_NANOVDB
-I "${NANOVDB_INCLUDE_DIR}")
endif()
endmacro()
set(prev_arch "none")
foreach(arch ${CYCLES_HIP_BINARIES_ARCH})
set(hip_hipcc_executable ${HIP_HIPCC_EXECUTABLE})
set(hip_toolkit_root_dir ${HIP_TOOLKIT_ROOT_DIR})
if(DEFINED hip_hipcc_executable AND DEFINED hip_toolkit_root_dir)
# Compile regular kernel
CYCLES_HIP_KERNEL_ADD(${arch} ${prev_arch} kernel "" "${hip_sources}" FALSE)
if(WITH_CYCLES_HIP_BUILD_SERIAL)
set(prev_arch ${arch})
endif()
unset(hip_hipcc_executable)
unset(hip_toolkit_root_dir)
endif()
endforeach()
add_custom_target(cycles_kernel_hip ALL DEPENDS ${hip_fatbins})
cycles_set_solution_folder(cycles_kernel_hip)
endif()
####################################################### END
# OptiX PTX modules
if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
@@ -710,13 +602,11 @@ endif()
cycles_add_library(cycles_kernel "${LIB}"
${SRC_DEVICE_CPU}
${SRC_DEVICE_CUDA}
${SRC_DEVICE_HIP}
${SRC_DEVICE_OPTIX}
${SRC_HEADERS}
${SRC_DEVICE_CPU_HEADERS}
${SRC_DEVICE_GPU_HEADERS}
${SRC_DEVICE_CUDA_HEADERS}
${SRC_DEVICE_HIP_HEADERS}
${SRC_DEVICE_OPTIX_HEADERS}
${SRC_BVH_HEADERS}
${SRC_CLOSURE_HEADERS}
@@ -731,7 +621,6 @@ source_group("geom" FILES ${SRC_GEOM_HEADERS})
source_group("integrator" FILES ${SRC_INTEGRATOR_HEADERS})
source_group("kernel" FILES ${SRC_HEADERS})
source_group("device\\cpu" FILES ${SRC_DEVICE_CPU} ${SRC_DEVICE_CPU_HEADERS})
source_group("device\\hip" FILES ${SRC_DEVICE_HIP} ${SRC_DEVICE_HIP_HEADERS})
source_group("device\\gpu" FILES ${SRC_DEVICE_GPU_HEADERS})
source_group("device\\cuda" FILES ${SRC_DEVICE_CUDA} ${SRC_DEVICE_CUDA_HEADERS})
source_group("device\\optix" FILES ${SRC_DEVICE_OPTIX} ${SRC_DEVICE_OPTIX_HEADERS})
@@ -743,19 +632,14 @@ endif()
if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
add_dependencies(cycles_kernel cycles_kernel_optix)
endif()
if(WITH_CYCLES_HIP)
add_dependencies(cycles_kernel cycles_kernel_hip)
endif()
# Install kernel source for runtime compilation
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_DEVICE_CUDA}" ${CYCLES_INSTALL_PATH}/source/kernel/device/cuda)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_DEVICE_HIP}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hip)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_DEVICE_OPTIX}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_DEVICE_GPU_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/gpu)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_DEVICE_CUDA_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/cuda)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_DEVICE_HIP_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hip)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_DEVICE_OPTIX_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_BVH_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/bvh)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CLOSURE_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/closure)
@@ -167,25 +167,15 @@ ccl_device_intersect bool scene_intersect(const KernelGlobals *kg,
uint p4 = visibility;
uint p5 = PRIMITIVE_NONE;
uint ray_mask = visibility & 0xFF;
uint ray_flags = OPTIX_RAY_FLAG_NONE;
if (0 == ray_mask && (visibility & ~0xFF) != 0) {
ray_mask = 0xFF;
ray_flags = OPTIX_RAY_FLAG_ENFORCE_ANYHIT;
}
else if (visibility & PATH_RAY_SHADOW_OPAQUE) {
ray_flags = OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT;
}
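/* Worked example of the mask fix-up above (illustrative values): a ray whose
 * visibility word only has bits above the low byte set, e.g. 0x300, would get
 * ray_mask == 0 and be culled against every instance. Forcing the mask to
 * 0xFF keeps the ray testable, and OPTIX_RAY_FLAG_ENFORCE_ANYHIT defers the
 * filtering to the any-hit visibility test instead. */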
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
ray->P,
ray->D,
0.0f,
ray->t,
ray->time,
ray_mask,
ray_flags,
0, /* SBT offset for PG_HITD */
0xF,
OPTIX_RAY_FLAG_NONE,
0, // SBT offset for PG_HITD
0,
0,
p0,
@@ -261,11 +251,11 @@ ccl_device_intersect bool scene_intersect_local(const KernelGlobals *kg,
uint p2 = ((uint64_t)local_isect) & 0xFFFFFFFF;
uint p3 = (((uint64_t)local_isect) >> 32) & 0xFFFFFFFF;
uint p4 = local_object;
/* Is set to zero on miss or if ray is aborted, so can be used as return value. */
// Is set to zero on miss or if ray is aborted, so can be used as return value
uint p5 = max_hits;
if (local_isect) {
local_isect->num_hits = 0; /* Initialize hit count to zero. */
local_isect->num_hits = 0; // Initialize hit count to zero
}
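/* Sketch of how the pointer split across p2/p3 above is reassembled on the
 * device side; the hit programs use a helper along these lines (compare
 * get_payload_ptr_2() further down in this compare):
 *   LocalIntersection *local_isect = (LocalIntersection *)(
 *       ((uint64_t)optixGetPayload_3() << 32) | optixGetPayload_2());
 * optixGetPayload_2/3() are standard OptiX device intrinsics. */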
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
ray->P,
@@ -273,10 +263,11 @@ ccl_device_intersect bool scene_intersect_local(const KernelGlobals *kg,
0.0f,
ray->t,
ray->time,
0xFF,
/* Need to always call into __anyhit__kernel_optix_local_hit. */
// Skip curves
0x3,
// Need to always call into __anyhit__kernel_optix_local_hit
OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
2, /* SBT offset for PG_HITL */
2, // SBT offset for PG_HITL
0,
0,
p0,
@@ -374,22 +365,17 @@ ccl_device_intersect bool scene_intersect_shadow_all(const KernelGlobals *kg,
uint p4 = visibility;
uint p5 = false;
uint ray_mask = visibility & 0xFF;
if (0 == ray_mask && (visibility & ~0xFF) != 0) {
ray_mask = 0xFF;
}
*num_hits = 0; /* Initialize hit count to zero. */
*num_hits = 0; // Initialize hit count to zero
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
ray->P,
ray->D,
0.0f,
ray->t,
ray->time,
ray_mask,
/* Need to always call into __anyhit__kernel_optix_shadow_all_hit. */
0xF,
// Need to always call into __anyhit__kernel_optix_shadow_all_hit
OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
1, /* SBT offset for PG_HITS */
1, // SBT offset for PG_HITS
0,
0,
p0,
@@ -458,21 +444,16 @@ ccl_device_intersect bool scene_intersect_volume(const KernelGlobals *kg,
uint p4 = visibility;
uint p5 = PRIMITIVE_NONE;
uint ray_mask = visibility & 0xFF;
if (0 == ray_mask && (visibility & ~0xFF) != 0) {
ray_mask = 0xFF;
}
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
ray->P,
ray->D,
0.0f,
ray->t,
ray->time,
ray_mask,
/* Need to always call into __anyhit__kernel_optix_volume_test. */
OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
3, /* SBT offset for PG_HITV */
// Skip everything but volumes
0x2,
OPTIX_RAY_FLAG_NONE,
0, // SBT offset for PG_HITD
0,
0,
p0,
@@ -21,15 +21,11 @@ CCL_NAMESPACE_BEGIN
/* Given an array of states, build an array of indices for which the states
* are active.
*
* Shared memory requirement is `sizeof(int) * (number_of_warps + 1)`. */
* Shared memory requirement is sizeof(int) * (number_of_warps + 1) */
#include "util/util_atomic.h"
#ifdef __HIP__
# define GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE 1024
#else
# define GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE 512
#endif
#define GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE 512
template<uint blocksize, typename IsActiveOp>
__device__ void gpu_parallel_active_index_array(const uint num_states,
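Worked example of the shared-memory requirement quoted above, assuming the default block size of 512 and a warp size of 32:

const uint block_size = GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE; /* 512 threads */
const uint num_warps = block_size / 32;                               /* 16 warps */
const uint shared_bytes = sizeof(int) * (num_warps + 1);              /* 68 bytes */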
@@ -27,11 +27,7 @@ CCL_NAMESPACE_BEGIN
#include "util/util_atomic.h"
#ifdef __HIP__
# define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 1024
#else
# define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 512
#endif
#define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 512
template<uint blocksize> __device__ void gpu_parallel_prefix_sum(int *values, const int num_values)
{
@@ -26,11 +26,7 @@ CCL_NAMESPACE_BEGIN
* the overall cost of the algorithm while keeping the work complexity O(n) and
* the step complexity O(log n). (Brent's Theorem optimization) */
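/* I.e. each thread first accumulates several inputs serially, then a
 * logarithmic tree reduction combines the per-thread partials; total work
 * stays O(n) while the step count stays O(log n). */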
#ifdef __HIP__
# define GPU_PARALLEL_SUM_DEFAULT_BLOCK_SIZE 1024
#else
# define GPU_PARALLEL_SUM_DEFAULT_BLOCK_SIZE 512
#endif
#define GPU_PARALLEL_SUM_DEFAULT_BLOCK_SIZE 512
template<uint blocksize, typename InputT, typename OutputT, typename ConvertOp>
__device__ void gpu_parallel_sum(
@@ -26,11 +26,7 @@ CCL_NAMESPACE_BEGIN
#include "util/util_atomic.h"
#ifdef __HIP__
# define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE 1024
#else
# define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE 512
#endif
#define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE 512
#define GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY (~0)
template<uint blocksize, typename GetKeyOp>
@@ -1,121 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#define __KERNEL_GPU__
#define __KERNEL_HIP__
#define CCL_NAMESPACE_BEGIN
#define CCL_NAMESPACE_END
#ifndef ATTR_FALLTHROUGH
# define ATTR_FALLTHROUGH
#endif
#ifdef __HIPCC_RTC__
typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
#else
# include <stdint.h>
#endif
#ifdef CYCLES_HIPBIN_CC
# define FLT_MIN 1.175494350822287507969e-38f
# define FLT_MAX 340282346638528859811704183484516925440.0f
# define FLT_EPSILON 1.192092896e-07F
#endif
/* Qualifiers */
#define ccl_device __device__ __inline__
#define ccl_device_inline __device__ __inline__
#define ccl_device_forceinline __device__ __forceinline__
#define ccl_device_noinline __device__ __noinline__
#define ccl_device_noinline_cpu ccl_device
#define ccl_global
#define ccl_static_constant __constant__
#define ccl_device_constant __constant__ __device__
#define ccl_constant const
#define ccl_gpu_shared __shared__
#define ccl_private
#define ccl_may_alias
#define ccl_addr_space
#define ccl_restrict __restrict__
#define ccl_loop_no_unroll
#define ccl_align(n) __align__(n)
#define ccl_optional_struct_init
#define kernel_assert(cond)
/* Types */
#ifdef __HIP__
# include "hip/hip_fp16.h"
# include "hip/hip_runtime.h"
#endif
#ifdef _MSC_VER
# include <immintrin.h>
#endif
#define ccl_gpu_thread_idx_x (threadIdx.x)
#define ccl_gpu_block_dim_x (blockDim.x)
#define ccl_gpu_block_idx_x (blockIdx.x)
#define ccl_gpu_grid_dim_x (gridDim.x)
#define ccl_gpu_warp_size (warpSize)
#define ccl_gpu_global_id_x() (ccl_gpu_block_idx_x * ccl_gpu_block_dim_x + ccl_gpu_thread_idx_x)
#define ccl_gpu_global_size_x() (ccl_gpu_grid_dim_x * ccl_gpu_block_dim_x)
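/* Worked example of the indexing above: in block 2 of dimension 512, thread 7
 * computes ccl_gpu_global_id_x() == 2 * 512 + 7 == 1031, and with a grid of
 * 64 blocks ccl_gpu_global_size_x() == 64 * 512 == 32768. */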
/* GPU warp synchronization */
#define ccl_gpu_syncthreads() __syncthreads()
#define ccl_gpu_ballot(predicate) __ballot(predicate)
#define ccl_gpu_shfl_down_sync(mask, var, delta) __shfl_down(var, delta)
#define ccl_gpu_popc(x) __popc(x)
/* GPU texture objects */
typedef hipTextureObject_t ccl_gpu_tex_object;
template<typename T>
ccl_device_forceinline T ccl_gpu_tex_object_read_2D(const ccl_gpu_tex_object texobj,
const float x,
const float y)
{
return tex2D<T>(texobj, x, y);
}
template<typename T>
ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object texobj,
const float x,
const float y,
const float z)
{
return tex3D<T>(texobj, x, y, z);
}
/* Use fast math functions */
#define cosf(x) __cosf(((float)(x)))
#define sinf(x) __sinf(((float)(x)))
#define powf(x, y) __powf(((float)(x)), ((float)(y)))
#define tanf(x) __tanf(((float)(x)))
#define logf(x) __logf(((float)(x)))
#define expf(x) __expf(((float)(x)))
/* Types */
#include "util/util_half.h"
#include "util/util_types.h"
@@ -1,57 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* Device data taken from HIP occupancy calculator.
*
* Terminology
* - HIP GPUs have multiple streaming multiprocessors
* - Each multiprocessor executes multiple thread blocks
* - Each thread block contains a number of threads, also known as the block size
* - Multiprocessors have a fixed number of registers, and the number of registers
 * used by each thread limits the number of threads per block.
*/
/* Launch Bound Definitions */
#define GPU_MULTIPRESSOR_MAX_REGISTERS 65536
#define GPU_MULTIPROCESSOR_MAX_BLOCKS 64
#define GPU_BLOCK_MAX_THREADS 1024
#define GPU_THREAD_MAX_REGISTERS 255
#define GPU_KERNEL_BLOCK_NUM_THREADS 1024
#define GPU_KERNEL_MAX_REGISTERS 64
/* Compute number of threads per block and minimum blocks per multiprocessor
* given the maximum number of registers per thread. */
#define ccl_gpu_kernel(block_num_threads, thread_num_registers) \
extern "C" __global__ void __launch_bounds__(block_num_threads, \
GPU_MULTIPRESSOR_MAX_REGISTERS / \
(block_num_threads * thread_num_registers))
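Worked instance of the macro above with the defaults defined in this file: 65536 / (1024 * 64) = 1, i.e. __launch_bounds__(1024, 1), one resident block of 1024 threads per multiprocessor with each thread capped at 64 registers. A hypothetical kernel would then be declared as:

ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
    kernel_hip_example(float *out) /* hypothetical kernel, for illustration */
{
  out[ccl_gpu_global_id_x()] = 0.0f;
}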
/* sanity checks */
#if GPU_KERNEL_BLOCK_NUM_THREADS > GPU_BLOCK_MAX_THREADS
# error "Maximum number of threads per block exceeded"
#endif
#if GPU_MULTIPRESSOR_MAX_REGISTERS / (GPU_KERNEL_BLOCK_NUM_THREADS * GPU_KERNEL_MAX_REGISTERS) > \
GPU_MULTIPROCESSOR_MAX_BLOCKS
# error "Maximum number of blocks per multiprocessor exceeded"
#endif
#if GPU_KERNEL_MAX_REGISTERS > GPU_THREAD_MAX_REGISTERS
# error "Maximum number of registers per thread exceeded"
#endif
@@ -1,49 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* Constant Globals */
#pragma once
#include "kernel/kernel_profiling.h"
#include "kernel/kernel_types.h"
#include "kernel/integrator/integrator_state.h"
CCL_NAMESPACE_BEGIN
/* Not actually used, just a NULL pointer that gets passed everywhere, which we
* hope gets optimized out by the compiler. */
struct KernelGlobals {
/* NOTE: Keep the size in sync with SHADOW_STACK_MAX_HITS. */
int unused[1];
};
/* Global scene data and textures */
__constant__ KernelData __data;
#define KERNEL_TEX(type, name) __attribute__((used)) const __constant__ __device__ type *name;
#include "kernel/kernel_textures.h"
/* Integrator state */
__constant__ IntegratorStateGPU __integrator_state;
/* Abstraction macros */
#define kernel_data __data
#define kernel_tex_fetch(t, index) t[(index)]
#define kernel_tex_array(t) (t)
#define kernel_integrator_state __integrator_state
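/* Example expansion of the macros above: kernel_tex_fetch(__prim_type, i)
 * becomes __prim_type[(i)], and kernel_data.bvh.scene reads the __data
 * constant directly. */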
CCL_NAMESPACE_END
@@ -1,28 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* HIP kernel entry points */
#ifdef __HIP_DEVICE_COMPILE__
# include "kernel/device/hip/compat.h"
# include "kernel/device/hip/config.h"
# include "kernel/device/hip/globals.h"
# include "kernel/device/gpu/image.h"
# include "kernel/device/gpu/kernel.h"
#endif
@@ -19,7 +19,7 @@
#include "kernel/device/optix/compat.h"
#include "kernel/device/optix/globals.h"
#include "kernel/device/gpu/image.h" /* Texture lookup uses normal CUDA intrinsics. */
#include "kernel/device/gpu/image.h" // Texture lookup uses normal CUDA intrinsics
#include "kernel/integrator/integrator_state.h"
#include "kernel/integrator/integrator_state_flow.h"
@@ -44,18 +44,18 @@ template<typename T> ccl_device_forceinline T *get_payload_ptr_2()
template<bool always = false> ccl_device_forceinline uint get_object_id()
{
#ifdef __OBJECT_MOTION__
/* Always get the instance ID from the TLAS.
 * There might be a motion transform node between TLAS and BLAS which does not have one. */
// Always get the instance ID from the TLAS
// There might be a motion transform node between TLAS and BLAS which does not have one
uint object = optixGetInstanceIdFromHandle(optixGetTransformListHandle(0));
#else
uint object = optixGetInstanceId();
#endif
/* Choose between always returning object ID or only for instances. */
// Choose between always returning object ID or only for instances
if (always || (object & 1) == 0)
/* Can just remove the low bit since instance always contains object ID. */
// Can just remove the low bit since instance always contains object ID
return object >> 1;
else
/* Set to OBJECT_NONE if this is not an instanced object. */
// Set to OBJECT_NONE if this is not an instanced object
return OBJECT_NONE;
}
@@ -93,30 +93,23 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_volume_st
extern "C" __global__ void __miss__kernel_optix_miss()
{
/* 'kernel_path_lamp_emission' checks intersection distance, so need to set it even on a miss. */
// 'kernel_path_lamp_emission' checks intersection distance, so need to set it even on a miss
optixSetPayload_0(__float_as_uint(optixGetRayTmax()));
optixSetPayload_5(PRIMITIVE_NONE);
}
extern "C" __global__ void __anyhit__kernel_optix_local_hit()
{
#ifdef __HAIR__
if (!optixIsTriangleHit()) {
/* Ignore curves. */
return optixIgnoreIntersection();
}
#endif
#ifdef __BVH_LOCAL__
const uint object = get_object_id<true>();
if (object != optixGetPayload_4() /* local_object */) {
/* Only intersect with matching object. */
// Only intersect with matching object
return optixIgnoreIntersection();
}
const uint max_hits = optixGetPayload_5();
if (max_hits == 0) {
/* Special case for when no hit information is requested, just report that something was hit */
// Special case for when no hit information is requested, just report that something was hit
optixSetPayload_5(true);
return optixTerminateRay();
}
@@ -143,9 +136,8 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit()
}
else {
if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) {
/* Record closest intersection only.
* Do not terminate ray here, since there is no guarantee about distance ordering in any-hit.
*/
// Record closest intersection only
// Do not terminate ray here, since there is no guarantee about distance ordering in any-hit
return optixIgnoreIntersection();
}
@@ -162,14 +154,14 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit()
isect->u = 1.0f - barycentrics.y - barycentrics.x;
isect->v = barycentrics.x;
/* Record geometric normal. */
// Record geometric normal
const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim);
const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0));
const float3 tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1));
const float3 tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2));
local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
/* Continue tracing (without this the trace call would return after the first hit). */
// Continue tracing (without this the trace call would return after the first hit)
optixIgnoreIntersection();
#endif
}
@@ -198,7 +190,7 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
u = __uint_as_float(optixGetAttribute_0());
v = __uint_as_float(optixGetAttribute_1());
/* Filter out curve endcaps. */
// Filter out curve endcaps
if (u == 0.0f || u == 1.0f) {
ignore_intersection = true;
}
@@ -249,10 +241,10 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
isect->type = kernel_tex_fetch(__prim_type, prim);
# ifdef __TRANSPARENT_SHADOWS__
/* Detect if this surface has a shader with transparent shadows. */
// Detect if this surface has a shader with transparent shadows
if (!shader_transparent_shadow(NULL, isect) || max_hits == 0) {
# endif
/* If no transparent shadows, all light is blocked and we can stop immediately. */
// If no transparent shadows, all light is blocked and we can stop immediately
optixSetPayload_5(true);
return optixTerminateRay();
# ifdef __TRANSPARENT_SHADOWS__
@@ -260,39 +252,24 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
# endif
}
/* Continue tracing. */
// Continue tracing
optixIgnoreIntersection();
#endif
}
extern "C" __global__ void __anyhit__kernel_optix_volume_test()
extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
{
#ifdef __HAIR__
if (!optixIsTriangleHit()) {
/* Ignore curves. */
return optixIgnoreIntersection();
}
#endif
uint visibility = optixGetPayload_4();
#ifdef __VISIBILITY_FLAG__
const uint prim = optixGetPrimitiveIndex();
const uint visibility = optixGetPayload_4();
if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) {
return optixIgnoreIntersection();
}
#endif
const uint object = get_object_id<true>();
if ((kernel_tex_fetch(__object_flag, object) & SD_OBJECT_HAS_VOLUME) == 0) {
return optixIgnoreIntersection();
}
}
extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
{
#ifdef __HAIR__
if (!optixIsTriangleHit()) {
/* Filter out curve endcaps. */
// Filter out curve endcaps
const float u = __uint_as_float(optixGetAttribute_0());
if (u == 0.0f || u == 1.0f) {
return optixIgnoreIntersection();
@@ -300,26 +277,18 @@ extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
}
#endif
#ifdef __VISIBILITY_FLAG__
const uint prim = optixGetPrimitiveIndex();
const uint visibility = optixGetPayload_4();
if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) {
return optixIgnoreIntersection();
}
/* Shadow ray early termination. */
// Shadow ray early termination
if (visibility & PATH_RAY_SHADOW_OPAQUE) {
return optixTerminateRay();
}
#endif
}
extern "C" __global__ void __closesthit__kernel_optix_hit()
{
optixSetPayload_0(__float_as_uint(optixGetRayTmax())); /* Intersection distance */
optixSetPayload_0(__float_as_uint(optixGetRayTmax())); // Intersection distance
optixSetPayload_3(optixGetPrimitiveIndex());
optixSetPayload_4(get_object_id());
/* Can be PRIMITIVE_TRIANGLE and PRIMITIVE_MOTION_TRIANGLE or curve type and segment index. */
// Can be PRIMITIVE_TRIANGLE and PRIMITIVE_MOTION_TRIANGLE or curve type and segment index
optixSetPayload_5(kernel_tex_fetch(__prim_type, optixGetPrimitiveIndex()));
if (optixIsTriangleHit()) {
@@ -328,7 +297,7 @@ extern "C" __global__ void __closesthit__kernel_optix_hit()
optixSetPayload_2(__float_as_uint(barycentrics.x));
}
else {
optixSetPayload_1(optixGetAttribute_0()); /* Same as 'optixGetCurveParameter()' */
optixSetPayload_1(optixGetAttribute_0()); // Same as 'optixGetCurveParameter()'
optixSetPayload_2(optixGetAttribute_1());
}
}
@@ -342,7 +311,7 @@ ccl_device_inline void optix_intersection_curve(const uint prim, const uint type
float3 P = optixGetObjectRayOrigin();
float3 dir = optixGetObjectRayDirection();
/* The direction is not normalized by default, but the curve intersection routine expects that */
// The direction is not normalized by default, but the curve intersection routine expects that
float len;
dir = normalize_len(dir, &len);
@@ -354,15 +323,15 @@ ccl_device_inline void optix_intersection_curve(const uint prim, const uint type
Intersection isect;
isect.t = optixGetRayTmax();
/* Transform maximum distance into object space. */
// Transform maximum distance into object space
if (isect.t != FLT_MAX)
isect.t *= len;
if (curve_intersect(NULL, &isect, P, dir, isect.t, visibility, object, prim, time, type)) {
optixReportIntersection(isect.t / len,
type & PRIMITIVE_ALL,
__float_as_int(isect.u), /* Attribute_0 */
__float_as_int(isect.v)); /* Attribute_1 */
__float_as_int(isect.u), // Attribute_0
__float_as_int(isect.v)); // Attribute_1
}
}
@@ -713,7 +713,7 @@ ccl_device_inline void curve_shader_setup(const KernelGlobals *kg,
P = transform_point(&tfm, P);
D = transform_direction(&tfm, D * t);
D = safe_normalize_len(D, &t);
D = normalize_len(D, &t);
}
int prim = kernel_tex_fetch(__prim_index, isect_prim);
@@ -764,10 +764,8 @@ ccl_device_inline void curve_shader_setup(const KernelGlobals *kg,
/* Thick curves, compute normal using direction from inside the curve.
* This could be optimized by recording the normal in the intersection,
* however for Optix this would go beyond the size of the payload. */
/* NOTE: It is possible that P will be the same as P_inside (precision issues, or very small
* radius). In this case use the view direction to approximate the normal. */
const float3 P_inside = float4_to_float3(catmull_rom_basis_eval(P_curve, sd->u));
const float3 Ng = (!isequal_float3(P, P_inside)) ? normalize(P - P_inside) : -sd->I;
const float3 Ng = normalize(P - P_inside);
sd->N = Ng;
sd->Ng = Ng;
@@ -41,18 +41,7 @@ ccl_device_inline int find_attribute_motion(const KernelGlobals *kg,
uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
while (attr_map.x != id) {
if (UNLIKELY(attr_map.x == ATTR_STD_NONE)) {
if (UNLIKELY(attr_map.y == 0)) {
return (int)ATTR_STD_NOT_FOUND;
}
else {
/* Chain jump to a different part of the table. */
attr_offset = attr_map.z;
}
}
else {
attr_offset += ATTR_PRIM_TYPES;
}
attr_offset += ATTR_PRIM_TYPES;
attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
}
@@ -112,6 +112,8 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
float3 P, Ng;
int shader;
triangle_point_normal(kg, kernel_data.bake.object_index, prim, u, v, &P, &Ng, &shader);
shader &= SHADER_MASK;
if (kernel_data.film.pass_background != PASS_UNUSED) {
/* Environment baking. */
@@ -130,13 +132,11 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
}
else {
/* Surface baking. */
const float3 N = (shader & SHADER_SMOOTH_NORMAL) ? triangle_smooth_normal(kg, Ng, prim, u, v) :
Ng;
/* Setup ray. */
Ray ray ccl_optional_struct_init;
ray.P = P + N;
ray.D = -N;
ray.P = P + Ng;
ray.D = -Ng;
ray.t = FLT_MAX;
ray.time = 0.5f;
@@ -166,13 +166,12 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
integrator_state_write_isect(INTEGRATOR_STATE_PASS, &isect);
/* Setup next kernel to execute. */
const int shader_index = shader & SHADER_MASK;
const int shader_flags = kernel_tex_fetch(__shaders, shader_index).flags;
const int shader_flags = kernel_tex_fetch(__shaders, shader).flags;
if ((shader_flags & SD_HAS_RAYTRACE) || (kernel_data.film.pass_ao != PASS_UNUSED)) {
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index);
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
}
else {
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader_index);
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
}
}
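This hunk hinges on the shader value fetched per triangle packing an index in its low bits and flags such as SHADER_SMOOTH_NORMAL in its high bits; masking once up front lets the rest of the function pass the bare index around. A sketch of that split with an illustrative bit layout (the real Cycles encoding may differ):

#include <cstdint>

constexpr uint32_t SHADER_MASK = 0x00FFFFFFu;          /* Hypothetical. */
constexpr uint32_t SHADER_SMOOTH_NORMAL = 0x01000000u; /* Hypothetical. */

inline uint32_t shader_index(const uint32_t packed)
{
  return packed & SHADER_MASK; /* Bare index for table lookups. */
}

inline bool shader_smooth(const uint32_t packed)
{
  return (packed & SHADER_SMOOTH_NORMAL) != 0; /* Flag query. */
}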

@@ -365,16 +365,19 @@ ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS,
#ifdef __VOLUME__
if (!(sd.flag & SD_HAS_ONLY_VOLUME)) {
#endif
const int path_flag = INTEGRATOR_STATE(path, flag);
#ifdef __SUBSURFACE__
/* Can skip shader evaluation for BSSRDF exit point without bump mapping. */
if (!(path_flag & PATH_RAY_SUBSURFACE) || ((sd.flag & SD_HAS_BSSRDF_BUMP)))
#endif
{
/* Evaluate shader. */
PROFILING_EVENT(PROFILING_SHADE_SURFACE_EVAL);
shader_eval_surface<node_feature_mask>(INTEGRATOR_STATE_PASS, &sd, render_buffer, path_flag);
const int path_flag = INTEGRATOR_STATE(path, flag);
#ifdef __SUBSURFACE__
/* Can skip shader evaluation for BSSRDF exit point without bump mapping. */
if (!(path_flag & PATH_RAY_SUBSURFACE) || ((sd.flag & SD_HAS_BSSRDF_BUMP)))
#endif
{
/* Evaluate shader. */
PROFILING_EVENT(PROFILING_SHADE_SURFACE_EVAL);
shader_eval_surface<node_feature_mask>(
INTEGRATOR_STATE_PASS, &sd, render_buffer, path_flag);
}
}
#ifdef __SUBSURFACE__
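The skip condition in the hunk above rests on one observation: a BSSRDF exit point can reuse the shading done at the entry point unless the material bump-maps the normal. A standalone sketch of the predicate (flag bit values are illustrative):

constexpr int PATH_RAY_SUBSURFACE = 1 << 0; /* Hypothetical flag bits. */
constexpr int SD_HAS_BSSRDF_BUMP = 1 << 1;

inline bool need_shader_eval(const int path_flag, const int sd_flag)
{
  /* Evaluate unless this is a subsurface exit point without bump mapping. */
  return !(path_flag & PATH_RAY_SUBSURFACE) || (sd_flag & SD_HAS_BSSRDF_BUMP);
}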
@@ -414,20 +417,17 @@ ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS,
/* Perform path termination. Most paths have already been terminated in
* the intersect_closest kernel, this is just for emission and for dividing
* throughput by the probability at the right moment.
*
* Also ensure we don't do it twice for SSS at both the entry and exit point. */
if (!(path_flag & PATH_RAY_SUBSURFACE)) {
const float probability = (path_flag & PATH_RAY_TERMINATE_ON_NEXT_SURFACE) ?
0.0f :
path_state_continuation_probability(INTEGRATOR_STATE_PASS,
path_flag);
if (probability == 0.0f) {
return false;
}
else if (probability != 1.0f) {
INTEGRATOR_STATE_WRITE(path, throughput) /= probability;
}
* throughput by the probability at the right moment. */
const int path_flag = INTEGRATOR_STATE(path, flag);
const float probability = (path_flag & PATH_RAY_TERMINATE_ON_NEXT_SURFACE) ?
0.0f :
path_state_continuation_probability(INTEGRATOR_STATE_PASS,
path_flag);
if (probability == 0.0f) {
return false;
}
else if (probability != 1.0f) {
INTEGRATOR_STATE_WRITE(path, throughput) /= probability;
}
#ifdef __DENOISING_FEATURES__
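The termination logic above is the compensation half of Russian roulette: per the comment, the random survival decision already happened in the intersect_closest kernel, so here a path with continuation probability p either terminates outright (p == 0) or has its throughput divided by p to keep the estimator unbiased. A minimal standalone sketch of just that step:

/* Returns false when the path terminates; otherwise rescales throughput. */
inline bool apply_continuation_probability(const float probability, float &throughput)
{
  if (probability == 0.0f) {
    return false; /* Deterministic termination. */
  }
  if (probability != 1.0f) {
    throughput /= probability; /* Compensate for paths killed by the roulette. */
  }
  return true;
}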

@@ -74,7 +74,7 @@ ccl_device_inline bool shadow_volume_shader_sample(INTEGRATOR_STATE_ARGS,
ShaderData *ccl_restrict sd,
float3 *ccl_restrict extinction)
{
shader_eval_volume<true>(INTEGRATOR_STATE_PASS, sd, PATH_RAY_SHADOW, [=](const int i) {
shader_eval_volume(INTEGRATOR_STATE_PASS, sd, PATH_RAY_SHADOW, [=](const int i) {
return integrator_state_read_shadow_volume_stack(INTEGRATOR_STATE_PASS, i);
});
@@ -93,7 +93,7 @@ ccl_device_inline bool volume_shader_sample(INTEGRATOR_STATE_ARGS,
VolumeShaderCoefficients *coeff)
{
const int path_flag = INTEGRATOR_STATE(path, flag);
shader_eval_volume<false>(INTEGRATOR_STATE_PASS, sd, path_flag, [=](const int i) {
shader_eval_volume(INTEGRATOR_STATE_PASS, sd, path_flag, [=](const int i) {
return integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i);
});
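The template argument that changes in these two hunks selects between shadow and camera/indirect volume evaluation at compile time, so each instantiation can drop the code paths the other needs. A generic sketch of that pattern (names are illustrative, not the shader_eval_volume signature):

template<bool shadow> inline float eval_volume_sketch(const float density)
{
  if (shadow) {
    return density * 0.5f; /* Hypothetical shadow-only shortcut. */
  }
  return density; /* Full evaluation otherwise; the branch folds away at compile time. */
}

/* Usage: eval_volume_sketch<true>(d) on shadow rays,
 * eval_volume_sketch<false>(d) elsewhere. */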
@@ -726,10 +726,6 @@ ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
}
}
if (ls->shader & SHADER_EXCLUDE_SCATTER) {
return;
}
/* Evaluate light shader.
*
* TODO: can we reuse sd memory? In theory we can move this after

Some files were not shown because too many files have changed in this diff.