realize non-standard virtual arrays

use threads
add slice method
2021-09-26 15:07:48 +02:00 · 2021-09-26 15:07:27 +02:00 · 2021-09-26 14:52:12 +02:00 · 2021-09-26 14:11:42 +02:00 · 2021-09-26 13:39:27 +02:00 · 2021-09-26 13:23:09 +02:00
778 changed files with 7634 additions and 22784 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -419,8 +419,6 @@ mark_as_advanced(WITH_CYCLES_NATIVE_ONLY)

 option(WITH_CYCLES_DEVICE_CUDA              "Enable Cycles CUDA compute support" ON)
 option(WITH_CYCLES_DEVICE_OPTIX             "Enable Cycles OptiX support" ON)
-option(WITH_CYCLES_DEVICE_HIP               "Enable Cycles HIP support" OFF)
-mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
 mark_as_advanced(WITH_CYCLES_DEVICE_CUDA)

 option(WITH_CUDA_DYNLOAD "Dynamically load CUDA libraries at runtime" ON)
@@ -823,11 +821,6 @@ if(NOT WITH_CUDA_DYNLOAD)
  endif()
 endif()

-if(WITH_CYCLES_DEVICE_HIP)
-  # Currently HIP must be dynamically loaded, this may change in future toolkits
-  set(WITH_HIP_DYNLOAD ON)
-endif()
-
 #-----------------------------------------------------------------------------
 # Check check if submodules are cloned

@@ -1857,9 +1850,6 @@ elseif(WITH_CYCLES_STANDALONE)
  if(WITH_CUDA_DYNLOAD)
    add_subdirectory(extern/cuew)
  endif()
-  if(WITH_HIP_DYNLOAD)
-    add_subdirectory(extern/hipew)
-  endif()
  if(NOT WITH_SYSTEM_GLEW)
    add_subdirectory(extern/glew)
  endif()
--- a/build_files/cmake/cmake_static_check_cppcheck.py
+++ b/build_files/cmake/cmake_static_check_cppcheck.py
@@ -24,7 +24,6 @@ import project_source_info
 import subprocess
 import sys
 import os
-import tempfile

 from typing import (
    Any,
@@ -36,6 +35,7 @@ USE_QUIET = (os.environ.get("QUIET", None) is not None)

 CHECKER_IGNORE_PREFIX = [
    "extern",
+    "intern/moto",
 ]

 CHECKER_BIN = "cppcheck"
@@ -47,19 +47,13 @@ CHECKER_ARGS = [
    "--max-configs=1",  # speeds up execution
    #  "--check-config", # when includes are missing
    "--enable=all",  # if you want sixty hundred pedantic suggestions
-
-    # Quiet output, otherwise all defines/includes are printed (overly verbose).
-    # Only enable this for troubleshooting (if defines are not set as expected for example).
-    "--quiet",
-
-    # NOTE: `--cppcheck-build-dir=<dir>` is added later as a temporary directory.
 ]

 if USE_QUIET:
    CHECKER_ARGS.append("--quiet")


-def cppcheck() -> None:
+def main() -> None:
    source_info = project_source_info.build_info(ignore_prefix_list=CHECKER_IGNORE_PREFIX)
    source_defines = project_source_info.build_defines_as_args()

@@ -84,10 +78,7 @@ def cppcheck() -> None:
            percent_str = "[" + ("%.2f]" % percent).rjust(7) + " %:"

            sys.stdout.flush()
-            sys.stdout.write("%s %s\n" % (
-                percent_str,
-                os.path.relpath(c, project_source_info.SOURCE_DIR)
-            ))
+            sys.stdout.write("%s " % percent_str)

        return subprocess.Popen(cmd)

@@ -99,11 +90,5 @@ def cppcheck() -> None:
    print("Finished!")


-def main() -> None:
-    with tempfile.TemporaryDirectory() as temp_dir:
-        CHECKER_ARGS.append("--cppcheck-build-dir=" + temp_dir)
-        cppcheck()
-
-
 if __name__ == "__main__":
    main()
--- a/build_files/cmake/project_source_info.py
+++ b/build_files/cmake/project_source_info.py
@@ -243,9 +243,7 @@ def build_defines_as_args() -> List[str]:
 # use this module.
 def queue_processes(
        process_funcs: Sequence[Tuple[Callable[..., subprocess.Popen[Any]], Tuple[Any, ...]]],
-        *,
        job_total: int =-1,
-        sleep: float = 0.1,
 ) -> None:
    """ Takes a list of function arg pairs, each function must return a process
    """
@@ -273,20 +271,14 @@ def queue_processes(

                if len(processes) <= job_total:
                    break
-                time.sleep(sleep)
+                else:
+                    time.sleep(0.1)

            sys.stdout.flush()
            sys.stderr.flush()

            processes.append(func(*args))

-        # Don't return until all jobs have finished.
-        while 1:
-            processes[:] = [p for p in processes if p.poll() is None]
-            if not processes:
-                break
-            time.sleep(sleep)
-

 def main() -> None:
    if not os.path.exists(join(CMAKE_DIR, "CMakeCache.txt")):
--- a/doc/python_api/examples/bpy.types.Bone.convert_local_to_pose.py
+++ b/doc/python_api/examples/bpy.types.Bone.convert_local_to_pose.py
@@ -1,40 +0,0 @@
-"""
-This method enables conversions between Local and Pose space for bones in
-the middle of updating the armature without having to update dependencies
-after each change, by manually carrying updated matrices in a recursive walk.
-"""
-
-def set_pose_matrices(obj, matrix_map):
-    "Assign pose space matrices of all bones at once, ignoring constraints."
-
-    def rec(pbone, parent_matrix):
-        matrix = matrix_map[pbone.name]
-
-        ## Instead of:
-        # pbone.matrix = matrix
-        # bpy.context.view_layer.update()
-
-        # Compute and assign local matrix, using the new parent matrix
-        if pbone.parent:
-            pbone.matrix_basis = pbone.bone.convert_local_to_pose(
-                matrix,
-                pbone.bone.matrix_local,
-                parent_matrix=parent_matrix,
-                parent_matrix_local=pbone.parent.bone.matrix_local,
-                invert=True
-            )
-        else:
-            pbone.matrix_basis = pbone.bone.convert_local_to_pose(
-                matrix,
-                pbone.bone.matrix_local,
-                invert=True
-            )
-
-        # Recursively process children, passing the new matrix through
-        for child in pbone.children:
-            rec(child, matrix)
-
-    # Scan all bone trees from their roots
-    for pbone in obj.pose.bones:
-        if not pbone.parent:
-            rec(pbone, None)
--- a/doc/python_api/sphinx_doc_gen.py
+++ b/doc/python_api/sphinx_doc_gen.py
@@ -1101,7 +1101,6 @@ context_type_map = {
    "scene": ("Scene", False),
    "sculpt_object": ("Object", False),
    "selectable_objects": ("Object", True),
-    "selected_asset_files": ("FileSelectEntry", True),
    "selected_bones": ("EditBone", True),
    "selected_editable_bones": ("EditBone", True),
    "selected_editable_fcurves": ("FCurve", True),
--- a/extern/CMakeLists.txt
+++ b/extern/CMakeLists.txt
@@ -67,12 +67,9 @@ endif()

 if(WITH_CYCLES OR WITH_COMPOSITOR OR WITH_OPENSUBDIV)
  add_subdirectory(clew)
-  if((WITH_CYCLES_DEVICE_CUDA OR WITH_CYCLES_DEVICE_OPTIX) AND WITH_CUDA_DYNLOAD)
+  if(WITH_CUDA_DYNLOAD)
    add_subdirectory(cuew)
  endif()
-  if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
-    add_subdirectory(hipew)
-  endif()
 endif()

 if(WITH_GHOST_X11 AND WITH_GHOST_XDND)
--- a/extern/hipew/CMakeLists.txt
+++ b/extern/hipew/CMakeLists.txt
@@ -1,39 +0,0 @@
-# ***** BEGIN GPL LICENSE BLOCK *****
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software Foundation,
-# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-#
-# The Original Code is Copyright (C) 2021, Blender Foundation
-# All rights reserved.
-# ***** END GPL LICENSE BLOCK *****
-
-set(INC
-  .
-  include
-)
-
-set(INC_SYS
-
-)
-
-set(SRC
-  src/hipew.c
-
-  include/hipew.h
-)
-
-set(LIB
-)
-
-blender_add_lib(extern_hipew "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
--- a/extern/hipew/include/hipew.h
+++ b/extern/hipew/include/hipew.h
--- a/extern/hipew/src/hipew.c
+++ b/extern/hipew/src/hipew.c
@@ -1,533 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License
- */
-#ifdef _MSC_VER
-#  if _MSC_VER < 1900
-#    define snprintf _snprintf
-#  endif
-#  define popen _popen
-#  define pclose _pclose
-#  define _CRT_SECURE_NO_WARNINGS
-#endif
-
-#include <hipew.h>
-#include <assert.h>
-#include <stdio.h>
-#include <string.h>
-#include <sys/stat.h>
-
-#ifdef _WIN32
-#  define WIN32_LEAN_AND_MEAN
-#  define VC_EXTRALEAN
-#  include <windows.h>
-
-/* Utility macros. */
-
-typedef HMODULE DynamicLibrary;
-
-#  define dynamic_library_open(path)         LoadLibraryA(path)
-#  define dynamic_library_close(lib)         FreeLibrary(lib)
-#  define dynamic_library_find(lib, symbol)  GetProcAddress(lib, symbol)
-#else
-#  include <dlfcn.h>
-
-typedef void* DynamicLibrary;
-
-#  define dynamic_library_open(path)         dlopen(path, RTLD_NOW)
-#  define dynamic_library_close(lib)         dlclose(lib)
-#  define dynamic_library_find(lib, symbol)  dlsym(lib, symbol)
-#endif
-
-#define _LIBRARY_FIND_CHECKED(lib, name) \
-        name = (t##name *)dynamic_library_find(lib, #name); \
-        assert(name);
-
-#define _LIBRARY_FIND(lib, name) \
-        name = (t##name *)dynamic_library_find(lib, #name);
-
-#define HIP_LIBRARY_FIND_CHECKED(name) \
-        _LIBRARY_FIND_CHECKED(hip_lib, name)
-#define HIP_LIBRARY_FIND(name) _LIBRARY_FIND(hip_lib, name)
-
-
-static DynamicLibrary hip_lib;
-
-/* Function definitions. */
-thipGetErrorName *hipGetErrorName;
-thipInit *hipInit;
-thipDriverGetVersion *hipDriverGetVersion;
-thipGetDevice *hipGetDevice;
-thipGetDeviceCount *hipGetDeviceCount;
-thipDeviceGetName *hipDeviceGetName;
-thipDeviceGetAttribute *hipDeviceGetAttribute;
-thipDeviceComputeCapability *hipDeviceComputeCapability;
-thipDevicePrimaryCtxRetain *hipDevicePrimaryCtxRetain;
-thipDevicePrimaryCtxRelease *hipDevicePrimaryCtxRelease;
-thipDevicePrimaryCtxSetFlags *hipDevicePrimaryCtxSetFlags;
-thipDevicePrimaryCtxGetState *hipDevicePrimaryCtxGetState;
-thipDevicePrimaryCtxReset *hipDevicePrimaryCtxReset;
-thipCtxCreate *hipCtxCreate;
-thipCtxDestroy *hipCtxDestroy;
-thipCtxPushCurrent *hipCtxPushCurrent;
-thipCtxPopCurrent *hipCtxPopCurrent;
-thipCtxSetCurrent *hipCtxSetCurrent;
-thipCtxGetCurrent *hipCtxGetCurrent;
-thipCtxGetDevice *hipCtxGetDevice;
-thipCtxGetFlags *hipCtxGetFlags;
-thipCtxSynchronize *hipCtxSynchronize;
-thipDeviceSynchronize *hipDeviceSynchronize;
-thipCtxGetCacheConfig *hipCtxGetCacheConfig;
-thipCtxSetCacheConfig *hipCtxSetCacheConfig;
-thipCtxGetSharedMemConfig *hipCtxGetSharedMemConfig;
-thipCtxSetSharedMemConfig *hipCtxSetSharedMemConfig;
-thipCtxGetApiVersion *hipCtxGetApiVersion;
-thipModuleLoad *hipModuleLoad;
-thipModuleLoadData *hipModuleLoadData;
-thipModuleLoadDataEx *hipModuleLoadDataEx;
-thipModuleUnload *hipModuleUnload;
-thipModuleGetFunction *hipModuleGetFunction;
-thipModuleGetGlobal *hipModuleGetGlobal;
-thipModuleGetTexRef *hipModuleGetTexRef;
-thipMemGetInfo *hipMemGetInfo;
-thipMalloc *hipMalloc;
-thipMemAllocPitch *hipMemAllocPitch;
-thipFree *hipFree;
-thipMemGetAddressRange *hipMemGetAddressRange;
-thipHostMalloc *hipHostMalloc;
-thipHostFree *hipHostFree;
-thipHostGetDevicePointer *hipHostGetDevicePointer;
-thipHostGetFlags *hipHostGetFlags;
-thipMallocManaged *hipMallocManaged;
-thipDeviceGetByPCIBusId *hipDeviceGetByPCIBusId;
-thipDeviceGetPCIBusId *hipDeviceGetPCIBusId;
-thipMemcpyPeer *hipMemcpyPeer;
-thipMemcpyHtoD *hipMemcpyHtoD;
-thipMemcpyDtoH *hipMemcpyDtoH;
-thipMemcpyDtoD *hipMemcpyDtoD;
-thipDrvMemcpy2DUnaligned *hipDrvMemcpy2DUnaligned;
-thipMemcpyParam2D *hipMemcpyParam2D;
-thipDrvMemcpy3D *hipDrvMemcpy3D;
-thipMemcpyHtoDAsync *hipMemcpyHtoDAsync;
-thipMemcpyDtoHAsync *hipMemcpyDtoHAsync;
-thipMemcpyParam2DAsync *hipMemcpyParam2DAsync;
-thipDrvMemcpy3DAsync *hipDrvMemcpy3DAsync;
-thipMemsetD8 *hipMemsetD8;
-thipMemsetD16 *hipMemsetD16;
-thipMemsetD32 *hipMemsetD32;
-thipMemsetD8Async *hipMemsetD8Async;
-thipMemsetD16Async *hipMemsetD16Async;
-thipMemsetD32Async *hipMemsetD32Async;
-thipArrayCreate *hipArrayCreate;
-thipArrayDestroy *hipArrayDestroy;
-thipArray3DCreate *hipArray3DCreate;
-thipStreamCreateWithFlags *hipStreamCreateWithFlags;
-thipStreamCreateWithPriority *hipStreamCreateWithPriority;
-thipStreamGetPriority *hipStreamGetPriority;
-thipStreamGetFlags *hipStreamGetFlags;
-thipStreamWaitEvent *hipStreamWaitEvent;
-thipStreamAddCallback *hipStreamAddCallback;
-thipStreamQuery *hipStreamQuery;
-thipStreamSynchronize *hipStreamSynchronize;
-thipStreamDestroy *hipStreamDestroy;
-thipEventCreateWithFlags *hipEventCreateWithFlags;
-thipEventRecord *hipEventRecord;
-thipEventQuery *hipEventQuery;
-thipEventSynchronize *hipEventSynchronize;
-thipEventDestroy *hipEventDestroy;
-thipEventElapsedTime *hipEventElapsedTime;
-thipFuncGetAttribute *hipFuncGetAttribute;
-thipFuncSetCacheConfig *hipFuncSetCacheConfig;
-thipModuleLaunchKernel *hipModuleLaunchKernel;
-thipDrvOccupancyMaxActiveBlocksPerMultiprocessor *hipDrvOccupancyMaxActiveBlocksPerMultiprocessor;
-thipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags *hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
-thipModuleOccupancyMaxPotentialBlockSize *hipModuleOccupancyMaxPotentialBlockSize;
-thipTexRefSetArray *hipTexRefSetArray;
-thipTexRefSetAddress *hipTexRefSetAddress;
-thipTexRefSetAddress2D *hipTexRefSetAddress2D;
-thipTexRefSetFormat *hipTexRefSetFormat;
-thipTexRefSetAddressMode *hipTexRefSetAddressMode;
-thipTexRefSetFilterMode *hipTexRefSetFilterMode;
-thipTexRefSetFlags *hipTexRefSetFlags;
-thipTexRefGetAddress *hipTexRefGetAddress;
-thipTexRefGetArray *hipTexRefGetArray;
-thipTexRefGetAddressMode *hipTexRefGetAddressMode;
-thipTexObjectCreate *hipTexObjectCreate;
-thipTexObjectDestroy *hipTexObjectDestroy;
-thipDeviceCanAccessPeer *hipDeviceCanAccessPeer;
-
-thipCtxEnablePeerAccess *hipCtxEnablePeerAccess;
-thipCtxDisablePeerAccess *hipCtxDisablePeerAccess;
-thipDeviceGetP2PAttribute *hipDeviceGetP2PAttribute;
-thipGraphicsUnregisterResource *hipGraphicsUnregisterResource;
-thipGraphicsMapResources *hipGraphicsMapResources;
-thipGraphicsUnmapResources *hipGraphicsUnmapResources;
-thipGraphicsResourceGetMappedPointer *hipGraphicsResourceGetMappedPointer;
-
-thipGraphicsGLRegisterBuffer *hipGraphicsGLRegisterBuffer;
-thipGLGetDevices *hipGLGetDevices;
-
-
-
-static DynamicLibrary dynamic_library_open_find(const char **paths) {
-  int i = 0;
-  while (paths[i] != NULL) {
-      DynamicLibrary lib = dynamic_library_open(paths[i]);
-      if (lib != NULL) {
-        return lib;
-      }
-      ++i;
-  }
-  return NULL;
-}
-
-/* Implementation function. */
-static void hipewHipExit(void) {
-  if (hip_lib != NULL) {
-    /*  Ignore errors. */
-    dynamic_library_close(hip_lib);
-    hip_lib = NULL;
-  }
-}
-
-static int hipewHipInit(void) {
-  /* Library paths. */
-#ifdef _WIN32
-  /* Expected in c:/windows/system or similar, no path needed. */
-  const char *hip_paths[] = {"amdhip64.dll", NULL};
-#elif defined(__APPLE__)
-  /* Default installation path. */
-  const char *hip_paths[] = {"", NULL};
-#else
-  const char *hip_paths[] = {"/opt/rocm/hip/lib/libamdhip64.so", NULL};
-#endif
-  static int initialized = 0;
-  static int result = 0;
-  int error, driver_version;
-
-  if (initialized) {
-    return result;
-  }
-
-  initialized = 1;
-
-  error = atexit(hipewHipExit);
-  if (error) {
-    result = HIPEW_ERROR_ATEXIT_FAILED;
-    return result;
-  }
-
-  /* Load library. */
-  hip_lib = dynamic_library_open_find(hip_paths);
-
-  if (hip_lib == NULL) {
-    result = HIPEW_ERROR_OPEN_FAILED;
-    return result;
-  }
-
-  /* Fetch all function pointers. */
-  HIP_LIBRARY_FIND_CHECKED(hipGetErrorName);
-  HIP_LIBRARY_FIND_CHECKED(hipInit);
-  HIP_LIBRARY_FIND_CHECKED(hipDriverGetVersion);
-  HIP_LIBRARY_FIND_CHECKED(hipGetDevice);
-  HIP_LIBRARY_FIND_CHECKED(hipGetDeviceCount);
-  HIP_LIBRARY_FIND_CHECKED(hipDeviceGetName);
-  HIP_LIBRARY_FIND_CHECKED(hipDeviceGetAttribute);
-  HIP_LIBRARY_FIND_CHECKED(hipDeviceComputeCapability);
-  HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxRetain);
-  HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxRelease);
-  HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxSetFlags);
-  HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxGetState);
-  HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxReset);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxCreate);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxDestroy);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxPushCurrent);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxPopCurrent);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxSetCurrent);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxGetCurrent);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxGetDevice);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxGetFlags);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxSynchronize);
-  HIP_LIBRARY_FIND_CHECKED(hipDeviceSynchronize);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxGetCacheConfig);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxSetCacheConfig);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxGetSharedMemConfig);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxSetSharedMemConfig);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxGetApiVersion);
-  HIP_LIBRARY_FIND_CHECKED(hipModuleLoad);
-  HIP_LIBRARY_FIND_CHECKED(hipModuleLoadData);
-  HIP_LIBRARY_FIND_CHECKED(hipModuleLoadDataEx);
-  HIP_LIBRARY_FIND_CHECKED(hipModuleUnload);
-  HIP_LIBRARY_FIND_CHECKED(hipModuleGetFunction);
-  HIP_LIBRARY_FIND_CHECKED(hipModuleGetGlobal);
-  HIP_LIBRARY_FIND_CHECKED(hipModuleGetTexRef);
-  HIP_LIBRARY_FIND_CHECKED(hipMemGetInfo);
-  HIP_LIBRARY_FIND_CHECKED(hipMalloc);
-  HIP_LIBRARY_FIND_CHECKED(hipMemAllocPitch);
-  HIP_LIBRARY_FIND_CHECKED(hipFree);
-  HIP_LIBRARY_FIND_CHECKED(hipMemGetAddressRange);
-  HIP_LIBRARY_FIND_CHECKED(hipHostMalloc);
-  HIP_LIBRARY_FIND_CHECKED(hipHostFree);
-  HIP_LIBRARY_FIND_CHECKED(hipHostGetDevicePointer);
-  HIP_LIBRARY_FIND_CHECKED(hipHostGetFlags);
-  HIP_LIBRARY_FIND_CHECKED(hipMallocManaged);
-  HIP_LIBRARY_FIND_CHECKED(hipDeviceGetByPCIBusId);
-  HIP_LIBRARY_FIND_CHECKED(hipDeviceGetPCIBusId);
-  HIP_LIBRARY_FIND_CHECKED(hipMemcpyPeer);
-  HIP_LIBRARY_FIND_CHECKED(hipMemcpyHtoD);
-  HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoH);
-  HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoD);
-  HIP_LIBRARY_FIND_CHECKED(hipMemcpyParam2D);
-  HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy3D);
-  HIP_LIBRARY_FIND_CHECKED(hipMemcpyHtoDAsync);
-  HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoHAsync);
-  HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy2DUnaligned);
-  HIP_LIBRARY_FIND_CHECKED(hipMemcpyParam2DAsync);
-  HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy3DAsync);
-  HIP_LIBRARY_FIND_CHECKED(hipMemsetD8);
-  HIP_LIBRARY_FIND_CHECKED(hipMemsetD16);
-  HIP_LIBRARY_FIND_CHECKED(hipMemsetD32);
-  HIP_LIBRARY_FIND_CHECKED(hipMemsetD8Async);
-  HIP_LIBRARY_FIND_CHECKED(hipMemsetD16Async);
-  HIP_LIBRARY_FIND_CHECKED(hipMemsetD32Async);
-  HIP_LIBRARY_FIND_CHECKED(hipArrayCreate);
-  HIP_LIBRARY_FIND_CHECKED(hipArrayDestroy);
-  HIP_LIBRARY_FIND_CHECKED(hipArray3DCreate);
-  HIP_LIBRARY_FIND_CHECKED(hipStreamCreateWithFlags);
-  HIP_LIBRARY_FIND_CHECKED(hipStreamCreateWithPriority);
-  HIP_LIBRARY_FIND_CHECKED(hipStreamGetPriority);
-  HIP_LIBRARY_FIND_CHECKED(hipStreamGetFlags);
-  HIP_LIBRARY_FIND_CHECKED(hipStreamWaitEvent);
-  HIP_LIBRARY_FIND_CHECKED(hipStreamAddCallback);
-  HIP_LIBRARY_FIND_CHECKED(hipStreamQuery);
-  HIP_LIBRARY_FIND_CHECKED(hipStreamSynchronize);
-  HIP_LIBRARY_FIND_CHECKED(hipStreamDestroy);
-  HIP_LIBRARY_FIND_CHECKED(hipEventCreateWithFlags);
-  HIP_LIBRARY_FIND_CHECKED(hipEventRecord);
-  HIP_LIBRARY_FIND_CHECKED(hipEventQuery);
-  HIP_LIBRARY_FIND_CHECKED(hipEventSynchronize);
-  HIP_LIBRARY_FIND_CHECKED(hipEventDestroy);
-  HIP_LIBRARY_FIND_CHECKED(hipEventElapsedTime);
-  HIP_LIBRARY_FIND_CHECKED(hipFuncGetAttribute);
-  HIP_LIBRARY_FIND_CHECKED(hipFuncSetCacheConfig);
-  HIP_LIBRARY_FIND_CHECKED(hipModuleLaunchKernel);
-  HIP_LIBRARY_FIND_CHECKED(hipModuleOccupancyMaxPotentialBlockSize);
-  HIP_LIBRARY_FIND_CHECKED(hipTexRefSetArray);
-  HIP_LIBRARY_FIND_CHECKED(hipTexRefSetAddress);
-  HIP_LIBRARY_FIND_CHECKED(hipTexRefSetAddress2D);
-  HIP_LIBRARY_FIND_CHECKED(hipTexRefSetFormat);
-  HIP_LIBRARY_FIND_CHECKED(hipTexRefSetAddressMode);
-  HIP_LIBRARY_FIND_CHECKED(hipTexRefSetFilterMode);
-  HIP_LIBRARY_FIND_CHECKED(hipTexRefSetFlags);
-  HIP_LIBRARY_FIND_CHECKED(hipTexRefGetAddress);
-  HIP_LIBRARY_FIND_CHECKED(hipTexRefGetAddressMode);
-  HIP_LIBRARY_FIND_CHECKED(hipTexObjectCreate);
-  HIP_LIBRARY_FIND_CHECKED(hipTexObjectDestroy);
-  HIP_LIBRARY_FIND_CHECKED(hipDeviceCanAccessPeer);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxEnablePeerAccess);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxDisablePeerAccess);
-  HIP_LIBRARY_FIND_CHECKED(hipDeviceGetP2PAttribute);
-#ifdef _WIN32
-  HIP_LIBRARY_FIND_CHECKED(hipGraphicsUnregisterResource);
-  HIP_LIBRARY_FIND_CHECKED(hipGraphicsMapResources);
-  HIP_LIBRARY_FIND_CHECKED(hipGraphicsUnmapResources);
-  HIP_LIBRARY_FIND_CHECKED(hipGraphicsResourceGetMappedPointer);
-  HIP_LIBRARY_FIND_CHECKED(hipGraphicsGLRegisterBuffer);
-  HIP_LIBRARY_FIND_CHECKED(hipGLGetDevices);
-#endif
-  result = HIPEW_SUCCESS;
-  return result;
-}
-
-
-
-int hipewInit(hipuint32_t flags) {
-  int result = HIPEW_SUCCESS;
-
-  if (flags & HIPEW_INIT_HIP) {
-    result = hipewHipInit();
-    if (result != HIPEW_SUCCESS) {
-      return result;
-    }
-  }
-
-  return result;
-}
-
-
-const char *hipewErrorString(hipError_t result) {
-  switch (result) {
-    case hipSuccess: return "No errors";
-    case hipErrorInvalidValue: return "Invalid value";
-    case hipErrorOutOfMemory: return "Out of memory";
-    case hipErrorNotInitialized: return "Driver not initialized";
-    case hipErrorDeinitialized: return "Driver deinitialized";
-    case hipErrorProfilerDisabled: return "Profiler disabled";
-    case hipErrorProfilerNotInitialized: return "Profiler not initialized";
-    case hipErrorProfilerAlreadyStarted: return "Profiler already started";
-    case hipErrorProfilerAlreadyStopped: return "Profiler already stopped";
-    case hipErrorNoDevice: return "No HIP-capable device available";
-    case hipErrorInvalidDevice: return "Invalid device";
-    case hipErrorInvalidImage: return "Invalid kernel image";
-    case hipErrorInvalidContext: return "Invalid context";
-    case hipErrorContextAlreadyCurrent: return "Context already current";
-    case hipErrorMapFailed: return "Map failed";
-    case hipErrorUnmapFailed: return "Unmap failed";
-    case hipErrorArrayIsMapped: return "Array is mapped";
-    case hipErrorAlreadyMapped: return "Already mapped";
-    case hipErrorNoBinaryForGpu: return "No binary for GPU";
-    case hipErrorAlreadyAcquired: return "Already acquired";
-    case hipErrorNotMapped: return "Not mapped";
-    case hipErrorNotMappedAsArray: return "Mapped resource not available for access as an array";
-    case hipErrorNotMappedAsPointer: return "Mapped resource not available for access as a pointer";
-    case hipErrorECCNotCorrectable: return "Uncorrectable ECC error detected";
-    case hipErrorUnsupportedLimit: return "hipLimit_t not supported by device";
-    case hipErrorContextAlreadyInUse: return "Context already in use";
-    case hipErrorPeerAccessUnsupported: return "Peer access unsupported";
-    case hipErrorInvalidKernelFile: return "Invalid ptx";
-    case hipErrorInvalidGraphicsContext: return "Invalid graphics context";
-    case hipErrorInvalidSource: return "Invalid source";
-    case hipErrorFileNotFound: return "File not found";
-    case hipErrorSharedObjectSymbolNotFound: return "Link to a shared object failed to resolve";
-    case hipErrorSharedObjectInitFailed: return "Shared object initialization failed";
-    case hipErrorOperatingSystem: return "Operating system";
-    case hipErrorInvalidHandle: return "Invalid handle";
-    case hipErrorNotFound: return "Not found";
-    case hipErrorNotReady: return "HIP not ready";
-    case hipErrorIllegalAddress: return "Illegal address";
-    case hipErrorLaunchOutOfResources: return "Launch exceeded resources";
-    case hipErrorLaunchTimeOut: return "Launch exceeded timeout";
-    case hipErrorPeerAccessAlreadyEnabled: return "Peer access already enabled";
-    case hipErrorPeerAccessNotEnabled: return "Peer access not enabled";
-    case hipErrorSetOnActiveProcess: return "Primary context active";
-    case hipErrorAssert: return "Assert";
-    case hipErrorHostMemoryAlreadyRegistered: return "Host memory already registered";
-    case hipErrorHostMemoryNotRegistered: return "Host memory not registered";
-    case hipErrorLaunchFailure: return "Launch failed";
-    case hipErrorCooperativeLaunchTooLarge: return "Cooperative launch too large";
-    case hipErrorNotSupported: return "Not supported";
-    case hipErrorUnknown: return "Unknown error";
-    default: return "Unknown HIP error value";
-  }
-}
-
-static void path_join(const char *path1,
-                      const char *path2,
-                      int maxlen,
-                      char *result) {
-#if defined(WIN32) || defined(_WIN32)
-  const char separator = '\\';
-#else
-  const char separator = '/';
-#endif
-  int n = snprintf(result, maxlen, "%s%c%s", path1, separator, path2);
-  if (n != -1 && n < maxlen) {
-    result[n] = '\0';
-  }
-  else {
-    result[maxlen - 1] = '\0';
-  }
-}
-
-static int path_exists(const char *path) {
-  struct stat st;
-  if (stat(path, &st)) {
-    return 0;
-  }
-  return 1;
-}
-
-const char *hipewCompilerPath(void) {
-    #ifdef _WIN32
-    const char *hipPath = getenv("HIP_ROCCLR_HOME");
-    const char *windowsCommand = "perl ";
-    const char *executable = "bin/hipcc";
-
-    static char hipcc[65536];
-    static char finalCommand[65536];
-    if(hipPath) {
-      path_join(hipPath, executable, sizeof(hipcc), hipcc);
-      if(path_exists(hipcc)) {
-        snprintf(finalCommand, sizeof(hipcc), "%s %s", windowsCommand, hipcc);
-        return finalCommand;
-      } else {
-        printf("Could not find hipcc. Make sure HIP_ROCCLR_HOME points to the directory holding /bin/hipcc");
-      }
-    }
-    #else
-    const char *hipPath =  "opt/rocm/hip/bin";
-    const char *executable = "hipcc";
-
-    static char hipcc[65536];
-    if(hipPath) {
-      path_join(hipPath, executable, sizeof(hipcc), hipcc);
-      if(path_exists(hipcc)){
-        return hipcc;
-      }
-    }
-    #endif
-
-  {
-#ifdef _WIN32
-    FILE *handle = popen("where hipcc", "r");
-#else
-    FILE *handle = popen("which hipcc", "r");
-#endif
-    if (handle) {
-      char buffer[4096] = {0};
-      int len = fread(buffer, 1, sizeof(buffer) - 1, handle);
-      buffer[len] = '\0';
-      pclose(handle);
-      if (buffer[0]) {
-        return "hipcc";
-      }
-    }
-  }
-
-  return NULL;
-}
-
-int hipewCompilerVersion(void) {
-  const char *path = hipewCompilerPath();
-  const char *marker = "Hip compilation tools, release ";
-  FILE *pipe;
-  int major, minor;
-  char *versionstr;
-  char buf[128];
-  char output[65536] = "\0";
-  char command[65536] = "\0";
-
-  if (path == NULL) {
-    return 0;
-  }
-
-  /* get --version output */
-  strcat(command, "\"");
-  strncat(command, path, sizeof(command) - 1);
-  strncat(command, "\" --version", sizeof(command) - strlen(path) - 1);
-  pipe = popen(command, "r");
-  if (!pipe) {
-    fprintf(stderr, "HIP: failed to run compiler to retrieve version");
-    return 0;
-  }
-
-  while (!feof(pipe)) {
-    if (fgets(buf, sizeof(buf), pipe) != NULL) {
-      strncat(output, buf, sizeof(output) - strlen(output) - 1);
-    }
-  }
-
-  pclose(pipe);
-  return 40;
-}
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -297,7 +297,6 @@ endif()

 if(WITH_CYCLES_STANDALONE)
  set(WITH_CYCLES_DEVICE_CUDA TRUE)
-  set(WITH_CYCLES_DEVICE_HIP TRUE)
 endif()
 # TODO(sergey): Consider removing it, only causes confusion in interface.
 set(WITH_CYCLES_DEVICE_MULTI TRUE)
--- a/intern/cycles/app/CMakeLists.txt
+++ b/intern/cycles/app/CMakeLists.txt
@@ -64,8 +64,6 @@ if(WITH_CYCLES_STANDALONE)
    cycles_standalone.cpp
    cycles_xml.cpp
    cycles_xml.h
-    oiio_output_driver.cpp
-    oiio_output_driver.h
  )
  add_executable(cycles ${SRC} ${INC} ${INC_SYS})
  unset(SRC)
@@ -75,7 +73,7 @@ if(WITH_CYCLES_STANDALONE)

  if(APPLE)
    if(WITH_OPENCOLORIO)
-      set_property(TARGET cycles APPEND_STRING PROPERTY LINK_FLAGS " -framework IOKit -framework Carbon")
+      set_property(TARGET cycles APPEND_STRING PROPERTY LINK_FLAGS " -framework IOKit")
    endif()
    if(WITH_OPENIMAGEDENOISE AND "${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64")
      # OpenImageDenoise uses BNNS from the Accelerate framework.
--- a/intern/cycles/app/cycles_standalone.cpp
+++ b/intern/cycles/app/cycles_standalone.cpp
@@ -36,9 +36,6 @@
 #include "util/util_unique_ptr.h"
 #include "util/util_version.h"

-#include "app/cycles_xml.h"
-#include "app/oiio_output_driver.h"
-
 #ifdef WITH_CYCLES_STANDALONE_GUI
 #  include "util/util_view.h"
 #endif
@@ -57,7 +54,6 @@ struct Options {
  bool quiet;
  bool show_help, interactive, pause;
  string output_filepath;
-  string output_pass;
 } options;

 static void session_print(const string &str)
@@ -93,6 +89,30 @@ static void session_print_status()
  session_print(status);
 }

+static bool write_render(const uchar *pixels, int w, int h, int channels)
+{
+  string msg = string_printf("Writing image %s", options.output_path.c_str());
+  session_print(msg);
+
+  unique_ptr<ImageOutput> out = unique_ptr<ImageOutput>(ImageOutput::create(options.output_path));
+  if (!out) {
+    return false;
+  }
+
+  ImageSpec spec(w, h, channels, TypeDesc::UINT8);
+  if (!out->open(options.output_path, spec)) {
+    return false;
+  }
+
+  /* conversion for different top/bottom convention */
+  out->write_image(
+      TypeDesc::UINT8, pixels + (h - 1) * w * channels, AutoStride, -w * channels, AutoStride);
+
+  out->close();
+
+  return true;
+}
+
 static BufferParams &session_buffer_params()
 {
  static BufferParams buffer_params;
@@ -127,14 +147,9 @@ static void scene_init()

 static void session_init()
 {
-  options.output_pass = "combined";
+  options.session_params.write_render_cb = write_render;
  options.session = new Session(options.session_params, options.scene_params);

-  if (!options.output_filepath.empty()) {
-    options.session->set_output_driver(make_unique<OIIOOutputDriver>(
-        options.output_filepath, options.output_pass, session_print));
-  }
-
  if (options.session_params.background && !options.quiet)
    options.session->progress.set_update_callback(function_bind(&session_print_status));
 #ifdef WITH_CYCLES_STANDALONE_GUI
@@ -145,11 +160,6 @@ static void session_init()
  /* load scene */
  scene_init();

-  /* add pass for output. */
-  Pass *pass = options.scene->create_node<Pass>();
-  pass->set_name(ustring(options.output_pass.c_str()));
-  pass->set_type(PASS_COMBINED);
-
  options.session->reset(options.session_params, session_buffer_params());
  options.session->start();
 }
--- a/intern/cycles/app/cycles_xml.cpp
+++ b/intern/cycles/app/cycles_xml.cpp
@@ -333,7 +333,6 @@ static void xml_read_shader_graph(XMLReadState &state, Shader *shader, xml_node
      }

      snode = (ShaderNode *)node_type->create(node_type);
-      snode->set_owner(graph);
    }

    xml_read_node(graph_reader, snode, node);
--- a/intern/cycles/app/oiio_output_driver.cpp
+++ b/intern/cycles/app/oiio_output_driver.cpp
@@ -1,71 +0,0 @@
-/*
- * Copyright 2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "app/oiio_output_driver.h"
-
-CCL_NAMESPACE_BEGIN
-
-OIIOOutputDriver::OIIOOutputDriver(const string_view filepath,
-                                   const string_view pass,
-                                   LogFunction log)
-    : filepath_(filepath), pass_(pass), log_(log)
-{
-}
-
-OIIOOutputDriver::~OIIOOutputDriver()
-{
-}
-
-void OIIOOutputDriver::write_render_tile(const Tile &tile)
-{
-  /* Only write the full buffer, no intermediate tiles. */
-  if (!(tile.size == tile.full_size)) {
-    return;
-  }
-
-  log_(string_printf("Writing image %s", filepath_.c_str()));
-
-  unique_ptr<ImageOutput> image_output(ImageOutput::create(filepath_));
-  if (image_output == nullptr) {
-    log_("Failed to create image file");
-    return;
-  }
-
-  const int width = tile.size.x;
-  const int height = tile.size.y;
-
-  ImageSpec spec(width, height, 4, TypeDesc::FLOAT);
-  if (!image_output->open(filepath_, spec)) {
-    log_("Failed to create image file");
-    return;
-  }
-
-  vector<float> pixels(width * height * 4);
-  if (!tile.get_pass_pixels(pass_, 4, pixels.data())) {
-    log_("Failed to read render pass pixels");
-    return;
-  }
-
-  /* Manipulate offset and stride to convert from bottom-up to top-down convention. */
-  image_output->write_image(TypeDesc::FLOAT,
-                            pixels.data() + (height - 1) * width * 4,
-                            AutoStride,
-                            -width * 4 * sizeof(float),
-                            AutoStride);
-  image_output->close();
-}
-
-CCL_NAMESPACE_END
--- a/intern/cycles/app/oiio_output_driver.h
+++ b/intern/cycles/app/oiio_output_driver.h
@@ -1,42 +0,0 @@
-/*
- * Copyright 2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "render/output_driver.h"
-
-#include "util/util_function.h"
-#include "util/util_image.h"
-#include "util/util_string.h"
-#include "util/util_unique_ptr.h"
-#include "util/util_vector.h"
-
-CCL_NAMESPACE_BEGIN
-
-class OIIOOutputDriver : public OutputDriver {
- public:
-  typedef function<void(const string &)> LogFunction;
-
-  OIIOOutputDriver(const string_view filepath, const string_view pass, LogFunction log);
-  virtual ~OIIOOutputDriver();
-
-  void write_render_tile(const Tile &tile) override;
-
- protected:
-  string filepath_;
-  string pass_;
-  LogFunction log_;
-};
-
-CCL_NAMESPACE_END
--- a/intern/cycles/blender/CMakeLists.txt
+++ b/intern/cycles/blender/CMakeLists.txt
@@ -31,14 +31,13 @@ set(INC_SYS
 set(SRC
  blender_camera.cpp
  blender_device.cpp
-  blender_display_driver.cpp
  blender_image.cpp
  blender_geometry.cpp
+  blender_gpu_display.cpp
  blender_light.cpp
  blender_mesh.cpp
  blender_object.cpp
  blender_object_cull.cpp
-  blender_output_driver.cpp
  blender_particles.cpp
  blender_curves.cpp
  blender_logging.cpp
@@ -52,11 +51,10 @@ set(SRC

  CCL_api.h
  blender_device.h
-  blender_display_driver.h
+  blender_gpu_display.h
  blender_id_map.h
  blender_image.h
  blender_object_cull.h
-  blender_output_driver.h
  blender_sync.h
  blender_session.h
  blender_texture.h
@@ -97,9 +95,6 @@ set(ADDON_FILES

 add_definitions(${GL_DEFINITIONS})

-if(WITH_CYCLES_DEVICE_HIP)
-  add_definitions(-DWITH_HIP)
-endif()
 if(WITH_MOD_FLUID)
  add_definitions(-DWITH_FLUID)
 endif()
--- a/intern/cycles/blender/addon/engine.py
+++ b/intern/cycles/blender/addon/engine.py
@@ -28,7 +28,7 @@ def _configure_argument_parser():
                        action='store_true')
    parser.add_argument("--cycles-device",
                        help="Set the device to use for Cycles, overriding user preferences and the scene setting."
-                             "Valid options are 'CPU', 'CUDA', 'OPTIX', or 'HIP'"
+                             "Valid options are 'CPU', 'CUDA' or 'OPTIX'."
                             "Additionally, you can append '+CPU' to any GPU type for hybrid rendering.",
                        default=None)
    return parser
@@ -211,6 +211,7 @@ def list_render_passes(scene, srl):
    if crl.use_pass_shadow_catcher:        yield ("Shadow Catcher",      "RGB",  'COLOR')

    # Debug passes.
+    if crl.pass_debug_render_time:             yield ("Debug Render Time",             "X",   'VALUE')
    if crl.pass_debug_sample_count:            yield ("Debug Sample Count",            "X",   'VALUE')

    # Cryptomatte passes.
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -111,7 +111,6 @@ enum_device_type = (
    ('CPU', "CPU", "CPU", 0),
    ('CUDA', "CUDA", "CUDA", 1),
    ('OPTIX', "OptiX", "OptiX", 3),
-    ("HIP", "HIP", "HIP", 4)
 )

 enum_texture_limit = (
@@ -124,7 +123,7 @@ enum_texture_limit = (
    ('4096', "4096", "Limit texture size to 4096 pixels", 6),
    ('8192', "8192", "Limit texture size to 8192 pixels", 7),
 )
-
+ 
 # NOTE: Identifiers are expected to be an upper case version of identifiers from  `Pass::get_type_enum()`
 enum_view3d_shading_render_pass = (
    ('', "General", ""),
@@ -1197,6 +1196,12 @@ class CyclesCurveRenderSettings(bpy.types.PropertyGroup):

 class CyclesRenderLayerSettings(bpy.types.PropertyGroup):

+    pass_debug_render_time: BoolProperty(
+        name="Debug Render Time",
+        description="Render time in milliseconds per sample and pixel",
+        default=False,
+        update=update_render_passes,
+    )
    pass_debug_sample_count: BoolProperty(
        name="Debug Sample Count",
        description="Number of samples/camera rays per pixel",
@@ -1261,16 +1266,12 @@ class CyclesPreferences(bpy.types.AddonPreferences):

    def get_device_types(self, context):
        import _cycles
-        has_cuda, has_optix, has_hip = _cycles.get_device_types()
-
+        has_cuda, has_optix = _cycles.get_device_types()
        list = [('NONE', "None", "Don't use compute device", 0)]
        if has_cuda:
            list.append(('CUDA', "CUDA", "Use CUDA for GPU acceleration", 1))
        if has_optix:
            list.append(('OPTIX', "OptiX", "Use OptiX for GPU acceleration", 3))
-        if has_hip:
-            list.append(('HIP', "HIP", "Use HIP for GPU acceleration", 4))
-
        return list

    compute_device_type: EnumProperty(
@@ -1295,7 +1296,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):

    def update_device_entries(self, device_list):
        for device in device_list:
-            if not device[1] in {'CUDA', 'OPTIX', 'CPU', 'HIP'}:
+            if not device[1] in {'CUDA', 'OPTIX', 'CPU'}:
                continue
            # Try to find existing Device entry
            entry = self.find_existing_device_entry(device)
@@ -1329,7 +1330,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
            elif entry.type == 'CPU':
                cpu_devices.append(entry)
        # Extend all GPU devices with CPU.
-        if compute_device_type != 'CPU' and compute_device_type != 'HIP':
+        if compute_device_type != 'CPU':
            devices.extend(cpu_devices)
        return devices

@@ -1339,7 +1340,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
        import _cycles
        # Ensure `self.devices` is not re-allocated when the second call to
        # get_devices_for_type is made, freeing items from the first list.
-        for device_type in ('CUDA', 'OPTIX', 'HIP'):
+        for device_type in ('CUDA', 'OPTIX', 'OPENCL'):
            self.update_device_entries(_cycles.available_devices(device_type))

    # Deprecated: use refresh_devices instead.
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -99,11 +99,6 @@ def use_cuda(context):
    return (get_device_type(context) == 'CUDA' and cscene.device == 'GPU')


-def use_hip(context):
-    cscene = context.scene.cycles
-
-    return (get_device_type(context) == 'HIP' and cscene.device == 'GPU')
-
 def use_optix(context):
    cscene = context.scene.cycles

@@ -792,6 +787,7 @@ class CYCLES_RENDER_PT_passes_data(CyclesButtonsPanel, Panel):
        col.prop(view_layer, "use_pass_material_index")

        col = layout.column(heading="Debug", align=True)
+        col.prop(cycles_view_layer, "pass_debug_render_time", text="Render Time")
        col.prop(cycles_view_layer, "pass_debug_sample_count", text="Sample Count")

        layout.prop(view_layer, "pass_alpha_threshold")
--- a/intern/cycles/blender/blender_device.cpp
+++ b/intern/cycles/blender/blender_device.cpp
@@ -26,7 +26,6 @@ enum ComputeDevice {
  COMPUTE_DEVICE_CPU = 0,
  COMPUTE_DEVICE_CUDA = 1,
  COMPUTE_DEVICE_OPTIX = 3,
-  COMPUTE_DEVICE_HIP = 4,

  COMPUTE_DEVICE_NUM
 };
@@ -82,9 +81,6 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
      else if (compute_device == COMPUTE_DEVICE_OPTIX) {
        mask |= DEVICE_MASK_OPTIX;
      }
-      else if (compute_device == COMPUTE_DEVICE_HIP) {
-        mask |= DEVICE_MASK_HIP;
-      }
      vector<DeviceInfo> devices = Device::available_devices(mask);

      /* Match device preferences and available devices. */
--- a/intern/cycles/blender/blender_geometry.cpp
+++ b/intern/cycles/blender/blender_geometry.cpp
@@ -80,10 +80,8 @@ Geometry *BlenderSync::sync_geometry(BL::Depsgraph &b_depsgraph,
 {
  /* Test if we can instance or if the object is modified. */
  Geometry::Type geom_type = determine_geom_type(b_ob_info, use_particle_hair);
-  BL::ID b_key_id = (b_ob_info.is_real_object_data() &&
-                     BKE_object_is_modified(b_ob_info.real_object)) ?
-                        b_ob_info.real_object :
-                        b_ob_info.object_data;
+  BL::ID b_key_id = (BKE_object_is_modified(b_ob_info.real_object)) ? b_ob_info.real_object :
+                                                                      b_ob_info.object_data;
  GeometryKey key(b_key_id.ptr.data, geom_type);

  /* Find shader indices. */
--- a/intern/cycles/blender/blender_display_driver.cpp
+++ b/intern/cycles/blender/blender_display_driver.cpp
@@ -14,7 +14,7 @@
 * limitations under the License.
 */

-#include "blender/blender_display_driver.h"
+#include "blender/blender_gpu_display.h"

 #include "device/device.h"
 #include "util/util_logging.h"
@@ -273,17 +273,17 @@ uint BlenderDisplaySpaceShader::get_shader_program()
 }

 /* --------------------------------------------------------------------
- * BlenderDisplayDriver.
+ * BlenderGPUDisplay.
 */

-BlenderDisplayDriver::BlenderDisplayDriver(BL::RenderEngine &b_engine, BL::Scene &b_scene)
+BlenderGPUDisplay::BlenderGPUDisplay(BL::RenderEngine &b_engine, BL::Scene &b_scene)
    : b_engine_(b_engine), display_shader_(BlenderDisplayShader::create(b_engine, b_scene))
 {
  /* Create context while on the main thread. */
  gl_context_create();
 }

-BlenderDisplayDriver::~BlenderDisplayDriver()
+BlenderGPUDisplay::~BlenderGPUDisplay()
 {
  gl_resources_destroy();
 }
@@ -292,18 +292,19 @@ BlenderDisplayDriver::~BlenderDisplayDriver()
 * Update procedure.
 */

-bool BlenderDisplayDriver::update_begin(const Params &params,
+bool BlenderGPUDisplay::do_update_begin(const GPUDisplayParams &params,
                                        int texture_width,
                                        int texture_height)
 {
-  /* Note that it's the responsibility of BlenderDisplayDriver to ensure updating and drawing
+  /* Note that it's the responsibility of BlenderGPUDisplay to ensure updating and drawing
   * the texture does not happen at the same time. This is achieved indirectly.
   *
   * When enabling the OpenGL context, it uses an internal mutex lock DST.gl_context_lock.
   * This same lock is also held when do_draw() is called, which together ensure mutual
   * exclusion.
   *
-   * This locking is not performed on the Cycles side, because that would cause lock inversion. */
+   * This locking is not performed at the GPU display level, because that would cause lock
+   * inversion. */
  if (!gl_context_enable()) {
    return false;
  }
@@ -360,7 +361,7 @@ bool BlenderDisplayDriver::update_begin(const Params &params,
  return true;
 }

-void BlenderDisplayDriver::update_end()
+void BlenderGPUDisplay::do_update_end()
 {
  gl_upload_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
  glFlush();
@@ -368,18 +369,54 @@ void BlenderDisplayDriver::update_end()
  gl_context_disable();
 }

+/* --------------------------------------------------------------------
+ * Texture update from CPU buffer.
+ */
+
+void BlenderGPUDisplay::do_copy_pixels_to_texture(
+    const half4 *rgba_pixels, int texture_x, int texture_y, int pixels_width, int pixels_height)
+{
+  /* This call copies pixels to a Pixel Buffer Object (PBO) which is much cheaper from CPU time
+   * point of view than to copy data directly to the OpenGL texture.
+   *
+   * The possible downside of this approach is that it might require a higher peak memory when
+   * doing partial updates of the texture (although, in practice even partial updates might peak
+   * with a full-frame buffer stored on the CPU if the GPU is currently occupied). */
+
+  half4 *mapped_rgba_pixels = map_texture_buffer();
+  if (!mapped_rgba_pixels) {
+    return;
+  }
+
+  if (texture_x == 0 && texture_y == 0 && pixels_width == texture_.width &&
+      pixels_height == texture_.height) {
+    const size_t size_in_bytes = sizeof(half4) * texture_.width * texture_.height;
+    memcpy(mapped_rgba_pixels, rgba_pixels, size_in_bytes);
+  }
+  else {
+    const half4 *rgba_row = rgba_pixels;
+    half4 *mapped_rgba_row = mapped_rgba_pixels + texture_y * texture_.width + texture_x;
+    for (int y = 0; y < pixels_height;
+         ++y, rgba_row += pixels_width, mapped_rgba_row += texture_.width) {
+      memcpy(mapped_rgba_row, rgba_row, sizeof(half4) * pixels_width);
+    }
+  }
+
+  unmap_texture_buffer();
+}
+
 /* --------------------------------------------------------------------
 * Texture buffer mapping.
 */

-half4 *BlenderDisplayDriver::map_texture_buffer()
+half4 *BlenderGPUDisplay::do_map_texture_buffer()
 {
  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id);

  half4 *mapped_rgba_pixels = reinterpret_cast<half4 *>(
      glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY));
  if (!mapped_rgba_pixels) {
-    LOG(ERROR) << "Error mapping BlenderDisplayDriver pixel buffer object.";
+    LOG(ERROR) << "Error mapping BlenderGPUDisplay pixel buffer object.";
  }

  if (texture_.need_clear) {
@@ -394,7 +431,7 @@ half4 *BlenderDisplayDriver::map_texture_buffer()
  return mapped_rgba_pixels;
 }

-void BlenderDisplayDriver::unmap_texture_buffer()
+void BlenderGPUDisplay::do_unmap_texture_buffer()
 {
  glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);

@@ -405,9 +442,9 @@ void BlenderDisplayDriver::unmap_texture_buffer()
 * Graphics interoperability.
 */

-BlenderDisplayDriver::GraphicsInterop BlenderDisplayDriver::graphics_interop_get()
+DeviceGraphicsInteropDestination BlenderGPUDisplay::do_graphics_interop_get()
 {
-  GraphicsInterop interop_dst;
+  DeviceGraphicsInteropDestination interop_dst;

  interop_dst.buffer_width = texture_.buffer_width;
  interop_dst.buffer_height = texture_.buffer_height;
@@ -419,12 +456,12 @@ BlenderDisplayDriver::GraphicsInterop BlenderDisplayDriver::graphics_interop_get
  return interop_dst;
 }

-void BlenderDisplayDriver::graphics_interop_activate()
+void BlenderGPUDisplay::graphics_interop_activate()
 {
  gl_context_enable();
 }

-void BlenderDisplayDriver::graphics_interop_deactivate()
+void BlenderGPUDisplay::graphics_interop_deactivate()
 {
  gl_context_disable();
 }
@@ -433,21 +470,27 @@ void BlenderDisplayDriver::graphics_interop_deactivate()
 * Drawing.
 */

-void BlenderDisplayDriver::clear()
+void BlenderGPUDisplay::clear()
 {
  texture_.need_clear = true;
 }

-void BlenderDisplayDriver::set_zoom(float zoom_x, float zoom_y)
+void BlenderGPUDisplay::set_zoom(float zoom_x, float zoom_y)
 {
  zoom_ = make_float2(zoom_x, zoom_y);
 }

-void BlenderDisplayDriver::draw(const Params &params)
+void BlenderGPUDisplay::do_draw(const GPUDisplayParams &params)
 {
  /* See do_update_begin() for why no locking is required here. */
  const bool transparent = true;  // TODO(sergey): Derive this from Film.

+  if (texture_.need_clear) {
+    /* Texture is requested to be cleared and was not yet cleared.
+     * Do early return which should be equivalent of drawing all-zero texture. */
+    return;
+  }
+
  if (!gl_draw_resources_ensure()) {
    return;
  }
@@ -456,16 +499,6 @@ void BlenderDisplayDriver::draw(const Params &params)
    gl_context_mutex_.lock();
  }

-  if (texture_.need_clear) {
-    /* Texture is requested to be cleared and was not yet cleared.
-     *
-     * Do early return which should be equivalent of drawing all-zero texture.
-     * Watch out for the lock though so that the clear happening during update is properly
-     * synchronized here. */
-    gl_context_mutex_.unlock();
-    return;
-  }
-
  if (gl_upload_sync_) {
    glWaitSync((GLsync)gl_upload_sync_, 0, GL_TIMEOUT_IGNORED);
  }
@@ -547,7 +580,7 @@ void BlenderDisplayDriver::draw(const Params &params)
  }
 }

-void BlenderDisplayDriver::gl_context_create()
+void BlenderGPUDisplay::gl_context_create()
 {
  /* When rendering in viewport there is no render context available via engine.
   * Check whether own context is to be created here.
@@ -576,7 +609,7 @@ void BlenderDisplayDriver::gl_context_create()
  }
 }

-bool BlenderDisplayDriver::gl_context_enable()
+bool BlenderGPUDisplay::gl_context_enable()
 {
  if (use_gl_context_) {
    if (!gl_context_) {
@@ -591,7 +624,7 @@ bool BlenderDisplayDriver::gl_context_enable()
  return true;
 }

-void BlenderDisplayDriver::gl_context_disable()
+void BlenderGPUDisplay::gl_context_disable()
 {
  if (use_gl_context_) {
    if (gl_context_) {
@@ -604,7 +637,7 @@ void BlenderDisplayDriver::gl_context_disable()
  RE_engine_render_context_disable(reinterpret_cast<RenderEngine *>(b_engine_.ptr.data));
 }

-void BlenderDisplayDriver::gl_context_dispose()
+void BlenderGPUDisplay::gl_context_dispose()
 {
  if (gl_context_) {
    const bool drw_state = DRW_opengl_context_release();
@@ -616,7 +649,7 @@ void BlenderDisplayDriver::gl_context_dispose()
  }
 }

-bool BlenderDisplayDriver::gl_draw_resources_ensure()
+bool BlenderGPUDisplay::gl_draw_resources_ensure()
 {
  if (!texture_.gl_id) {
    /* If there is no texture allocated, there is nothing to draw. Inform the draw call that it can
@@ -643,7 +676,7 @@ bool BlenderDisplayDriver::gl_draw_resources_ensure()
  return true;
 }

-void BlenderDisplayDriver::gl_resources_destroy()
+void BlenderGPUDisplay::gl_resources_destroy()
 {
  gl_context_enable();

@@ -666,7 +699,7 @@ void BlenderDisplayDriver::gl_resources_destroy()
  gl_context_dispose();
 }

-bool BlenderDisplayDriver::gl_texture_resources_ensure()
+bool BlenderGPUDisplay::gl_texture_resources_ensure()
 {
  if (texture_.creation_attempted) {
    return texture_.is_created;
@@ -703,7 +736,7 @@ bool BlenderDisplayDriver::gl_texture_resources_ensure()
  return true;
 }

-void BlenderDisplayDriver::texture_update_if_needed()
+void BlenderGPUDisplay::texture_update_if_needed()
 {
  if (!texture_.need_update) {
    return;
@@ -717,7 +750,7 @@ void BlenderDisplayDriver::texture_update_if_needed()
  texture_.need_update = false;
 }

-void BlenderDisplayDriver::vertex_buffer_update(const Params &params)
+void BlenderGPUDisplay::vertex_buffer_update(const GPUDisplayParams &params)
 {
  /* Invalidate old contents - avoids stalling if the buffer is still waiting in queue to be
   * rendered. */
@@ -730,23 +763,23 @@ void BlenderDisplayDriver::vertex_buffer_update(const Params &params)

  vpointer[0] = 0.0f;
  vpointer[1] = 0.0f;
-  vpointer[2] = params.full_offset.x;
-  vpointer[3] = params.full_offset.y;
+  vpointer[2] = params.offset.x;
+  vpointer[3] = params.offset.y;

  vpointer[4] = 1.0f;
  vpointer[5] = 0.0f;
-  vpointer[6] = (float)params.size.x + params.full_offset.x;
-  vpointer[7] = params.full_offset.y;
+  vpointer[6] = (float)params.size.x + params.offset.x;
+  vpointer[7] = params.offset.y;

  vpointer[8] = 1.0f;
  vpointer[9] = 1.0f;
-  vpointer[10] = (float)params.size.x + params.full_offset.x;
-  vpointer[11] = (float)params.size.y + params.full_offset.y;
+  vpointer[10] = (float)params.size.x + params.offset.x;
+  vpointer[11] = (float)params.size.y + params.offset.y;

  vpointer[12] = 0.0f;
  vpointer[13] = 1.0f;
-  vpointer[14] = params.full_offset.x;
-  vpointer[15] = (float)params.size.y + params.full_offset.y;
+  vpointer[14] = params.offset.x;
+  vpointer[15] = (float)params.size.y + params.offset.y;

  glUnmapBuffer(GL_ARRAY_BUFFER);
 }
--- a/intern/cycles/blender/blender_display_driver.h
+++ b/intern/cycles/blender/blender_display_driver.h
@@ -22,14 +22,12 @@

 #include "RNA_blender_cpp.h"

-#include "render/display_driver.h"
-
-#include "util/util_thread.h"
+#include "render/gpu_display.h"
 #include "util/util_unique_ptr.h"

 CCL_NAMESPACE_BEGIN

-/* Base class of shader used for display driver rendering. */
+/* Base class of shader used for GPU display rendering. */
 class BlenderDisplayShader {
 public:
  static constexpr const char *position_attribute_name = "pos";
@@ -98,11 +96,11 @@ class BlenderDisplaySpaceShader : public BlenderDisplayShader {
  uint shader_program_ = 0;
 };

-/* Display driver implementation which is specific for Blender viewport integration. */
-class BlenderDisplayDriver : public DisplayDriver {
+/* GPU display implementation which is specific for Blender viewport integration. */
+class BlenderGPUDisplay : public GPUDisplay {
 public:
-  BlenderDisplayDriver(BL::RenderEngine &b_engine, BL::Scene &b_scene);
-  ~BlenderDisplayDriver();
+  BlenderGPUDisplay(BL::RenderEngine &b_engine, BL::Scene &b_scene);
+  ~BlenderGPUDisplay();

  virtual void graphics_interop_activate() override;
  virtual void graphics_interop_deactivate() override;
@@ -112,15 +110,22 @@ class BlenderDisplayDriver : public DisplayDriver {
  void set_zoom(float zoom_x, float zoom_y);

 protected:
-  virtual bool update_begin(const Params &params, int texture_width, int texture_height) override;
-  virtual void update_end() override;
+  virtual bool do_update_begin(const GPUDisplayParams &params,
+                               int texture_width,
+                               int texture_height) override;
+  virtual void do_update_end() override;

-  virtual half4 *map_texture_buffer() override;
-  virtual void unmap_texture_buffer() override;
+  virtual void do_copy_pixels_to_texture(const half4 *rgba_pixels,
+                                         int texture_x,
+                                         int texture_y,
+                                         int pixels_width,
+                                         int pixels_height) override;
+  virtual void do_draw(const GPUDisplayParams &params) override;

-  virtual GraphicsInterop graphics_interop_get() override;
+  virtual half4 *do_map_texture_buffer() override;
+  virtual void do_unmap_texture_buffer() override;

-  virtual void draw(const Params &params) override;
+  virtual DeviceGraphicsInteropDestination do_graphics_interop_get() override;

  /* Helper function which allocates new GPU context. */
  void gl_context_create();
@@ -147,13 +152,13 @@ class BlenderDisplayDriver : public DisplayDriver {
   * This buffer is used to render texture in the viewport.
   *
   * NOTE: The buffer needs to be bound. */
-  void vertex_buffer_update(const Params &params);
+  void vertex_buffer_update(const GPUDisplayParams &params);

  BL::RenderEngine b_engine_;

  /* OpenGL context which is used the render engine doesn't have its own. */
  void *gl_context_ = nullptr;
-  /* The when Blender RenderEngine side context is not available and the DisplayDriver is to create
+  /* The when Blender RenderEngine side context is not available and the GPUDisplay is to create
   * its own context. */
  bool use_gl_context_ = false;
  /* Mutex used to guard the `gl_context_`. */
--- a/intern/cycles/blender/blender_output_driver.cpp
+++ b/intern/cycles/blender/blender_output_driver.cpp
@@ -1,127 +0,0 @@
-/*
- * Copyright 2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "blender/blender_output_driver.h"
-
-CCL_NAMESPACE_BEGIN
-
-BlenderOutputDriver::BlenderOutputDriver(BL::RenderEngine &b_engine) : b_engine_(b_engine)
-{
-}
-
-BlenderOutputDriver::~BlenderOutputDriver()
-{
-}
-
-bool BlenderOutputDriver::read_render_tile(const Tile &tile)
-{
-  /* Get render result. */
-  BL::RenderResult b_rr = b_engine_.begin_result(tile.offset.x,
-                                                 tile.offset.y,
-                                                 tile.size.x,
-                                                 tile.size.y,
-                                                 tile.layer.c_str(),
-                                                 tile.view.c_str());
-
-  /* Can happen if the intersected rectangle gives 0 width or height. */
-  if (b_rr.ptr.data == NULL) {
-    return false;
-  }
-
-  BL::RenderResult::layers_iterator b_single_rlay;
-  b_rr.layers.begin(b_single_rlay);
-
-  /* layer will be missing if it was disabled in the UI */
-  if (b_single_rlay == b_rr.layers.end()) {
-    return false;
-  }
-
-  BL::RenderLayer b_rlay = *b_single_rlay;
-
-  vector<float> pixels(tile.size.x * tile.size.y * 4);
-
-  /* Copy each pass.
-   * TODO:copy only the required ones for better performance? */
-  for (BL::RenderPass &b_pass : b_rlay.passes) {
-    tile.set_pass_pixels(b_pass.name(), b_pass.channels(), (float *)b_pass.rect());
-  }
-
-  b_engine_.end_result(b_rr, false, false, false);
-
-  return true;
-}
-
-bool BlenderOutputDriver::update_render_tile(const Tile &tile)
-{
-  /* Use final write for preview renders, otherwise render result wouldn't be be updated
-   * quickly on Blender side. For all other cases we use the display driver. */
-  if (b_engine_.is_preview()) {
-    write_render_tile(tile);
-    return true;
-  }
-  else {
-    /* Don't highlight full-frame tile. */
-    if (!(tile.size == tile.full_size)) {
-      b_engine_.tile_highlight_clear_all();
-      b_engine_.tile_highlight_set(tile.offset.x, tile.offset.y, tile.size.x, tile.size.y, true);
-    }
-
-    return false;
-  }
-}
-
-void BlenderOutputDriver::write_render_tile(const Tile &tile)
-{
-  b_engine_.tile_highlight_clear_all();
-
-  /* Get render result. */
-  BL::RenderResult b_rr = b_engine_.begin_result(tile.offset.x,
-                                                 tile.offset.y,
-                                                 tile.size.x,
-                                                 tile.size.y,
-                                                 tile.layer.c_str(),
-                                                 tile.view.c_str());
-
-  /* Can happen if the intersected rectangle gives 0 width or height. */
-  if (b_rr.ptr.data == NULL) {
-    return;
-  }
-
-  BL::RenderResult::layers_iterator b_single_rlay;
-  b_rr.layers.begin(b_single_rlay);
-
-  /* Layer will be missing if it was disabled in the UI. */
-  if (b_single_rlay == b_rr.layers.end()) {
-    return;
-  }
-
-  BL::RenderLayer b_rlay = *b_single_rlay;
-
-  vector<float> pixels(tile.size.x * tile.size.y * 4);
-
-  /* Copy each pass. */
-  for (BL::RenderPass &b_pass : b_rlay.passes) {
-    if (!tile.get_pass_pixels(b_pass.name(), b_pass.channels(), &pixels[0])) {
-      memset(&pixels[0], 0, pixels.size() * sizeof(float));
-    }
-
-    b_pass.rect(&pixels[0]);
-  }
-
-  b_engine_.end_result(b_rr, true, false, true);
-}
-
-CCL_NAMESPACE_END
--- a/intern/cycles/blender/blender_output_driver.h
+++ b/intern/cycles/blender/blender_output_driver.h
@@ -1,40 +0,0 @@
-/*
- * Copyright 2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "MEM_guardedalloc.h"
-
-#include "RNA_blender_cpp.h"
-
-#include "render/output_driver.h"
-
-CCL_NAMESPACE_BEGIN
-
-class BlenderOutputDriver : public OutputDriver {
- public:
-  BlenderOutputDriver(BL::RenderEngine &b_engine);
-  ~BlenderOutputDriver();
-
-  virtual void write_render_tile(const Tile &tile) override;
-  virtual bool update_render_tile(const Tile &tile) override;
-  virtual bool read_render_tile(const Tile &tile) override;
-
- protected:
-  BL::RenderEngine b_engine_;
-};
-
-CCL_NAMESPACE_END
--- a/intern/cycles/blender/blender_python.cpp
+++ b/intern/cycles/blender/blender_python.cpp
@@ -911,16 +911,14 @@ static PyObject *enable_print_stats_func(PyObject * /*self*/, PyObject * /*args*
 static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/)
 {
  vector<DeviceType> device_types = Device::available_types();
-  bool has_cuda = false, has_optix = false, has_hip = false;
+  bool has_cuda = false, has_optix = false;
  foreach (DeviceType device_type, device_types) {
    has_cuda |= (device_type == DEVICE_CUDA);
    has_optix |= (device_type == DEVICE_OPTIX);
-    has_hip |= (device_type == DEVICE_HIP);
  }
-  PyObject *list = PyTuple_New(3);
+  PyObject *list = PyTuple_New(2);
  PyTuple_SET_ITEM(list, 0, PyBool_FromLong(has_cuda));
  PyTuple_SET_ITEM(list, 1, PyBool_FromLong(has_optix));
-  PyTuple_SET_ITEM(list, 2, PyBool_FromLong(has_hip));
  return list;
 }

@@ -946,9 +944,6 @@ static PyObject *set_device_override_func(PyObject * /*self*/, PyObject *arg)
  else if (override == "OPTIX") {
    BlenderSession::device_override = DEVICE_MASK_OPTIX;
  }
-  else if (override == "HIP") {
-    BlenderSession::device_override = DEVICE_MASK_HIP;
-  }
  else {
    printf("\nError: %s is not a valid Cycles device.\n", override.c_str());
    Py_RETURN_FALSE;
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -42,8 +42,7 @@
 #include "util/util_progress.h"
 #include "util/util_time.h"

-#include "blender/blender_display_driver.h"
-#include "blender/blender_output_driver.h"
+#include "blender/blender_gpu_display.h"
 #include "blender/blender_session.h"
 #include "blender/blender_sync.h"
 #include "blender/blender_util.h"
@@ -72,8 +71,7 @@ BlenderSession::BlenderSession(BL::RenderEngine &b_engine,
      width(0),
      height(0),
      preview_osl(preview_osl),
-      python_thread_state(NULL),
-      use_developer_ui(false)
+      python_thread_state(NULL)
 {
  /* offline render */
  background = true;
@@ -158,13 +156,11 @@ void BlenderSession::create_session()
      b_v3d, b_rv3d, scene->camera, width, height);
  session->reset(session_params, buffer_params);

-  /* Create GPU display.
-   * TODO(sergey): Investigate whether DisplayDriver can be used for the preview as well. */
+  /* Create GPU display. */
  if (!b_engine.is_preview() && !headless) {
-    unique_ptr<BlenderDisplayDriver> display_driver = make_unique<BlenderDisplayDriver>(b_engine,
-                                                                                        b_scene);
-    display_driver_ = display_driver.get();
-    session->set_display_driver(move(display_driver));
+    unique_ptr<BlenderGPUDisplay> gpu_display = make_unique<BlenderGPUDisplay>(b_engine, b_scene);
+    gpu_display_ = gpu_display.get();
+    session->set_gpu_display(move(gpu_display));
  }

  /* Viewport and preview (as in, material preview) does not do tiled rendering, so can inform
@@ -281,6 +277,96 @@ void BlenderSession::free_session()
  session = nullptr;
 }

+void BlenderSession::read_render_tile()
+{
+  const int2 tile_offset = session->get_render_tile_offset();
+  const int2 tile_size = session->get_render_tile_size();
+
+  /* get render result */
+  BL::RenderResult b_rr = b_engine.begin_result(tile_offset.x,
+                                                tile_offset.y,
+                                                tile_size.x,
+                                                tile_size.y,
+                                                b_rlay_name.c_str(),
+                                                b_rview_name.c_str());
+
+  /* can happen if the intersected rectangle gives 0 width or height */
+  if (b_rr.ptr.data == NULL) {
+    return;
+  }
+
+  BL::RenderResult::layers_iterator b_single_rlay;
+  b_rr.layers.begin(b_single_rlay);
+
+  /* layer will be missing if it was disabled in the UI */
+  if (b_single_rlay == b_rr.layers.end())
+    return;
+
+  BL::RenderLayer b_rlay = *b_single_rlay;
+
+  vector<float> pixels(tile_size.x * tile_size.y * 4);
+
+  /* Copy each pass.
+   * TODO:copy only the required ones for better performance? */
+  for (BL::RenderPass &b_pass : b_rlay.passes) {
+    session->set_render_tile_pixels(b_pass.name(), b_pass.channels(), (float *)b_pass.rect());
+  }
+
+  b_engine.end_result(b_rr, false, false, false);
+}
+
+void BlenderSession::write_render_tile()
+{
+  const int2 tile_offset = session->get_render_tile_offset();
+  const int2 tile_size = session->get_render_tile_size();
+
+  const string_view render_layer_name = session->get_render_tile_layer();
+  const string_view render_view_name = session->get_render_tile_view();
+
+  b_engine.tile_highlight_clear_all();
+
+  /* get render result */
+  BL::RenderResult b_rr = b_engine.begin_result(tile_offset.x,
+                                                tile_offset.y,
+                                                tile_size.x,
+                                                tile_size.y,
+                                                render_layer_name.c_str(),
+                                                render_view_name.c_str());
+
+  /* can happen if the intersected rectangle gives 0 width or height */
+  if (b_rr.ptr.data == NULL) {
+    return;
+  }
+
+  BL::RenderResult::layers_iterator b_single_rlay;
+  b_rr.layers.begin(b_single_rlay);
+
+  /* layer will be missing if it was disabled in the UI */
+  if (b_single_rlay == b_rr.layers.end()) {
+    return;
+  }
+
+  BL::RenderLayer b_rlay = *b_single_rlay;
+
+  write_render_result(b_rlay);
+
+  b_engine.end_result(b_rr, true, false, true);
+}
+
+void BlenderSession::update_render_tile()
+{
+  if (!session->has_multiple_render_tiles()) {
+    /* Don't highlight full-frame tile. */
+    return;
+  }
+
+  const int2 tile_offset = session->get_render_tile_offset();
+  const int2 tile_size = session->get_render_tile_size();
+
+  b_engine.tile_highlight_clear_all();
+  b_engine.tile_highlight_set(tile_offset.x, tile_offset.y, tile_size.x, tile_size.y, true);
+}
+
 void BlenderSession::full_buffer_written(string_view filename)
 {
  full_buffer_files_.emplace_back(filename);
@@ -354,8 +440,18 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
    return;
  }

-  /* Create driver to write out render results. */
-  session->set_output_driver(make_unique<BlenderOutputDriver>(b_engine));
+  /* set callback to write out render results */
+  session->write_render_tile_cb = [&]() { write_render_tile(); };
+
+  /* Use final write for preview renders, otherwise render result wouldn't be be updated on Blender
+   * side. */
+  /* TODO(sergey): Investigate whether GPUDisplay can be used for the preview as well. */
+  if (b_engine.is_preview()) {
+    session->update_render_tile_cb = [&]() { write_render_tile(); };
+  }
+  else {
+    session->update_render_tile_cb = [&]() { update_render_tile(); };
+  }

  session->full_buffer_written_cb = [&](string_view filename) { full_buffer_written(filename); };

@@ -492,22 +588,13 @@ void BlenderSession::render_frame_finish()

  for (string_view filename : full_buffer_files_) {
    session->process_full_buffer_from_disk(filename);
-    if (check_and_report_session_error()) {
-      break;
-    }
-  }
-
-  for (string_view filename : full_buffer_files_) {
    path_remove(filename);
  }

-  /* Clear driver. */
-  session->set_output_driver(nullptr);
+  /* clear callback */
+  session->write_render_tile_cb = function_null;
+  session->update_render_tile_cb = function_null;
  session->full_buffer_written_cb = function_null;
-
-  /* All the files are handled.
-   * Clear the list so that this session can be re-used by Persistent Data. */
-  full_buffer_files_.clear();
 }

 static PassType bake_type_to_pass(const string &bake_type_str, const int bake_filter)
@@ -612,8 +699,9 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
  pass->set_type(bake_type_to_pass(bake_type, bake_filter));
  pass->set_include_albedo((bake_filter & BL::BakeSettings::pass_filter_COLOR));

-  session->set_display_driver(nullptr);
-  session->set_output_driver(make_unique<BlenderOutputDriver>(b_engine));
+  session->read_render_tile_cb = [&]() { read_render_tile(); };
+  session->write_render_tile_cb = [&]() { write_render_tile(); };
+  session->set_gpu_display(nullptr);

  if (!session->progress.get_cancel()) {
    /* Sync scene. */
@@ -656,7 +744,43 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
    session->wait();
  }

-  session->set_output_driver(nullptr);
+  session->read_render_tile_cb = function_null;
+  session->write_render_tile_cb = function_null;
+}
+
+void BlenderSession::write_render_result(BL::RenderLayer &b_rlay)
+{
+  if (!session->copy_render_tile_from_device()) {
+    return;
+  }
+
+  const int2 tile_size = session->get_render_tile_size();
+  vector<float> pixels(tile_size.x * tile_size.y * 4);
+
+  /* Copy each pass. */
+  for (BL::RenderPass &b_pass : b_rlay.passes) {
+    if (!session->get_render_tile_pixels(b_pass.name(), b_pass.channels(), &pixels[0])) {
+      memset(&pixels[0], 0, pixels.size() * sizeof(float));
+    }
+
+    b_pass.rect(&pixels[0]);
+  }
+}
+
+void BlenderSession::update_render_result(BL::RenderLayer &b_rlay)
+{
+  if (!session->copy_render_tile_from_device()) {
+    return;
+  }
+
+  const int2 tile_size = session->get_render_tile_size();
+  vector<float> pixels(tile_size.x * tile_size.y * 4);
+
+  /* Copy combined pass. */
+  BL::RenderPass b_combined_pass(b_rlay.passes.find_by_name("Combined", b_rview_name.c_str()));
+  if (session->get_render_tile_pixels("Combined", b_combined_pass.channels(), &pixels[0])) {
+    b_combined_pass.rect(&pixels[0]);
+  }
 }

 void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
@@ -764,7 +888,7 @@ void BlenderSession::draw(BL::SpaceImageEditor &space_image)
  }

  BL::Array<float, 2> zoom = space_image.zoom();
-  display_driver_->set_zoom(zoom[0], zoom[1]);
+  gpu_display_->set_zoom(zoom[0], zoom[1]);

  session->draw();
 }
@@ -911,27 +1035,20 @@ void BlenderSession::update_status_progress()
    last_progress = progress;
  }

-  check_and_report_session_error();
-}
-
-bool BlenderSession::check_and_report_session_error()
-{
-  if (!session->progress.get_error()) {
-    return false;
+  if (session->progress.get_error()) {
+    string error = session->progress.get_error_message();
+    if (error != last_error) {
+      /* TODO(sergey): Currently C++ RNA API doesn't let us to
+       * use mnemonic name for the variable. Would be nice to
+       * have this figured out.
+       *
+       * For until then, 1 << 5 means RPT_ERROR.
+       */
+      b_engine.report(1 << 5, error.c_str());
+      b_engine.error_set(error.c_str());
+      last_error = error;
+    }
  }
-
-  const string error = session->progress.get_error_message();
-  if (error != last_error) {
-    /* TODO(sergey): Currently C++ RNA API doesn't let us to use mnemonic name for the variable.
-     * Would be nice to have this figured out.
-     *
-     * For until then, 1 << 5 means RPT_ERROR. */
-    b_engine.report(1 << 5, error.c_str());
-    b_engine.error_set(error.c_str());
-    last_error = error;
-  }
-
-  return true;
 }

 void BlenderSession::tag_update()
--- a/intern/cycles/blender/blender_session.h
+++ b/intern/cycles/blender/blender_session.h
@@ -29,7 +29,7 @@

 CCL_NAMESPACE_BEGIN

-class BlenderDisplayDriver;
+class BlenderGPUDisplay;
 class BlenderSync;
 class ImageMetaData;
 class Scene;
@@ -70,7 +70,20 @@ class BlenderSession {
            const int bake_width,
            const int bake_height);

+  void write_render_result(BL::RenderLayer &b_rlay);
+  void write_render_tile();
+
+  void update_render_tile();
+
  void full_buffer_written(string_view filename);
+
+  /* update functions are used to update display buffer only after sample was rendered
+   * only needed for better visual feedback */
+  void update_render_result(BL::RenderLayer &b_rlay);
+
+  /* read functions for baking input */
+  void read_render_tile();
+
  /* interactive updates */
  void synchronize(BL::Depsgraph &b_depsgraph);

@@ -97,7 +110,8 @@ class BlenderSession {
  BL::RenderSettings b_render;
  BL::Depsgraph b_depsgraph;
  /* NOTE: Blender's scene might become invalid after call
-   * #free_blender_memory_if_possible(). */
+   * free_blender_memory_if_possible().
+   */
  BL::Scene b_scene;
  BL::SpaceView3D b_v3d;
  BL::RegionView3D b_rv3d;
@@ -133,11 +147,6 @@ class BlenderSession {
 protected:
  void stamp_view_layer_metadata(Scene *scene, const string &view_layer_name);

-  /* Check whether session error happened.
-   * If so, it is reported to the render engine and true is returned.
-   * Otherwise false is returned. */
-  bool check_and_report_session_error();
-
  void builtin_images_load();

  /* Is used after each render layer synchronization is done with the goal
@@ -151,8 +160,8 @@ class BlenderSession {
    int last_pass_index = -1;
  } draw_state_;

-  /* NOTE: The BlenderSession references the display driver. */
-  BlenderDisplayDriver *display_driver_ = nullptr;
+  /* NOTE: The BlenderSession references the GPU display. */
+  BlenderGPUDisplay *gpu_display_ = nullptr;

  vector<string> full_buffer_files_;
 };
--- a/intern/cycles/blender/blender_shader.cpp
+++ b/intern/cycles/blender/blender_shader.cpp
@@ -279,7 +279,7 @@ static ShaderNode *add_node(Scene *scene,
    array<float3> curve_mapping_curves;
    float min_x, max_x;
    curvemapping_color_to_array(mapping, curve_mapping_curves, RAMP_TABLE_SIZE, true);
-    curvemapping_minmax(mapping, 4, &min_x, &max_x);
+    curvemapping_minmax(mapping, true, &min_x, &max_x);
    curves->set_min_x(min_x);
    curves->set_max_x(max_x);
    curves->set_curves(curve_mapping_curves);
@@ -292,25 +292,12 @@ static ShaderNode *add_node(Scene *scene,
    array<float3> curve_mapping_curves;
    float min_x, max_x;
    curvemapping_color_to_array(mapping, curve_mapping_curves, RAMP_TABLE_SIZE, false);
-    curvemapping_minmax(mapping, 3, &min_x, &max_x);
+    curvemapping_minmax(mapping, false, &min_x, &max_x);
    curves->set_min_x(min_x);
    curves->set_max_x(max_x);
    curves->set_curves(curve_mapping_curves);
    node = curves;
  }
-  else if (b_node.is_a(&RNA_ShaderNodeFloatCurve)) {
-    BL::ShaderNodeFloatCurve b_curve_node(b_node);
-    BL::CurveMapping mapping(b_curve_node.mapping());
-    FloatCurveNode *curve = graph->create_node<FloatCurveNode>();
-    array<float> curve_mapping_curve;
-    float min_x, max_x;
-    curvemapping_float_to_array(mapping, curve_mapping_curve, RAMP_TABLE_SIZE);
-    curvemapping_minmax(mapping, 1, &min_x, &max_x);
-    curve->set_min_x(min_x);
-    curve->set_max_x(max_x);
-    curve->set_curve(curve_mapping_curve);
-    node = curve;
-  }
  else if (b_node.is_a(&RNA_ShaderNodeValToRGB)) {
    RGBRampNode *ramp = graph->create_node<RGBRampNode>();
    BL::ShaderNodeValToRGB b_ramp_node(b_node);
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -545,6 +545,8 @@ static PassType get_blender_pass_type(BL::RenderPass &b_pass)
  MAP_PASS("Shadow Catcher", PASS_SHADOW_CATCHER);
  MAP_PASS("Noisy Shadow Catcher", PASS_SHADOW_CATCHER);

+  MAP_PASS("Debug Render Time", PASS_RENDER_TIME);
+
  MAP_PASS("AdaptiveAuxBuffer", PASS_ADAPTIVE_AUX_BUFFER);
  MAP_PASS("Debug Sample Count", PASS_SAMPLE_COUNT);

@@ -602,6 +604,10 @@ void BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_v
  PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles");

  /* Debug passes. */
+  if (get_boolean(crl, "pass_debug_render_time")) {
+    b_engine.add_pass("Debug Render Time", 1, "X", b_view_layer.name().c_str());
+    pass_add(scene, PASS_RENDER_TIME, "Debug Render Time");
+  }
  if (get_boolean(crl, "pass_debug_sample_count")) {
    b_engine.add_pass("Debug Sample Count", 1, "X", b_view_layer.name().c_str());
    pass_add(scene, PASS_SAMPLE_COUNT, "Debug Sample Count");
--- a/intern/cycles/blender/blender_util.h
+++ b/intern/cycles/blender/blender_util.h
@@ -90,27 +90,26 @@ static inline BL::Mesh object_to_mesh(BL::BlendData & /*data*/,
  }
 #endif

-  BL::Mesh mesh = (b_ob_info.object_data.is_a(&RNA_Mesh)) ? BL::Mesh(b_ob_info.object_data) :
-                                                            BL::Mesh(PointerRNA_NULL);
+  BL::Mesh mesh(PointerRNA_NULL);
+  if (b_ob_info.object_data.is_a(&RNA_Mesh)) {
+    /* TODO: calc_undeformed is not used. */
+    mesh = BL::Mesh(b_ob_info.object_data);

-  if (b_ob_info.is_real_object_data()) {
-    if (mesh) {
-      /* Make a copy to split faces if we use autosmooth, otherwise not needed.
-       * Also in edit mode do we need to make a copy, to ensure data layers like
-       * UV are not empty. */
-      if (mesh.is_editmode() ||
-          (mesh.use_auto_smooth() && subdivision_type == Mesh::SUBDIVISION_NONE)) {
-        BL::Depsgraph depsgraph(PointerRNA_NULL);
-        mesh = b_ob_info.real_object.to_mesh(false, depsgraph);
-      }
-    }
-    else {
+    /* Make a copy to split faces if we use autosmooth, otherwise not needed.
+     * Also in edit mode do we need to make a copy, to ensure data layers like
+     * UV are not empty. */
+    if (mesh.is_editmode() ||
+        (mesh.use_auto_smooth() && subdivision_type == Mesh::SUBDIVISION_NONE)) {
      BL::Depsgraph depsgraph(PointerRNA_NULL);
+      assert(b_ob_info.is_real_object_data());
      mesh = b_ob_info.real_object.to_mesh(false, depsgraph);
    }
  }
  else {
-    /* TODO: what to do about non-mesh geometry instances? */
+    BL::Depsgraph depsgraph(PointerRNA_NULL);
+    if (b_ob_info.is_real_object_data()) {
+      mesh = b_ob_info.real_object.to_mesh(false, depsgraph);
+    }
  }

 #if 0
@@ -171,11 +170,12 @@ static inline void curvemap_minmax_curve(/*const*/ BL::CurveMap &curve, float *m
 }

 static inline void curvemapping_minmax(/*const*/ BL::CurveMapping &cumap,
-                                       int num_curves,
+                                       bool rgb_curve,
                                       float *min_x,
                                       float *max_x)
 {
  // const int num_curves = cumap.curves.length(); /* Gives linking error so far. */
+  const int num_curves = rgb_curve ? 4 : 3;
  *min_x = FLT_MAX;
  *max_x = -FLT_MAX;
  for (int i = 0; i < num_curves; ++i) {
@@ -195,28 +195,6 @@ static inline void curvemapping_to_array(BL::CurveMapping &cumap, array<float> &
  }
 }

-static inline void curvemapping_float_to_array(BL::CurveMapping &cumap,
-                                               array<float> &data,
-                                               int size)
-{
-  float min = 0.0f, max = 1.0f;
-
-  curvemapping_minmax(cumap, 1, &min, &max);
-
-  const float range = max - min;
-
-  cumap.update();
-
-  BL::CurveMap map = cumap.curves[0];
-
-  data.resize(size);
-
-  for (int i = 0; i < size; i++) {
-    float t = min + (float)i / (float)(size - 1) * range;
-    data[i] = cumap.evaluate(map, t);
-  }
-}
-
 static inline void curvemapping_color_to_array(BL::CurveMapping &cumap,
                                               array<float3> &data,
                                               int size,
@@ -235,8 +213,7 @@ static inline void curvemapping_color_to_array(BL::CurveMapping &cumap,
   *
   * There might be some better estimations here tho.
   */
-  const int num_curves = rgb_curve ? 4 : 3;
-  curvemapping_minmax(cumap, num_curves, &min_x, &max_x);
+  curvemapping_minmax(cumap, rgb_curve, &min_x, &max_x);

  const float range_x = max_x - min_x;

--- a/intern/cycles/bvh/bvh_embree.cpp
+++ b/intern/cycles/bvh/bvh_embree.cpp
@@ -213,7 +213,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
      if (ctx->num_hits < ctx->max_hits) {
        Intersection current_isect;
        kernel_embree_convert_hit(kg, ray, hit, &current_isect);
-        for (size_t i = 0; i < ctx->num_hits; ++i) {
+        for (size_t i = 0; i < ctx->max_hits; ++i) {
          if (current_isect.object == ctx->isect_s[i].object &&
              current_isect.prim == ctx->isect_s[i].prim && current_isect.t == ctx->isect_s[i].t) {
            /* This intersection was already recorded, skip it. */
--- a/intern/cycles/cmake/external_libs.cmake
+++ b/intern/cycles/cmake/external_libs.cmake
@@ -532,13 +532,4 @@ if(WITH_CYCLES_CUDA_BINARIES OR NOT WITH_CUDA_DYNLOAD)
  endif()
 endif()

-
-###########################################################################
-# HIP
-###########################################################################
-
-if(NOT WITH_HIP_DYNLOAD)
-  set(WITH_HIP_DYNLOAD ON)
-endif()
-
 unset(_cycles_lib_dir)
--- a/intern/cycles/cmake/macros.cmake
+++ b/intern/cycles/cmake/macros.cmake
@@ -156,16 +156,10 @@ macro(cycles_target_link_libraries target)
    ${PLATFORM_LINKLIBS}
  )

-  if(WITH_CYCLES_DEVICE_CUDA OR WITH_CYCLES_DEVICE_OPTIX)
-    if(WITH_CUDA_DYNLOAD)
-      target_link_libraries(${target} extern_cuew)
-    else()
-      target_link_libraries(${target} ${CUDA_CUDA_LIBRARY})
-    endif()
-  endif()
-
-  if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
-    target_link_libraries(${target} extern_hipew)
+  if(WITH_CUDA_DYNLOAD)
+    target_link_libraries(${target} extern_cuew)
+  else()
+    target_link_libraries(${target} ${CUDA_CUDA_LIBRARY})
  endif()

  if(CYCLES_STANDALONE_REPOSITORY)
--- a/intern/cycles/device/CMakeLists.txt
+++ b/intern/cycles/device/CMakeLists.txt
@@ -22,25 +22,16 @@ set(INC_SYS
  ../../../extern/clew/include
 )

-if(WITH_CYCLES_DEVICE_OPTIX OR WITH_CYCLES_DEVICE_CUDA)
-  if(WITH_CUDA_DYNLOAD)
-    list(APPEND INC
-      ../../../extern/cuew/include
-    )
-    add_definitions(-DWITH_CUDA_DYNLOAD)
-  else()
-    list(APPEND INC_SYS
-      ${CUDA_TOOLKIT_INCLUDE}
-    )
-    add_definitions(-DCYCLES_CUDA_NVCC_EXECUTABLE="${CUDA_NVCC_EXECUTABLE}")
-  endif()
-endif()
-
-if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
+if(WITH_CUDA_DYNLOAD)
  list(APPEND INC
-    ../../../extern/hipew/include
+    ../../../extern/cuew/include
  )
-  add_definitions(-DWITH_HIP_DYNLOAD)
+  add_definitions(-DWITH_CUDA_DYNLOAD)
+else()
+  list(APPEND INC_SYS
+    ${CUDA_TOOLKIT_INCLUDE}
+  )
+  add_definitions(-DCYCLES_CUDA_NVCC_EXECUTABLE="${CUDA_NVCC_EXECUTABLE}")
 endif()

 set(SRC
@@ -79,21 +70,6 @@ set(SRC_CUDA
  cuda/util.h
 )

-set(SRC_HIP
-  hip/device.cpp
-  hip/device.h
-  hip/device_impl.cpp
-  hip/device_impl.h
-  hip/graphics_interop.cpp
-  hip/graphics_interop.h
-  hip/kernel.cpp
-  hip/kernel.h
-  hip/queue.cpp
-  hip/queue.h
-  hip/util.cpp
-  hip/util.h
-)
-
 set(SRC_DUMMY
  dummy/device.cpp
  dummy/device.h
@@ -129,21 +105,13 @@ set(LIB
  ${CYCLES_GL_LIBRARIES}
 )

-if(WITH_CYCLES_DEVICE_OPTIX OR WITH_CYCLES_DEVICE_CUDA)
-  if(WITH_CUDA_DYNLOAD)
-    list(APPEND LIB
-      extern_cuew
-    )
-  else()
-    list(APPEND LIB
-      ${CUDA_CUDA_LIBRARY}
-    )
-  endif()
-endif()
-
-if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
+if(WITH_CUDA_DYNLOAD)
  list(APPEND LIB
-    extern_hipew
+    extern_cuew
+  )
+else()
+  list(APPEND LIB
+    ${CUDA_CUDA_LIBRARY}
  )
 endif()

@@ -152,9 +120,6 @@ add_definitions(${GL_DEFINITIONS})
 if(WITH_CYCLES_DEVICE_CUDA)
  add_definitions(-DWITH_CUDA)
 endif()
-if(WITH_CYCLES_DEVICE_HIP)
-  add_definitions(-DWITH_HIP)
-endif()
 if(WITH_CYCLES_DEVICE_OPTIX)
  add_definitions(-DWITH_OPTIX)
 endif()
@@ -175,7 +140,6 @@ cycles_add_library(cycles_device "${LIB}"
  ${SRC}
  ${SRC_CPU}
  ${SRC_CUDA}
-  ${SRC_HIP}
  ${SRC_DUMMY}
  ${SRC_MULTI}
  ${SRC_OPTIX}
--- a/intern/cycles/device/cuda/graphics_interop.cpp
+++ b/intern/cycles/device/cuda/graphics_interop.cpp
@@ -37,15 +37,14 @@ CUDADeviceGraphicsInterop::~CUDADeviceGraphicsInterop()
  }
 }

-void CUDADeviceGraphicsInterop::set_display_interop(
-    const DisplayDriver::GraphicsInterop &display_interop)
+void CUDADeviceGraphicsInterop::set_destination(
+    const DeviceGraphicsInteropDestination &destination)
 {
-  const int64_t new_buffer_area = int64_t(display_interop.buffer_width) *
-                                  display_interop.buffer_height;
+  const int64_t new_buffer_area = int64_t(destination.buffer_width) * destination.buffer_height;

-  need_clear_ = display_interop.need_clear;
+  need_clear_ = destination.need_clear;

-  if (opengl_pbo_id_ == display_interop.opengl_pbo_id && buffer_area_ == new_buffer_area) {
+  if (opengl_pbo_id_ == destination.opengl_pbo_id && buffer_area_ == new_buffer_area) {
    return;
  }

@@ -56,12 +55,12 @@ void CUDADeviceGraphicsInterop::set_display_interop(
  }

  const CUresult result = cuGraphicsGLRegisterBuffer(
-      &cu_graphics_resource_, display_interop.opengl_pbo_id, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
+      &cu_graphics_resource_, destination.opengl_pbo_id, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
  if (result != CUDA_SUCCESS) {
    LOG(ERROR) << "Error registering OpenGL buffer: " << cuewErrorString(result);
  }

-  opengl_pbo_id_ = display_interop.opengl_pbo_id;
+  opengl_pbo_id_ = destination.opengl_pbo_id;
  buffer_area_ = new_buffer_area;
 }

--- a/intern/cycles/device/cuda/graphics_interop.h
+++ b/intern/cycles/device/cuda/graphics_interop.h
@@ -41,7 +41,7 @@ class CUDADeviceGraphicsInterop : public DeviceGraphicsInterop {
  CUDADeviceGraphicsInterop &operator=(const CUDADeviceGraphicsInterop &other) = delete;
  CUDADeviceGraphicsInterop &operator=(CUDADeviceGraphicsInterop &&other) = delete;

-  virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) override;
+  virtual void set_destination(const DeviceGraphicsInteropDestination &destination) override;

  virtual device_ptr map() override;
  virtual void unmap() override;
--- a/intern/cycles/device/cuda/queue.cpp
+++ b/intern/cycles/device/cuda/queue.cpp
@@ -116,18 +116,18 @@ bool CUDADeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *ar
  }

  /* Launch kernel. */
-  assert_success(cuLaunchKernel(cuda_kernel.function,
-                                num_blocks,
-                                1,
-                                1,
-                                num_threads_per_block,
-                                1,
-                                1,
-                                shared_mem_bytes,
-                                cuda_stream_,
-                                args,
-                                0),
-                 "enqueue");
+  cuda_device_assert(cuda_device_,
+                     cuLaunchKernel(cuda_kernel.function,
+                                    num_blocks,
+                                    1,
+                                    1,
+                                    num_threads_per_block,
+                                    1,
+                                    1,
+                                    shared_mem_bytes,
+                                    cuda_stream_,
+                                    args,
+                                    0));

  return !(cuda_device_->have_error());
 }
@@ -139,8 +139,7 @@ bool CUDADeviceQueue::synchronize()
  }

  const CUDAContextScope scope(cuda_device_);
-  assert_success(cuStreamSynchronize(cuda_stream_), "synchronize");
-
+  cuda_device_assert(cuda_device_, cuStreamSynchronize(cuda_stream_));
  debug_synchronize();

  return !(cuda_device_->have_error());
@@ -163,9 +162,9 @@ void CUDADeviceQueue::zero_to_device(device_memory &mem)
  assert(mem.device_pointer != 0);

  const CUDAContextScope scope(cuda_device_);
-  assert_success(
-      cuMemsetD8Async((CUdeviceptr)mem.device_pointer, 0, mem.memory_size(), cuda_stream_),
-      "zero_to_device");
+  cuda_device_assert(
+      cuda_device_,
+      cuMemsetD8Async((CUdeviceptr)mem.device_pointer, 0, mem.memory_size(), cuda_stream_));
 }

 void CUDADeviceQueue::copy_to_device(device_memory &mem)
@@ -186,10 +185,10 @@ void CUDADeviceQueue::copy_to_device(device_memory &mem)

  /* Copy memory to device. */
  const CUDAContextScope scope(cuda_device_);
-  assert_success(
+  cuda_device_assert(
+      cuda_device_,
      cuMemcpyHtoDAsync(
-          (CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size(), cuda_stream_),
-      "copy_to_device");
+          (CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size(), cuda_stream_));
 }

 void CUDADeviceQueue::copy_from_device(device_memory &mem)
@@ -205,19 +204,10 @@ void CUDADeviceQueue::copy_from_device(device_memory &mem)

  /* Copy memory from device. */
  const CUDAContextScope scope(cuda_device_);
-  assert_success(
+  cuda_device_assert(
+      cuda_device_,
      cuMemcpyDtoHAsync(
-          mem.host_pointer, (CUdeviceptr)mem.device_pointer, mem.memory_size(), cuda_stream_),
-      "copy_from_device");
-}
-
-void CUDADeviceQueue::assert_success(CUresult result, const char *operation)
-{
-  if (result != CUDA_SUCCESS) {
-    const char *name = cuewErrorString(result);
-    cuda_device_->set_error(string_printf(
-        "%s in CUDA queue %s (%s)", name, operation, debug_active_kernels().c_str()));
-  }
+          mem.host_pointer, (CUdeviceptr)mem.device_pointer, mem.memory_size(), cuda_stream_));
 }

 unique_ptr<DeviceGraphicsInterop> CUDADeviceQueue::graphics_interop_create()
--- a/intern/cycles/device/cuda/queue.h
+++ b/intern/cycles/device/cuda/queue.h
@@ -60,8 +60,6 @@ class CUDADeviceQueue : public DeviceQueue {
 protected:
  CUDADevice *cuda_device_;
  CUstream cuda_stream_;
-
-  void assert_success(CUresult result, const char *operation);
 };

 CCL_NAMESPACE_END
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -25,7 +25,6 @@
 #include "device/cpu/device.h"
 #include "device/cuda/device.h"
 #include "device/dummy/device.h"
-#include "device/hip/device.h"
 #include "device/multi/device.h"
 #include "device/optix/device.h"

@@ -47,7 +46,6 @@ thread_mutex Device::device_mutex;
 vector<DeviceInfo> Device::cuda_devices;
 vector<DeviceInfo> Device::optix_devices;
 vector<DeviceInfo> Device::cpu_devices;
-vector<DeviceInfo> Device::hip_devices;
 uint Device::devices_initialized_mask = 0;

 /* Device */
@@ -98,14 +96,6 @@ Device *Device::create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
        device = device_optix_create(info, stats, profiler);
      break;
 #endif
-
-#ifdef WITH_HIP
-    case DEVICE_HIP:
-      if (device_hip_init())
-        device = device_hip_create(info, stats, profiler);
-      break;
-#endif
-
    default:
      break;
  }
@@ -127,8 +117,6 @@ DeviceType Device::type_from_string(const char *name)
    return DEVICE_OPTIX;
  else if (strcmp(name, "MULTI") == 0)
    return DEVICE_MULTI;
-  else if (strcmp(name, "HIP") == 0)
-    return DEVICE_HIP;

  return DEVICE_NONE;
 }
@@ -143,8 +131,6 @@ string Device::string_from_type(DeviceType type)
    return "OPTIX";
  else if (type == DEVICE_MULTI)
    return "MULTI";
-  else if (type == DEVICE_HIP)
-    return "HIP";

  return "";
 }
@@ -159,10 +145,6 @@ vector<DeviceType> Device::available_types()
 #ifdef WITH_OPTIX
  types.push_back(DEVICE_OPTIX);
 #endif
-#ifdef WITH_HIP
-  types.push_back(DEVICE_HIP);
-#endif
-
  return types;
 }

@@ -204,20 +186,6 @@ vector<DeviceInfo> Device::available_devices(uint mask)
  }
 #endif

-#ifdef WITH_HIP
-  if (mask & DEVICE_MASK_HIP) {
-    if (!(devices_initialized_mask & DEVICE_MASK_HIP)) {
-      if (device_hip_init()) {
-        device_hip_info(hip_devices);
-      }
-      devices_initialized_mask |= DEVICE_MASK_HIP;
-    }
-    foreach (DeviceInfo &info, hip_devices) {
-      devices.push_back(info);
-    }
-  }
-#endif
-
  if (mask & DEVICE_MASK_CPU) {
    if (!(devices_initialized_mask & DEVICE_MASK_CPU)) {
      device_cpu_info(cpu_devices);
@@ -258,15 +226,6 @@ string Device::device_capabilities(uint mask)
  }
 #endif

-#ifdef WITH_HIP
-  if (mask & DEVICE_MASK_HIP) {
-    if (device_hip_init()) {
-      capabilities += "\nHIP device capabilities:\n";
-      capabilities += device_hip_capabilities();
-    }
-  }
-#endif
-
  return capabilities;
 }

@@ -355,7 +314,6 @@ void Device::free_memory()
  devices_initialized_mask = 0;
  cuda_devices.free_memory();
  optix_devices.free_memory();
-  hip_devices.free_memory();
  cpu_devices.free_memory();
 }

--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -51,7 +51,6 @@ enum DeviceType {
  DEVICE_CUDA,
  DEVICE_MULTI,
  DEVICE_OPTIX,
-  DEVICE_HIP,
  DEVICE_DUMMY,
 };

@@ -59,7 +58,6 @@ enum DeviceTypeMask {
  DEVICE_MASK_CPU = (1 << DEVICE_CPU),
  DEVICE_MASK_CUDA = (1 << DEVICE_CUDA),
  DEVICE_MASK_OPTIX = (1 << DEVICE_OPTIX),
-  DEVICE_MASK_HIP = (1 << DEVICE_HIP),
  DEVICE_MASK_ALL = ~0
 };

@@ -286,7 +284,6 @@ class Device {
  static vector<DeviceInfo> cuda_devices;
  static vector<DeviceInfo> optix_devices;
  static vector<DeviceInfo> cpu_devices;
-  static vector<DeviceInfo> hip_devices;
  static uint devices_initialized_mask;
 };

--- a/intern/cycles/device/device_graphics_interop.h
+++ b/intern/cycles/device/device_graphics_interop.h
@@ -16,12 +16,25 @@

 #pragma once

-#include "render/display_driver.h"
-
 #include "util/util_types.h"

 CCL_NAMESPACE_BEGIN

+/* Information about interoperability destination.
+ * Is provided by the GPUDisplay. */
+class DeviceGraphicsInteropDestination {
+ public:
+  /* Dimensions of the buffer, in pixels. */
+  int buffer_width = 0;
+  int buffer_height = 0;
+
+  /* OpenGL pixel buffer object. */
+  int opengl_pbo_id = 0;
+
+  /* Clear the entire destination before doing partial write to it. */
+  bool need_clear = false;
+};
+
 /* Device-side graphics interoperability support.
 *
 * Takes care of holding all the handlers needed by the device to implement interoperability with
@@ -33,7 +46,7 @@ class DeviceGraphicsInterop {

  /* Update this device-side graphics interoperability object with the given destination resource
   * information. */
-  virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) = 0;
+  virtual void set_destination(const DeviceGraphicsInteropDestination &destination) = 0;

  virtual device_ptr map() = 0;
  virtual void unmap() = 0;
--- a/intern/cycles/device/device_memory.h
+++ b/intern/cycles/device/device_memory.h
@@ -277,7 +277,6 @@ class device_memory {
 protected:
  friend class CUDADevice;
  friend class OptiXDevice;
-  friend class HIPDevice;

  /* Only create through subclasses. */
  device_memory(Device *device, const char *name, MemoryType type);
--- a/intern/cycles/device/device_queue.cpp
+++ b/intern/cycles/device/device_queue.cpp
@@ -57,9 +57,8 @@ void DeviceQueue::debug_init_execution()
 {
  if (VLOG_IS_ON(3)) {
    last_sync_time_ = time_dt();
+    last_kernels_enqueued_ = 0;
  }
-
-  last_kernels_enqueued_ = 0;
 }

 void DeviceQueue::debug_enqueue(DeviceKernel kernel, const int work_size)
@@ -67,9 +66,8 @@ void DeviceQueue::debug_enqueue(DeviceKernel kernel, const int work_size)
  if (VLOG_IS_ON(3)) {
    VLOG(4) << "GPU queue launch " << device_kernel_as_string(kernel) << ", work_size "
            << work_size;
+    last_kernels_enqueued_ |= (uint64_t(1) << (uint64_t)kernel);
  }
-
-  last_kernels_enqueued_ |= (uint64_t(1) << (uint64_t)kernel);
 }

 void DeviceQueue::debug_synchronize()
@@ -82,14 +80,8 @@ void DeviceQueue::debug_synchronize()
    stats_kernel_time_[last_kernels_enqueued_] += elapsed_time;

    last_sync_time_ = new_time;
+    last_kernels_enqueued_ = 0;
  }
-
-  last_kernels_enqueued_ = 0;
-}
-
-string DeviceQueue::debug_active_kernels()
-{
-  return device_kernel_mask_as_string(last_kernels_enqueued_);
 }

 CCL_NAMESPACE_END
--- a/intern/cycles/device/device_queue.h
+++ b/intern/cycles/device/device_queue.h
@@ -21,7 +21,6 @@
 #include "device/device_graphics_interop.h"
 #include "util/util_logging.h"
 #include "util/util_map.h"
-#include "util/util_string.h"
 #include "util/util_unique_ptr.h"

 CCL_NAMESPACE_BEGIN
@@ -102,7 +101,6 @@ class DeviceQueue {
  void debug_init_execution();
  void debug_enqueue(DeviceKernel kernel, const int work_size);
  void debug_synchronize();
-  string debug_active_kernels();

  /* Combination of kernels enqueued together sync last synchronize. */
  DeviceKernelMask last_kernels_enqueued_;
--- a/intern/cycles/device/hip/device.cpp
+++ b/intern/cycles/device/hip/device.cpp
@@ -1,276 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "device/hip/device.h"
-
-#include "util/util_logging.h"
-
-#ifdef WITH_HIP
-#  include "device/device.h"
-#  include "device/hip/device_impl.h"
-
-#  include "util/util_string.h"
-#  include "util/util_windows.h"
-#endif /* WITH_HIP */
-
-CCL_NAMESPACE_BEGIN
-
-bool device_hip_init()
-{
-#if !defined(WITH_HIP)
-  return false;
-#elif defined(WITH_HIP_DYNLOAD)
-  static bool initialized = false;
-  static bool result = false;
-
-  if (initialized)
-    return result;
-
-  initialized = true;
-  int hipew_result = hipewInit(HIPEW_INIT_HIP);
-  if (hipew_result == HIPEW_SUCCESS) {
-    VLOG(1) << "HIPEW initialization succeeded";
-    if (HIPDevice::have_precompiled_kernels()) {
-      VLOG(1) << "Found precompiled kernels";
-      result = true;
-    }
-    else if (hipewCompilerPath() != NULL) {
-      VLOG(1) << "Found HIPCC " << hipewCompilerPath();
-      result = true;
-    }
-    else {
-      VLOG(1) << "Neither precompiled kernels nor HIPCC was found,"
-              << " unable to use HIP";
-    }
-  }
-  else {
-    VLOG(1) << "HIPEW initialization failed: "
-            << ((hipew_result == HIPEW_ERROR_ATEXIT_FAILED) ? "Error setting up atexit() handler" :
-                                                              "Error opening the library");
-  }
-
-  return result;
-#else  /* WITH_HIP_DYNLOAD */
-  return true;
-#endif /* WITH_HIP_DYNLOAD */
-}
-
-Device *device_hip_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
-{
-#ifdef WITH_HIP
-  return new HIPDevice(info, stats, profiler);
-#else
-  (void)info;
-  (void)stats;
-  (void)profiler;
-
-  LOG(FATAL) << "Request to create HIP device without compiled-in support. Should never happen.";
-
-  return nullptr;
-#endif
-}
-
-#ifdef WITH_HIP
-static hipError_t device_hip_safe_init()
-{
-#  ifdef _WIN32
-  __try {
-    return hipInit(0);
-  }
-  __except (EXCEPTION_EXECUTE_HANDLER) {
-    /* Ignore crashes inside the HIP driver and hope we can
-     * survive even with corrupted HIP installs. */
-    fprintf(stderr, "Cycles HIP: driver crashed, continuing without HIP.\n");
-  }
-
-  return hipErrorNoDevice;
-#  else
-  return hipInit(0);
-#  endif
-}
-#endif /* WITH_HIP */
-
-void device_hip_info(vector<DeviceInfo> &devices)
-{
-#ifdef WITH_HIP
-  hipError_t result = device_hip_safe_init();
-  if (result != hipSuccess) {
-    if (result != hipErrorNoDevice)
-      fprintf(stderr, "HIP hipInit: %s\n", hipewErrorString(result));
-    return;
-  }
-
-  int count = 0;
-  result = hipGetDeviceCount(&count);
-  if (result != hipSuccess) {
-    fprintf(stderr, "HIP hipGetDeviceCount: %s\n", hipewErrorString(result));
-    return;
-  }
-
-  vector<DeviceInfo> display_devices;
-
-  for (int num = 0; num < count; num++) {
-    char name[256];
-
-    result = hipDeviceGetName(name, 256, num);
-    if (result != hipSuccess) {
-      fprintf(stderr, "HIP :hipDeviceGetName: %s\n", hipewErrorString(result));
-      continue;
-    }
-
-    int major;
-    hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, num);
-    // TODO : (Arya) What is the last major version we are supporting?
-
-    DeviceInfo info;
-
-    info.type = DEVICE_HIP;
-    info.description = string(name);
-    info.num = num;
-
-    info.has_half_images = (major >= 3);
-    info.has_nanovdb = true;
-    info.denoisers = 0;
-
-    info.has_gpu_queue = true;
-    /* Check if the device has P2P access to any other device in the system. */
-    for (int peer_num = 0; peer_num < count && !info.has_peer_memory; peer_num++) {
-      if (num != peer_num) {
-        int can_access = 0;
-        hipDeviceCanAccessPeer(&can_access, num, peer_num);
-        info.has_peer_memory = (can_access != 0);
-      }
-    }
-
-    int pci_location[3] = {0, 0, 0};
-    hipDeviceGetAttribute(&pci_location[0], hipDeviceAttributePciDomainID, num);
-    hipDeviceGetAttribute(&pci_location[1], hipDeviceAttributePciBusId, num);
-    hipDeviceGetAttribute(&pci_location[2], hipDeviceAttributePciDeviceId, num);
-    info.id = string_printf("HIP_%s_%04x:%02x:%02x",
-                            name,
-                            (unsigned int)pci_location[0],
-                            (unsigned int)pci_location[1],
-                            (unsigned int)pci_location[2]);
-
-    /* If device has a kernel timeout and no compute preemption, we assume
-     * it is connected to a display and will freeze the display while doing
-     * computations. */
-    int timeout_attr = 0, preempt_attr = 0;
-    hipDeviceGetAttribute(&timeout_attr, hipDeviceAttributeKernelExecTimeout, num);
-
-    if (timeout_attr && !preempt_attr) {
-      VLOG(1) << "Device is recognized as display.";
-      info.description += " (Display)";
-      info.display_device = true;
-      display_devices.push_back(info);
-    }
-    else {
-      VLOG(1) << "Device has compute preemption or is not used for display.";
-      devices.push_back(info);
-    }
-    VLOG(1) << "Added device \"" << name << "\" with id \"" << info.id << "\".";
-  }
-
-  if (!display_devices.empty())
-    devices.insert(devices.end(), display_devices.begin(), display_devices.end());
-#else  /* WITH_HIP */
-  (void)devices;
-#endif /* WITH_HIP */
-}
-
-string device_hip_capabilities()
-{
-#ifdef WITH_HIP
-  hipError_t result = device_hip_safe_init();
-  if (result != hipSuccess) {
-    if (result != hipErrorNoDevice) {
-      return string("Error initializing HIP: ") + hipewErrorString(result);
-    }
-    return "No HIP device found\n";
-  }
-
-  int count;
-  result = hipGetDeviceCount(&count);
-  if (result != hipSuccess) {
-    return string("Error getting devices: ") + hipewErrorString(result);
-  }
-
-  string capabilities = "";
-  for (int num = 0; num < count; num++) {
-    char name[256];
-    if (hipDeviceGetName(name, 256, num) != hipSuccess) {
-      continue;
-    }
-    capabilities += string("\t") + name + "\n";
-    int value;
-#  define GET_ATTR(attr) \
-    { \
-      if (hipDeviceGetAttribute(&value, hipDeviceAttribute##attr, num) == hipSuccess) { \
-        capabilities += string_printf("\t\thipDeviceAttribute" #attr "\t\t\t%d\n", value); \
-      } \
-    } \
-    (void)0
-    /* TODO(sergey): Strip all attributes which are not useful for us
-     * or does not depend on the driver.
-     */
-    GET_ATTR(MaxThreadsPerBlock);
-    GET_ATTR(MaxBlockDimX);
-    GET_ATTR(MaxBlockDimY);
-    GET_ATTR(MaxBlockDimZ);
-    GET_ATTR(MaxGridDimX);
-    GET_ATTR(MaxGridDimY);
-    GET_ATTR(MaxGridDimZ);
-    GET_ATTR(MaxSharedMemoryPerBlock);
-    GET_ATTR(TotalConstantMemory);
-    GET_ATTR(WarpSize);
-    GET_ATTR(MaxPitch);
-    GET_ATTR(MaxRegistersPerBlock);
-    GET_ATTR(ClockRate);
-    GET_ATTR(TextureAlignment);
-    GET_ATTR(MultiprocessorCount);
-    GET_ATTR(KernelExecTimeout);
-    GET_ATTR(Integrated);
-    GET_ATTR(CanMapHostMemory);
-    GET_ATTR(ComputeMode);
-    GET_ATTR(MaxTexture1DWidth);
-    GET_ATTR(MaxTexture2DWidth);
-    GET_ATTR(MaxTexture2DHeight);
-    GET_ATTR(MaxTexture3DWidth);
-    GET_ATTR(MaxTexture3DHeight);
-    GET_ATTR(MaxTexture3DDepth);
-    GET_ATTR(ConcurrentKernels);
-    GET_ATTR(EccEnabled);
-    GET_ATTR(MemoryClockRate);
-    GET_ATTR(MemoryBusWidth);
-    GET_ATTR(L2CacheSize);
-    GET_ATTR(MaxThreadsPerMultiProcessor);
-    GET_ATTR(ComputeCapabilityMajor);
-    GET_ATTR(ComputeCapabilityMinor);
-    GET_ATTR(MaxSharedMemoryPerMultiprocessor);
-    GET_ATTR(ManagedMemory);
-    GET_ATTR(IsMultiGpuBoard);
-#  undef GET_ATTR
-    capabilities += "\n";
-  }
-
-  return capabilities;
-
-#else  /* WITH_HIP */
-  return "";
-#endif /* WITH_HIP */
-}
-
-CCL_NAMESPACE_END
--- a/intern/cycles/device/hip/device.h
+++ b/intern/cycles/device/hip/device.h
@@ -1,37 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "util/util_string.h"
-#include "util/util_vector.h"
-
-CCL_NAMESPACE_BEGIN
-
-class Device;
-class DeviceInfo;
-class Profiler;
-class Stats;
-
-bool device_hip_init();
-
-Device *device_hip_create(const DeviceInfo &info, Stats &stats, Profiler &profiler);
-
-void device_hip_info(vector<DeviceInfo> &devices);
-
-string device_hip_capabilities();
-
-CCL_NAMESPACE_END
--- a/intern/cycles/device/hip/device_impl.cpp
+++ b/intern/cycles/device/hip/device_impl.cpp
--- a/intern/cycles/device/hip/device_impl.h
+++ b/intern/cycles/device/hip/device_impl.h
@@ -1,153 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef WITH_HIP
-
-#  include "device/device.h"
-#  include "device/hip/kernel.h"
-#  include "device/hip/queue.h"
-#  include "device/hip/util.h"
-
-#  include "util/util_map.h"
-
-#  ifdef WITH_HIP_DYNLOAD
-#    include "hipew.h"
-#  else
-#    include "util/util_opengl.h"
-#  endif
-
-CCL_NAMESPACE_BEGIN
-
-class DeviceQueue;
-
-class HIPDevice : public Device {
-
-  friend class HIPContextScope;
-
- public:
-  hipDevice_t hipDevice;
-  hipCtx_t hipContext;
-  hipModule_t hipModule;
-  size_t device_texture_headroom;
-  size_t device_working_headroom;
-  bool move_texture_to_host;
-  size_t map_host_used;
-  size_t map_host_limit;
-  int can_map_host;
-  int pitch_alignment;
-  int hipDevId;
-  int hipDevArchitecture;
-  bool first_error;
-
-  struct HIPMem {
-    HIPMem() : texobject(0), array(0), use_mapped_host(false)
-    {
-    }
-
-    hipTextureObject_t texobject;
-    hArray array;
-
-    /* If true, a mapped host memory in shared_pointer is being used. */
-    bool use_mapped_host;
-  };
-  typedef map<device_memory *, HIPMem> HIPMemMap;
-  HIPMemMap hip_mem_map;
-  thread_mutex hip_mem_map_mutex;
-
-  /* Bindless Textures */
-  device_vector<TextureInfo> texture_info;
-  bool need_texture_info;
-
-  HIPDeviceKernels kernels;
-
-  static bool have_precompiled_kernels();
-
-  virtual bool show_samples() const override;
-
-  virtual BVHLayoutMask get_bvh_layout_mask() const override;
-
-  void set_error(const string &error) override;
-
-  HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
-
-  virtual ~HIPDevice();
-
-  bool support_device(const uint /*kernel_features*/);
-
-  bool check_peer_access(Device *peer_device) override;
-
-  bool use_adaptive_compilation();
-
-  virtual string compile_kernel_get_common_cflags(const uint kernel_features);
-
-  string compile_kernel(const uint kernel_features,
-                        const char *name,
-                        const char *base = "hip",
-                        bool force_ptx = false);
-
-  virtual bool load_kernels(const uint kernel_features) override;
-  void reserve_local_memory(const uint kernel_features);
-
-  void init_host_memory();
-
-  void load_texture_info();
-
-  void move_textures_to_host(size_t size, bool for_texture);
-
-  HIPMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
-
-  void generic_copy_to(device_memory &mem);
-
-  void generic_free(device_memory &mem);
-
-  void mem_alloc(device_memory &mem) override;
-
-  void mem_copy_to(device_memory &mem) override;
-
-  void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
-
-  void mem_zero(device_memory &mem) override;
-
-  void mem_free(device_memory &mem) override;
-
-  device_ptr mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/) override;
-
-  virtual void const_copy_to(const char *name, void *host, size_t size) override;
-
-  void global_alloc(device_memory &mem);
-
-  void global_free(device_memory &mem);
-
-  void tex_alloc(device_texture &mem);
-
-  void tex_free(device_texture &mem);
-
-  /* Graphics resources interoperability. */
-  virtual bool should_use_graphics_interop() override;
-
-  virtual unique_ptr<DeviceQueue> gpu_queue_create() override;
-
-  int get_num_multiprocessors();
-  int get_max_num_threads_per_multiprocessor();
-
- protected:
-  bool get_device_attribute(hipDeviceAttribute_t attribute, int *value);
-  int get_device_default_attribute(hipDeviceAttribute_t attribute, int default_value);
-};
-
-CCL_NAMESPACE_END
-
-#endif
--- a/intern/cycles/device/hip/graphics_interop.cpp
+++ b/intern/cycles/device/hip/graphics_interop.cpp
@@ -1,105 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef WITH_HIP
-
-#  include "device/hip/graphics_interop.h"
-
-#  include "device/hip/device_impl.h"
-#  include "device/hip/util.h"
-
-CCL_NAMESPACE_BEGIN
-
-HIPDeviceGraphicsInterop::HIPDeviceGraphicsInterop(HIPDeviceQueue *queue)
-    : queue_(queue), device_(static_cast<HIPDevice *>(queue->device))
-{
-}
-
-HIPDeviceGraphicsInterop::~HIPDeviceGraphicsInterop()
-{
-  HIPContextScope scope(device_);
-
-  if (hip_graphics_resource_) {
-    hip_device_assert(device_, hipGraphicsUnregisterResource(hip_graphics_resource_));
-  }
-}
-
-void HIPDeviceGraphicsInterop::set_display_interop(
-    const DisplayDriver::GraphicsInterop &display_interop)
-{
-  const int64_t new_buffer_area = int64_t(display_interop.buffer_width) *
-                                  display_interop.buffer_height;
-
-  need_clear_ = display_interop.need_clear;
-
-  if (opengl_pbo_id_ == display_interop.opengl_pbo_id && buffer_area_ == new_buffer_area) {
-    return;
-  }
-
-  HIPContextScope scope(device_);
-
-  if (hip_graphics_resource_) {
-    hip_device_assert(device_, hipGraphicsUnregisterResource(hip_graphics_resource_));
-  }
-
-  const hipError_t result = hipGraphicsGLRegisterBuffer(
-      &hip_graphics_resource_, display_interop.opengl_pbo_id, hipGraphicsRegisterFlagsNone);
-  if (result != hipSuccess) {
-    LOG(ERROR) << "Error registering OpenGL buffer: " << hipewErrorString(result);
-  }
-
-  opengl_pbo_id_ = display_interop.opengl_pbo_id;
-  buffer_area_ = new_buffer_area;
-}
-
-device_ptr HIPDeviceGraphicsInterop::map()
-{
-  if (!hip_graphics_resource_) {
-    return 0;
-  }
-
-  HIPContextScope scope(device_);
-
-  hipDeviceptr_t hip_buffer;
-  size_t bytes;
-
-  hip_device_assert(device_,
-                    hipGraphicsMapResources(1, &hip_graphics_resource_, queue_->stream()));
-  hip_device_assert(
-      device_, hipGraphicsResourceGetMappedPointer(&hip_buffer, &bytes, hip_graphics_resource_));
-
-  if (need_clear_) {
-    hip_device_assert(
-        device_,
-        hipMemsetD8Async(static_cast<hipDeviceptr_t>(hip_buffer), 0, bytes, queue_->stream()));
-
-    need_clear_ = false;
-  }
-
-  return static_cast<device_ptr>(hip_buffer);
-}
-
-void HIPDeviceGraphicsInterop::unmap()
-{
-  HIPContextScope scope(device_);
-
-  hip_device_assert(device_,
-                    hipGraphicsUnmapResources(1, &hip_graphics_resource_, queue_->stream()));
-}
-
-CCL_NAMESPACE_END
-
-#endif
--- a/intern/cycles/device/hip/graphics_interop.h
+++ b/intern/cycles/device/hip/graphics_interop.h
@@ -1,64 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef WITH_HIP
-
-#  include "device/device_graphics_interop.h"
-
-#  ifdef WITH_HIP_DYNLOAD
-#    include "hipew.h"
-#  endif
-
-CCL_NAMESPACE_BEGIN
-
-class HIPDevice;
-class HIPDeviceQueue;
-
-class HIPDeviceGraphicsInterop : public DeviceGraphicsInterop {
- public:
-  explicit HIPDeviceGraphicsInterop(HIPDeviceQueue *queue);
-
-  HIPDeviceGraphicsInterop(const HIPDeviceGraphicsInterop &other) = delete;
-  HIPDeviceGraphicsInterop(HIPDeviceGraphicsInterop &&other) noexcept = delete;
-
-  ~HIPDeviceGraphicsInterop();
-
-  HIPDeviceGraphicsInterop &operator=(const HIPDeviceGraphicsInterop &other) = delete;
-  HIPDeviceGraphicsInterop &operator=(HIPDeviceGraphicsInterop &&other) = delete;
-
-  virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) override;
-
-  virtual device_ptr map() override;
-  virtual void unmap() override;
-
- protected:
-  HIPDeviceQueue *queue_ = nullptr;
-  HIPDevice *device_ = nullptr;
-
-  /* OpenGL PBO which is currently registered as the destination for the CUDA buffer. */
-  uint opengl_pbo_id_ = 0;
-  /* Buffer area in pixels of the corresponding PBO. */
-  int64_t buffer_area_ = 0;
-
-  /* The destination was requested to be cleared. */
-  bool need_clear_ = false;
-
-  hipGraphicsResource hip_graphics_resource_ = nullptr;
-};
-
-CCL_NAMESPACE_END
-
-#endif
--- a/intern/cycles/device/hip/kernel.cpp
+++ b/intern/cycles/device/hip/kernel.cpp
@@ -1,69 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef WITH_HIP
-
-#  include "device/hip/kernel.h"
-#  include "device/hip/device_impl.h"
-
-CCL_NAMESPACE_BEGIN
-
-void HIPDeviceKernels::load(HIPDevice *device)
-{
-  hipModule_t hipModule = device->hipModule;
-
-  for (int i = 0; i < (int)DEVICE_KERNEL_NUM; i++) {
-    HIPDeviceKernel &kernel = kernels_[i];
-
-    /* No mega-kernel used for GPU. */
-    if (i == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
-      continue;
-    }
-
-    const std::string function_name = std::string("kernel_gpu_") +
-                                      device_kernel_as_string((DeviceKernel)i);
-    hip_device_assert(device,
-                      hipModuleGetFunction(&kernel.function, hipModule, function_name.c_str()));
-
-    if (kernel.function) {
-      hip_device_assert(device, hipFuncSetCacheConfig(kernel.function, hipFuncCachePreferL1));
-
-      hip_device_assert(
-          device,
-          hipModuleOccupancyMaxPotentialBlockSize(
-              &kernel.min_blocks, &kernel.num_threads_per_block, kernel.function, 0, 0));
-    }
-    else {
-      LOG(ERROR) << "Unable to load kernel " << function_name;
-    }
-  }
-
-  loaded = true;
-}
-
-const HIPDeviceKernel &HIPDeviceKernels::get(DeviceKernel kernel) const
-{
-  return kernels_[(int)kernel];
-}
-
-bool HIPDeviceKernels::available(DeviceKernel kernel) const
-{
-  return kernels_[(int)kernel].function != nullptr;
-}
-
-CCL_NAMESPACE_END
-
-#endif /* WITH_HIP*/
--- a/intern/cycles/device/hip/kernel.h
+++ b/intern/cycles/device/hip/kernel.h
@@ -1,54 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#ifdef WITH_HIP
-
-#  include "device/device_kernel.h"
-
-#  ifdef WITH_HIP_DYNLOAD
-#    include "hipew.h"
-#  endif
-
-CCL_NAMESPACE_BEGIN
-
-class HIPDevice;
-
-/* HIP kernel and associate occupancy information. */
-class HIPDeviceKernel {
- public:
-  hipFunction_t function = nullptr;
-
-  int num_threads_per_block = 0;
-  int min_blocks = 0;
-};
-
-/* Cache of HIP kernels for each DeviceKernel. */
-class HIPDeviceKernels {
- public:
-  void load(HIPDevice *device);
-  const HIPDeviceKernel &get(DeviceKernel kernel) const;
-  bool available(DeviceKernel kernel) const;
-
- protected:
-  HIPDeviceKernel kernels_[DEVICE_KERNEL_NUM];
-  bool loaded = false;
-};
-
-CCL_NAMESPACE_END
-
-#endif /* WITH_HIP */
--- a/intern/cycles/device/hip/queue.cpp
+++ b/intern/cycles/device/hip/queue.cpp
@@ -1,209 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef WITH_HIP
-
-#  include "device/hip/queue.h"
-
-#  include "device/hip/device_impl.h"
-#  include "device/hip/graphics_interop.h"
-#  include "device/hip/kernel.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* HIPDeviceQueue */
-
-HIPDeviceQueue::HIPDeviceQueue(HIPDevice *device)
-    : DeviceQueue(device), hip_device_(device), hip_stream_(nullptr)
-{
-  const HIPContextScope scope(hip_device_);
-  hip_device_assert(hip_device_, hipStreamCreateWithFlags(&hip_stream_, hipStreamNonBlocking));
-}
-
-HIPDeviceQueue::~HIPDeviceQueue()
-{
-  const HIPContextScope scope(hip_device_);
-  hipStreamDestroy(hip_stream_);
-}
-
-int HIPDeviceQueue::num_concurrent_states(const size_t /*state_size*/) const
-{
-  /* TODO: compute automatically. */
-  /* TODO: must have at least num_threads_per_block. */
-  return 14416128;
-}
-
-int HIPDeviceQueue::num_concurrent_busy_states() const
-{
-  const int max_num_threads = hip_device_->get_num_multiprocessors() *
-                              hip_device_->get_max_num_threads_per_multiprocessor();
-
-  if (max_num_threads == 0) {
-    return 65536;
-  }
-
-  return 4 * max_num_threads;
-}
-
-void HIPDeviceQueue::init_execution()
-{
-  /* Synchronize all textures and memory copies before executing task. */
-  HIPContextScope scope(hip_device_);
-  hip_device_->load_texture_info();
-  hip_device_assert(hip_device_, hipDeviceSynchronize());
-
-  debug_init_execution();
-}
-
-bool HIPDeviceQueue::kernel_available(DeviceKernel kernel) const
-{
-  return hip_device_->kernels.available(kernel);
-}
-
-bool HIPDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[])
-{
-  if (hip_device_->have_error()) {
-    return false;
-  }
-
-  debug_enqueue(kernel, work_size);
-
-  const HIPContextScope scope(hip_device_);
-  const HIPDeviceKernel &hip_kernel = hip_device_->kernels.get(kernel);
-
-  /* Compute kernel launch parameters. */
-  const int num_threads_per_block = hip_kernel.num_threads_per_block;
-  const int num_blocks = divide_up(work_size, num_threads_per_block);
-
-  int shared_mem_bytes = 0;
-
-  switch (kernel) {
-    case DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY:
-    case DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY:
-    case DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY:
-    case DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY:
-    case DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY:
-    case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY:
-      /* See parall_active_index.h for why this amount of shared memory is needed. */
-      shared_mem_bytes = (num_threads_per_block + 1) * sizeof(int);
-      break;
-    default:
-      break;
-  }
-
-  /* Launch kernel. */
-  hip_device_assert(hip_device_,
-                    hipModuleLaunchKernel(hip_kernel.function,
-                                          num_blocks,
-                                          1,
-                                          1,
-                                          num_threads_per_block,
-                                          1,
-                                          1,
-                                          shared_mem_bytes,
-                                          hip_stream_,
-                                          args,
-                                          0));
-  return !(hip_device_->have_error());
-}
-
-bool HIPDeviceQueue::synchronize()
-{
-  if (hip_device_->have_error()) {
-    return false;
-  }
-
-  const HIPContextScope scope(hip_device_);
-  hip_device_assert(hip_device_, hipStreamSynchronize(hip_stream_));
-  debug_synchronize();
-
-  return !(hip_device_->have_error());
-}
-
-void HIPDeviceQueue::zero_to_device(device_memory &mem)
-{
-  assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
-
-  if (mem.memory_size() == 0) {
-    return;
-  }
-
-  /* Allocate on demand. */
-  if (mem.device_pointer == 0) {
-    hip_device_->mem_alloc(mem);
-  }
-
-  /* Zero memory on device. */
-  assert(mem.device_pointer != 0);
-
-  const HIPContextScope scope(hip_device_);
-  hip_device_assert(
-      hip_device_,
-      hipMemsetD8Async((hipDeviceptr_t)mem.device_pointer, 0, mem.memory_size(), hip_stream_));
-}
-
-void HIPDeviceQueue::copy_to_device(device_memory &mem)
-{
-  assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
-
-  if (mem.memory_size() == 0) {
-    return;
-  }
-
-  /* Allocate on demand. */
-  if (mem.device_pointer == 0) {
-    hip_device_->mem_alloc(mem);
-  }
-
-  assert(mem.device_pointer != 0);
-  assert(mem.host_pointer != nullptr);
-
-  /* Copy memory to device. */
-  const HIPContextScope scope(hip_device_);
-  hip_device_assert(
-      hip_device_,
-      hipMemcpyHtoDAsync(
-          (hipDeviceptr_t)mem.device_pointer, mem.host_pointer, mem.memory_size(), hip_stream_));
-}
-
-void HIPDeviceQueue::copy_from_device(device_memory &mem)
-{
-  assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
-
-  if (mem.memory_size() == 0) {
-    return;
-  }
-
-  assert(mem.device_pointer != 0);
-  assert(mem.host_pointer != nullptr);
-
-  /* Copy memory from device. */
-  const HIPContextScope scope(hip_device_);
-  hip_device_assert(
-      hip_device_,
-      hipMemcpyDtoHAsync(
-          mem.host_pointer, (hipDeviceptr_t)mem.device_pointer, mem.memory_size(), hip_stream_));
-}
-
-// TODO : (Arya) Enable this after stabilizing dev branch
-unique_ptr<DeviceGraphicsInterop> HIPDeviceQueue::graphics_interop_create()
-{
-  return make_unique<HIPDeviceGraphicsInterop>(this);
-}
-
-CCL_NAMESPACE_END
-
-#endif /* WITH_HIP */
--- a/intern/cycles/device/hip/queue.h
+++ b/intern/cycles/device/hip/queue.h
@@ -1,68 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#ifdef WITH_HIP
-
-#  include "device/device_kernel.h"
-#  include "device/device_memory.h"
-#  include "device/device_queue.h"
-
-#  include "device/hip/util.h"
-
-CCL_NAMESPACE_BEGIN
-
-class HIPDevice;
-class device_memory;
-
-/* Base class for HIP queues. */
-class HIPDeviceQueue : public DeviceQueue {
- public:
-  HIPDeviceQueue(HIPDevice *device);
-  ~HIPDeviceQueue();
-
-  virtual int num_concurrent_states(const size_t state_size) const override;
-  virtual int num_concurrent_busy_states() const override;
-
-  virtual void init_execution() override;
-
-  virtual bool kernel_available(DeviceKernel kernel) const override;
-
-  virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) override;
-
-  virtual bool synchronize() override;
-
-  virtual void zero_to_device(device_memory &mem) override;
-  virtual void copy_to_device(device_memory &mem) override;
-  virtual void copy_from_device(device_memory &mem) override;
-
-  virtual hipStream_t stream()
-  {
-    return hip_stream_;
-  }
-
-  // TODO : (Arya) Enable this after stabilizing the dev branch
-  virtual unique_ptr<DeviceGraphicsInterop> graphics_interop_create() override;
-
- protected:
-  HIPDevice *hip_device_;
-  hipStream_t hip_stream_;
-};
-
-CCL_NAMESPACE_END
-
-#endif /* WITH_HIP */
--- a/intern/cycles/device/hip/util.cpp
+++ b/intern/cycles/device/hip/util.cpp
@@ -1,61 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef WITH_HIP
-
-#  include "device/hip/util.h"
-#  include "device/hip/device_impl.h"
-
-CCL_NAMESPACE_BEGIN
-
-HIPContextScope::HIPContextScope(HIPDevice *device) : device(device)
-{
-  hip_device_assert(device, hipCtxPushCurrent(device->hipContext));
-}
-
-HIPContextScope::~HIPContextScope()
-{
-  hip_device_assert(device, hipCtxPopCurrent(NULL));
-}
-
-#  ifndef WITH_HIP_DYNLOAD
-const char *hipewErrorString(hipError_t result)
-{
-  /* We can only give error code here without major code duplication, that
-   * should be enough since dynamic loading is only being disabled by folks
-   * who knows what they're doing anyway.
-   *
-   * NOTE: Avoid call from several threads.
-   */
-  static string error;
-  error = string_printf("%d", result);
-  return error.c_str();
-}
-
-const char *hipewCompilerPath()
-{
-  return CYCLES_HIP_HIPCC_EXECUTABLE;
-}
-
-int hipewCompilerVersion()
-{
-  return (HIP_VERSION / 100) + (HIP_VERSION % 100 / 10);
-}
-#  endif
-
-CCL_NAMESPACE_END
-
-#endif /* WITH_HIP */
--- a/intern/cycles/device/hip/util.h
+++ b/intern/cycles/device/hip/util.h
@@ -1,63 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#ifdef WITH_HIP
-
-#  ifdef WITH_HIP_DYNLOAD
-#    include "hipew.h"
-#  endif
-
-CCL_NAMESPACE_BEGIN
-
-class HIPDevice;
-
-/* Utility to push/pop HIP context. */
-class HIPContextScope {
- public:
-  HIPContextScope(HIPDevice *device);
-  ~HIPContextScope();
-
- private:
-  HIPDevice *device;
-};
-
-/* Utility for checking return values of HIP function calls. */
-#  define hip_device_assert(hip_device, stmt) \
-    { \
-      hipError_t result = stmt; \
-      if (result != hipSuccess) { \
-        const char *name = hipewErrorString(result); \
-        hip_device->set_error( \
-            string_printf("%s in %s (%s:%d)", name, #stmt, __FILE__, __LINE__)); \
-      } \
-    } \
-    (void)0
-
-#  define hip_assert(stmt) hip_device_assert(this, stmt)
-
-#  ifndef WITH_HIP_DYNLOAD
-/* Transparently implement some functions, so majority of the file does not need
- * to worry about difference between dynamically loaded and linked HIP at all. */
-const char *hipewErrorString(hipError_t result);
-const char *hipewCompilerPath();
-int hipewCompilerVersion();
-#  endif /* WITH_HIP_DYNLOAD */
-
-CCL_NAMESPACE_END
-
-#endif /* WITH_HIP */
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -315,11 +315,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
  group_descs[PG_HITS].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
  group_descs[PG_HITS].hitgroup.moduleAH = optix_module;
  group_descs[PG_HITS].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_shadow_all_hit";
-  group_descs[PG_HITV].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
-  group_descs[PG_HITV].hitgroup.moduleCH = optix_module;
-  group_descs[PG_HITV].hitgroup.entryFunctionNameCH = "__closesthit__kernel_optix_hit";
-  group_descs[PG_HITV].hitgroup.moduleAH = optix_module;
-  group_descs[PG_HITV].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_volume_test";

  if (kernel_features & KERNEL_FEATURE_HAIR) {
    if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
@@ -402,7 +397,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
  trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH);
  trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH);
  trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH);
-  trace_css = std::max(trace_css, stack_size[PG_HITV].cssIS + stack_size[PG_HITV].cssAH);
  trace_css = std::max(trace_css,
                       stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH);
  trace_css = std::max(trace_css,
@@ -427,7 +421,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
    pipeline_groups.push_back(groups[PG_HITD]);
    pipeline_groups.push_back(groups[PG_HITS]);
    pipeline_groups.push_back(groups[PG_HITL]);
-    pipeline_groups.push_back(groups[PG_HITV]);
    if (motion_blur) {
      pipeline_groups.push_back(groups[PG_HITD_MOTION]);
      pipeline_groups.push_back(groups[PG_HITS_MOTION]);
@@ -466,7 +459,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
    pipeline_groups.push_back(groups[PG_HITD]);
    pipeline_groups.push_back(groups[PG_HITS]);
    pipeline_groups.push_back(groups[PG_HITL]);
-    pipeline_groups.push_back(groups[PG_HITV]);
    if (motion_blur) {
      pipeline_groups.push_back(groups[PG_HITD_MOTION]);
      pipeline_groups.push_back(groups[PG_HITS_MOTION]);
@@ -1398,33 +1390,25 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
      /* Set user instance ID to object index (but leave low bit blank). */
      instance.instanceId = ob->get_device_index() << 1;

-      /* Add some of the object visibility bits to the mask.
-       * __prim_visibility contains the combined visibility bits of all instances, so is not
-       * reliable if they differ between instances. But the OptiX visibility mask can only contain
-       * 8 bits, so have to trade-off here and select just a few important ones.
-       */
-      instance.visibilityMask = ob->visibility_for_tracing() & 0xFF;
-
      /* Have to have at least one bit in the mask, or else instance would always be culled. */
-      if (0 == instance.visibilityMask) {
-        instance.visibilityMask = 0xFF;
+      instance.visibilityMask = 1;
+
+      if (ob->get_geometry()->has_volume) {
+        /* Volumes have a special bit set in the visibility mask so a trace can mask only volumes.
+         */
+        instance.visibilityMask |= 2;
      }

-      if (ob->get_geometry()->geometry_type == Geometry::HAIR &&
-          static_cast<const Hair *>(ob->get_geometry())->curve_shape == CURVE_THICK) {
-        if (motion_blur && ob->get_geometry()->has_motion_blur()) {
+      if (ob->get_geometry()->geometry_type == Geometry::HAIR) {
+        /* Same applies to curves (so they can be skipped in local trace calls). */
+        instance.visibilityMask |= 4;
+
+        if (motion_blur && ob->get_geometry()->has_motion_blur() &&
+            static_cast<const Hair *>(ob->get_geometry())->curve_shape == CURVE_THICK) {
          /* Select between motion blur and non-motion blur built-in intersection module. */
          instance.sbtOffset = PG_HITD_MOTION - PG_HITD;
        }
      }
-      else {
-        /* Can disable __anyhit__kernel_optix_visibility_test by default (except for thick curves,
-         * since it needs to filter out end-caps there).
-         * It is enabled where necessary (visibility mask exceeds 8 bits or the other any-hit
-         * programs like __anyhit__kernel_optix_shadow_all_hit) via OPTIX_RAY_FLAG_ENFORCE_ANYHIT.
-         */
-        instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT;
-      }

      /* Insert motion traversable if object has motion. */
      if (motion_blur && ob->use_motion()) {
@@ -1490,7 +1474,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
        delete[] reinterpret_cast<uint8_t *>(&motion_transform);

        /* Disable instance transform if object uses motion transform already. */
-        instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
+        instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;

        /* Get traversable handle to motion transform. */
        optixConvertPointerToTraversableHandle(context,
@@ -1507,7 +1491,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
        }
        else {
          /* Disable instance transform if geometry already has it applied to vertex data. */
-          instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
+          instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
          /* Non-instanced objects read ID from 'prim_object', so distinguish
           * them from instanced objects with the low bit set. */
          instance.instanceId |= 1;
--- a/intern/cycles/device/optix/device_impl.h
+++ b/intern/cycles/device/optix/device_impl.h
@@ -40,7 +40,6 @@ enum {
  PG_HITD, /* Default hit group. */
  PG_HITS, /* __SHADOW_RECORD_ALL__ hit group. */
  PG_HITL, /* __BVH_LOCAL__ hit group (only used for triangles). */
-  PG_HITV, /* __VOLUME__ hit group. */
  PG_HITD_MOTION,
  PG_HITS_MOTION,
  PG_CALL_SVM_AO,
@@ -52,7 +51,7 @@ enum {
 static const int MISS_PROGRAM_GROUP_OFFSET = PG_MISS;
 static const int NUM_MIS_PROGRAM_GROUPS = 1;
 static const int HIT_PROGAM_GROUP_OFFSET = PG_HITD;
-static const int NUM_HIT_PROGRAM_GROUPS = 6;
+static const int NUM_HIT_PROGRAM_GROUPS = 5;
 static const int CALLABLE_PROGRAM_GROUPS_BASE = PG_CALL_SVM_AO;
 static const int NUM_CALLABLE_PROGRAM_GROUPS = 3;

--- a/intern/cycles/integrator/CMakeLists.txt
+++ b/intern/cycles/integrator/CMakeLists.txt
@@ -27,8 +27,6 @@ set(SRC
  pass_accessor.cpp
  pass_accessor_cpu.cpp
  pass_accessor_gpu.cpp
-  path_trace_display.cpp
-  path_trace_tile.cpp
  path_trace_work.cpp
  path_trace_work_cpu.cpp
  path_trace_work_gpu.cpp
@@ -49,8 +47,6 @@ set(SRC_HEADERS
  pass_accessor.h
  pass_accessor_cpu.h
  pass_accessor_gpu.h
-  path_trace_display.h
-  path_trace_tile.h
  path_trace_work.h
  path_trace_work_cpu.h
  path_trace_work_gpu.h
--- a/intern/cycles/integrator/pass_accessor.cpp
+++ b/intern/cycles/integrator/pass_accessor.cpp
@@ -149,6 +149,9 @@ bool PassAccessor::get_render_tile_pixels(const RenderBuffers *render_buffers,
      /* Denoised passes store their final pixels, no need in special calculation. */
      get_pass_float(render_buffers, buffer_params, destination);
    }
+    else if (type == PASS_RENDER_TIME) {
+      /* TODO(sergey): Needs implementation. */
+    }
    else if (type == PASS_DEPTH) {
      get_pass_depth(render_buffers, buffer_params, destination);
    }
--- a/intern/cycles/integrator/path_trace.cpp
+++ b/intern/cycles/integrator/path_trace.cpp
@@ -19,9 +19,8 @@
 #include "device/cpu/device.h"
 #include "device/device.h"
 #include "integrator/pass_accessor.h"
-#include "integrator/path_trace_display.h"
-#include "integrator/path_trace_tile.h"
 #include "integrator/render_scheduler.h"
+#include "render/gpu_display.h"
 #include "render/pass.h"
 #include "render/scene.h"
 #include "render/tile.h"
@@ -68,11 +67,11 @@ PathTrace::PathTrace(Device *device,
 PathTrace::~PathTrace()
 {
  /* Destroy any GPU resource which was used for graphics interop.
-   * Need to have access to the PathTraceDisplay as it is the only source of drawing context which
-   * is used for interop. */
-  if (display_) {
+   * Need to have access to the GPUDisplay as it is the only source of drawing context which is
+   * used for interop. */
+  if (gpu_display_) {
    for (auto &&path_trace_work : path_trace_works_) {
-      path_trace_work->destroy_gpu_resources(display_.get());
+      path_trace_work->destroy_gpu_resources(gpu_display_.get());
    }
  }
 }
@@ -95,7 +94,7 @@ bool PathTrace::ready_to_reset()
 {
  /* The logic here is optimized for the best feedback in the viewport, which implies having a GPU
   * display. Of there is no such display, the logic here will break. */
-  DCHECK(display_);
+  DCHECK(gpu_display_);

  /* The logic here tries to provide behavior which feels the most interactive feel to artists.
   * General idea is to be able to reset as quickly as possible, while still providing interactive
@@ -127,8 +126,8 @@ void PathTrace::reset(const BufferParams &full_params, const BufferParams &big_t
  /* NOTE: GPU display checks for buffer modification and avoids unnecessary re-allocation.
   * It is requires to inform about reset whenever it happens, so that the redraw state tracking is
   * properly updated. */
-  if (display_) {
-    display_->reset(full_params);
+  if (gpu_display_) {
+    gpu_display_->reset(full_params);
  }

  render_state_.has_denoised_result = false;
@@ -536,35 +535,25 @@ void PathTrace::denoise(const RenderWork &render_work)
  render_scheduler_.report_denoise_time(render_work, time_dt() - start_time);
 }

-void PathTrace::set_output_driver(unique_ptr<OutputDriver> driver)
+void PathTrace::set_gpu_display(unique_ptr<GPUDisplay> gpu_display)
 {
-  output_driver_ = move(driver);
+  gpu_display_ = move(gpu_display);
 }

-void PathTrace::set_display_driver(unique_ptr<DisplayDriver> driver)
+void PathTrace::clear_gpu_display()
 {
-  if (driver) {
-    display_ = make_unique<PathTraceDisplay>(move(driver));
-  }
-  else {
-    display_ = nullptr;
-  }
-}
-
-void PathTrace::clear_display()
-{
-  if (display_) {
-    display_->clear();
+  if (gpu_display_) {
+    gpu_display_->clear();
  }
 }

 void PathTrace::draw()
 {
-  if (!display_) {
+  if (!gpu_display_) {
    return;
  }

-  did_draw_after_reset_ |= display_->draw();
+  did_draw_after_reset_ |= gpu_display_->draw();
 }

 void PathTrace::update_display(const RenderWork &render_work)
@@ -573,32 +562,31 @@ void PathTrace::update_display(const RenderWork &render_work)
    return;
  }

-  if (!display_ && !output_driver_) {
+  if (!gpu_display_ && !tile_buffer_update_cb) {
    VLOG(3) << "Ignore display update.";
    return;
  }

  if (full_params_.width == 0 || full_params_.height == 0) {
-    VLOG(3) << "Skipping PathTraceDisplay update due to 0 size of the render buffer.";
+    VLOG(3) << "Skipping GPUDisplay update due to 0 size of the render buffer.";
    return;
  }

  const double start_time = time_dt();

-  if (output_driver_) {
+  if (tile_buffer_update_cb) {
    VLOG(3) << "Invoke buffer update callback.";

-    PathTraceTile tile(*this);
-    output_driver_->update_render_tile(tile);
+    tile_buffer_update_cb();
  }

-  if (display_) {
+  if (gpu_display_) {
    VLOG(3) << "Perform copy to GPUDisplay work.";

    const int resolution_divider = render_work.resolution_divider;
    const int texture_width = max(1, full_params_.width / resolution_divider);
    const int texture_height = max(1, full_params_.height / resolution_divider);
-    if (!display_->update_begin(texture_width, texture_height)) {
+    if (!gpu_display_->update_begin(texture_width, texture_height)) {
      LOG(ERROR) << "Error beginning GPUDisplay update.";
      return;
    }
@@ -612,10 +600,10 @@ void PathTrace::update_display(const RenderWork &render_work)
     * all works in parallel. */
    const int num_samples = get_num_samples_in_buffer();
    for (auto &&path_trace_work : path_trace_works_) {
-      path_trace_work->copy_to_display(display_.get(), pass_mode, num_samples);
+      path_trace_work->copy_to_gpu_display(gpu_display_.get(), pass_mode, num_samples);
    }

-    display_->update_end();
+    gpu_display_->update_end();
  }

  render_scheduler_.report_display_update_time(render_work, time_dt() - start_time);
@@ -765,26 +753,20 @@ bool PathTrace::is_cancel_requested()

 void PathTrace::tile_buffer_write()
 {
-  if (!output_driver_) {
+  if (!tile_buffer_write_cb) {
    return;
  }

-  PathTraceTile tile(*this);
-  output_driver_->write_render_tile(tile);
+  tile_buffer_write_cb();
 }

 void PathTrace::tile_buffer_read()
 {
-  if (!device_scene_->data.bake.use) {
+  if (!tile_buffer_read_cb) {
    return;
  }

-  if (!output_driver_) {
-    return;
-  }
-
-  PathTraceTile tile(*this);
-  if (output_driver_->read_render_tile(tile)) {
+  if (tile_buffer_read_cb()) {
    tbb::parallel_for_each(path_trace_works_, [](unique_ptr<PathTraceWork> &path_trace_work) {
      path_trace_work->copy_render_buffers_to_device();
    });
@@ -819,7 +801,7 @@ void PathTrace::tile_buffer_write_to_disk()
  }

  if (!tile_manager_.write_tile(*buffers)) {
-    device_->set_error("Error writing tile to file");
+    LOG(ERROR) << "Error writing tile to file.";
  }
 }

@@ -912,14 +894,7 @@ void PathTrace::process_full_buffer_from_disk(string_view filename)

  DenoiseParams denoise_params;
  if (!tile_manager_.read_full_buffer_from_disk(filename, &full_frame_buffers, &denoise_params)) {
-    const string error_message = "Error reading tiles from file";
-    if (progress_) {
-      progress_->set_error(error_message);
-      progress_->set_cancel(error_message);
-    }
-    else {
-      LOG(ERROR) << error_message;
-    }
+    LOG(ERROR) << "Error reading tiles from file.";
    return;
  }

@@ -1023,11 +998,6 @@ int2 PathTrace::get_render_tile_offset() const
  return make_int2(tile.x, tile.y);
 }

-int2 PathTrace::get_render_size() const
-{
-  return tile_manager_.get_size();
-}
-
 const BufferParams &PathTrace::get_render_tile_params() const
 {
  if (full_frame_state_.render_buffers) {
@@ -1058,8 +1028,6 @@ static const char *device_type_for_description(const DeviceType type)
      return "CUDA";
    case DEVICE_OPTIX:
      return "OptiX";
-    case DEVICE_HIP:
-      return "HIP";
    case DEVICE_DUMMY:
      return "Dummy";
    case DEVICE_MULTI:
--- a/intern/cycles/integrator/path_trace.h
+++ b/intern/cycles/integrator/path_trace.h
@@ -31,14 +31,12 @@ CCL_NAMESPACE_BEGIN
 class AdaptiveSampling;
 class Device;
 class DeviceScene;
-class DisplayDriver;
 class Film;
 class RenderBuffers;
 class RenderScheduler;
 class RenderWork;
-class PathTraceDisplay;
-class OutputDriver;
 class Progress;
+class GPUDisplay;
 class TileManager;

 /* PathTrace class takes care of kernel graph and scheduling on a (multi)device. It takes care of
@@ -100,16 +98,13 @@ class PathTrace {
   * Use this to configure the adaptive sampler before rendering any samples. */
  void set_adaptive_sampling(const AdaptiveSampling &adaptive_sampling);

-  /* Sets output driver for render buffer output. */
-  void set_output_driver(unique_ptr<OutputDriver> driver);
+  /* Set GPU display which takes care of drawing the render result. */
+  void set_gpu_display(unique_ptr<GPUDisplay> gpu_display);

-  /* Set display driver for interactive render buffer display. */
-  void set_display_driver(unique_ptr<DisplayDriver> driver);
+  /* Clear the GPU display by filling it in with all zeroes. */
+  void clear_gpu_display();

-  /* Clear the display buffer by filling it in with all zeroes. */
-  void clear_display();
-
-  /* Perform drawing of the current state of the DisplayDriver. */
+  /* Perform drawing of the current state of the GPUDisplay. */
  void draw();

  /* Cancel rendering process as soon as possible, without waiting for full tile to be sampled.
@@ -162,7 +157,6 @@ class PathTrace {
   * instead. */
  int2 get_render_tile_size() const;
  int2 get_render_tile_offset() const;
-  int2 get_render_size() const;

  /* Get buffer parameters of the current tile.
   *
@@ -174,6 +168,18 @@ class PathTrace {
   * times, and so on. */
  string full_report() const;

+  /* Callback which communicates an updates state of the render buffer of the current big tile.
+   * Is called during path tracing to communicate work-in-progress state of the final buffer. */
+  function<void(void)> tile_buffer_update_cb;
+
+  /* Callback which communicates final rendered buffer. Is called after path-tracing is done. */
+  function<void(void)> tile_buffer_write_cb;
+
+  /* Callback which initializes rendered buffer. Is called before path-tracing starts.
+   *
+   * This is used for baking. */
+  function<bool(void)> tile_buffer_read_cb;
+
  /* Callback which is called to report current rendering progress.
   *
   * It is supposed to be cheaper than buffer update/write, hence can be called more often.
@@ -246,11 +252,7 @@ class PathTrace {
  RenderScheduler &render_scheduler_;
  TileManager &tile_manager_;

-  /* Display driver for interactive render buffer display. */
-  unique_ptr<PathTraceDisplay> display_;
-
-  /* Output driver to write render buffer to. */
-  unique_ptr<OutputDriver> output_driver_;
+  unique_ptr<GPUDisplay> gpu_display_;

  /* Per-compute device descriptors of work which is responsible for path tracing on its configured
   * device. */
--- a/intern/cycles/integrator/path_trace_tile.cpp
+++ b/intern/cycles/integrator/path_trace_tile.cpp
@@ -1,107 +0,0 @@
-/*
- * Copyright 2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "integrator/path_trace_tile.h"
-#include "integrator/pass_accessor_cpu.h"
-#include "integrator/path_trace.h"
-
-#include "render/buffers.h"
-#include "render/film.h"
-#include "render/pass.h"
-#include "render/scene.h"
-
-CCL_NAMESPACE_BEGIN
-
-PathTraceTile::PathTraceTile(PathTrace &path_trace)
-    : OutputDriver::Tile(path_trace.get_render_tile_offset(),
-                         path_trace.get_render_tile_size(),
-                         path_trace.get_render_size(),
-                         path_trace.get_render_tile_params().layer,
-                         path_trace.get_render_tile_params().view),
-      path_trace_(path_trace),
-      copied_from_device_(false)
-{
-}
-
-bool PathTraceTile::get_pass_pixels(const string_view pass_name,
-                                    const int num_channels,
-                                    float *pixels) const
-{
-  /* NOTE: The code relies on a fact that session is fully update and no scene/buffer modification
-   * is happening while this function runs. */
-
-  if (!copied_from_device_) {
-    /* Copy from device on demand. */
-    path_trace_.copy_render_tile_from_device();
-    const_cast<PathTraceTile *>(this)->copied_from_device_ = true;
-  }
-
-  const BufferParams &buffer_params = path_trace_.get_render_tile_params();
-
-  const BufferPass *pass = buffer_params.find_pass(pass_name);
-  if (pass == nullptr) {
-    return false;
-  }
-
-  const bool has_denoised_result = path_trace_.has_denoised_result();
-  if (pass->mode == PassMode::DENOISED && !has_denoised_result) {
-    pass = buffer_params.find_pass(pass->type);
-    if (pass == nullptr) {
-      /* Happens when denoised result pass is requested but is never written by the kernel. */
-      return false;
-    }
-  }
-
-  pass = buffer_params.get_actual_display_pass(pass);
-
-  const float exposure = buffer_params.exposure;
-  const int num_samples = path_trace_.get_num_render_tile_samples();
-
-  PassAccessor::PassAccessInfo pass_access_info(*pass);
-  pass_access_info.use_approximate_shadow_catcher = buffer_params.use_approximate_shadow_catcher;
-  pass_access_info.use_approximate_shadow_catcher_background =
-      pass_access_info.use_approximate_shadow_catcher && !buffer_params.use_transparent_background;
-
-  const PassAccessorCPU pass_accessor(pass_access_info, exposure, num_samples);
-  const PassAccessor::Destination destination(pixels, num_channels);
-
-  return path_trace_.get_render_tile_pixels(pass_accessor, destination);
-}
-
-bool PathTraceTile::set_pass_pixels(const string_view pass_name,
-                                    const int num_channels,
-                                    const float *pixels) const
-{
-  /* NOTE: The code relies on a fact that session is fully update and no scene/buffer modification
-   * is happening while this function runs. */
-
-  const BufferParams &buffer_params = path_trace_.get_render_tile_params();
-  const BufferPass *pass = buffer_params.find_pass(pass_name);
-  if (!pass) {
-    return false;
-  }
-
-  const float exposure = buffer_params.exposure;
-  const int num_samples = 1;
-
-  const PassAccessor::PassAccessInfo pass_access_info(*pass);
-  PassAccessorCPU pass_accessor(pass_access_info, exposure, num_samples);
-  PassAccessor::Source source(pixels, num_channels);
-
-  return path_trace_.set_render_tile_pixels(pass_accessor, source);
-}
-
-CCL_NAMESPACE_END
--- a/intern/cycles/integrator/path_trace_tile.h
+++ b/intern/cycles/integrator/path_trace_tile.h
@@ -1,43 +0,0 @@
-/*
- * Copyright 2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "render/output_driver.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* PathTraceTile
- *
- * Implementation of OutputDriver::Tile interface for path tracer. */
-
-class PathTrace;
-
-class PathTraceTile : public OutputDriver::Tile {
- public:
-  PathTraceTile(PathTrace &path_trace);
-
-  bool get_pass_pixels(const string_view pass_name, const int num_channels, float *pixels) const;
-  bool set_pass_pixels(const string_view pass_name,
-                       const int num_channels,
-                       const float *pixels) const;
-
- private:
-  PathTrace &path_trace_;
-  bool copied_from_device_;
-};
-
-CCL_NAMESPACE_END
--- a/intern/cycles/integrator/path_trace_work.cpp
+++ b/intern/cycles/integrator/path_trace_work.cpp
@@ -16,12 +16,12 @@

 #include "device/device.h"

-#include "integrator/path_trace_display.h"
 #include "integrator/path_trace_work.h"
 #include "integrator/path_trace_work_cpu.h"
 #include "integrator/path_trace_work_gpu.h"
 #include "render/buffers.h"
 #include "render/film.h"
+#include "render/gpu_display.h"
 #include "render/scene.h"

 #include "kernel/kernel_types.h"
@@ -185,12 +185,12 @@ PassAccessor::PassAccessInfo PathTraceWork::get_display_pass_access_info(PassMod
  return pass_access_info;
 }

-PassAccessor::Destination PathTraceWork::get_display_destination_template(
-    const PathTraceDisplay *display) const
+PassAccessor::Destination PathTraceWork::get_gpu_display_destination_template(
+    const GPUDisplay *gpu_display) const
 {
  PassAccessor::Destination destination(film_->get_display_pass());

-  const int2 display_texture_size = display->get_texture_size();
+  const int2 display_texture_size = gpu_display->get_texture_size();
  const int texture_x = effective_buffer_params_.full_x - effective_full_params_.full_x;
  const int texture_y = effective_buffer_params_.full_y - effective_full_params_.full_y;

--- a/intern/cycles/integrator/path_trace_work.h
+++ b/intern/cycles/integrator/path_trace_work.h
@@ -28,7 +28,7 @@ class BufferParams;
 class Device;
 class DeviceScene;
 class Film;
-class PathTraceDisplay;
+class GPUDisplay;
 class RenderBuffers;

 class PathTraceWork {
@@ -83,9 +83,11 @@ class PathTraceWork {
   * noisy pass mode will be passed here when it is known that the buffer does not have denoised
   * passes yet (because denoiser did not run). If the denoised pass is requested and denoiser is
   * not used then this function will fall-back to the noisy pass instead. */
-  virtual void copy_to_display(PathTraceDisplay *display, PassMode pass_mode, int num_samples) = 0;
+  virtual void copy_to_gpu_display(GPUDisplay *gpu_display,
+                                   PassMode pass_mode,
+                                   int num_samples) = 0;

-  virtual void destroy_gpu_resources(PathTraceDisplay *display) = 0;
+  virtual void destroy_gpu_resources(GPUDisplay *gpu_display) = 0;

  /* Copy data from/to given render buffers.
   * Will copy pixels from a corresponding place (from multi-device point of view) of the render
@@ -160,8 +162,8 @@ class PathTraceWork {

  /* Get destination which offset and stride are configured so that writing to it will write to a
   * proper location of GPU display texture, taking current tile and device slice into account. */
-  PassAccessor::Destination get_display_destination_template(
-      const PathTraceDisplay *display) const;
+  PassAccessor::Destination get_gpu_display_destination_template(
+      const GPUDisplay *gpu_display) const;

  /* Device which will be used for path tracing.
   * Note that it is an actual render device (and never is a multi-device). */
--- a/intern/cycles/integrator/path_trace_work_cpu.cpp
+++ b/intern/cycles/integrator/path_trace_work_cpu.cpp
@@ -19,12 +19,10 @@
 #include "device/cpu/kernel.h"
 #include "device/device.h"

-#include "kernel/kernel_path_state.h"
-
 #include "integrator/pass_accessor_cpu.h"
-#include "integrator/path_trace_display.h"

 #include "render/buffers.h"
+#include "render/gpu_display.h"
 #include "render/scene.h"

 #include "util/util_atomic.h"
@@ -118,17 +116,13 @@ void PathTraceWorkCPU::render_samples_full_pipeline(KernelGlobals *kernel_global
                                                    const KernelWorkTile &work_tile,
                                                    const int samples_num)
 {
+  const bool has_shadow_catcher = device_scene_->data.integrator.has_shadow_catcher;
  const bool has_bake = device_scene_->data.bake.use;

-  IntegratorStateCPU integrator_states[2];
+  IntegratorStateCPU integrator_states[2] = {};

  IntegratorStateCPU *state = &integrator_states[0];
-  IntegratorStateCPU *shadow_catcher_state = nullptr;
-
-  if (device_scene_->data.integrator.has_shadow_catcher) {
-    shadow_catcher_state = &integrator_states[1];
-    path_state_init_queues(kernel_globals, shadow_catcher_state);
-  }
+  IntegratorStateCPU *shadow_catcher_state = &integrator_states[1];

  KernelWorkTile sample_work_tile = work_tile;
  float *render_buffer = buffers_->buffer.data();
@@ -153,7 +147,7 @@ void PathTraceWorkCPU::render_samples_full_pipeline(KernelGlobals *kernel_global

    kernels_.integrator_megakernel(kernel_globals, state, render_buffer);

-    if (shadow_catcher_state) {
+    if (has_shadow_catcher) {
      kernels_.integrator_megakernel(kernel_globals, shadow_catcher_state, render_buffer);
    }

@@ -161,14 +155,14 @@ void PathTraceWorkCPU::render_samples_full_pipeline(KernelGlobals *kernel_global
  }
 }

-void PathTraceWorkCPU::copy_to_display(PathTraceDisplay *display,
-                                       PassMode pass_mode,
-                                       int num_samples)
+void PathTraceWorkCPU::copy_to_gpu_display(GPUDisplay *gpu_display,
+                                           PassMode pass_mode,
+                                           int num_samples)
 {
-  half4 *rgba_half = display->map_texture_buffer();
+  half4 *rgba_half = gpu_display->map_texture_buffer();
  if (!rgba_half) {
-    /* TODO(sergey): Look into using copy_to_display() if mapping failed. Might be needed for
-     * some implementations of PathTraceDisplay which can not map memory? */
+    /* TODO(sergey): Look into using copy_to_gpu_display() if mapping failed. Might be needed for
+     * some implementations of GPUDisplay which can not map memory? */
    return;
  }

@@ -178,7 +172,7 @@ void PathTraceWorkCPU::copy_to_display(PathTraceDisplay *display,

  const PassAccessorCPU pass_accessor(pass_access_info, kfilm.exposure, num_samples);

-  PassAccessor::Destination destination = get_display_destination_template(display);
+  PassAccessor::Destination destination = get_gpu_display_destination_template(gpu_display);
  destination.pixels_half_rgba = rgba_half;

  tbb::task_arena local_arena = local_tbb_arena_create(device_);
@@ -186,10 +180,10 @@ void PathTraceWorkCPU::copy_to_display(PathTraceDisplay *display,
    pass_accessor.get_render_tile_pixels(buffers_.get(), effective_buffer_params_, destination);
  });

-  display->unmap_texture_buffer();
+  gpu_display->unmap_texture_buffer();
 }

-void PathTraceWorkCPU::destroy_gpu_resources(PathTraceDisplay * /*display*/)
+void PathTraceWorkCPU::destroy_gpu_resources(GPUDisplay * /*gpu_display*/)
 {
 }

--- a/intern/cycles/integrator/path_trace_work_cpu.h
+++ b/intern/cycles/integrator/path_trace_work_cpu.h
@@ -50,10 +50,10 @@ class PathTraceWorkCPU : public PathTraceWork {
                              int start_sample,
                              int samples_num) override;

-  virtual void copy_to_display(PathTraceDisplay *display,
-                               PassMode pass_mode,
-                               int num_samples) override;
-  virtual void destroy_gpu_resources(PathTraceDisplay *display) override;
+  virtual void copy_to_gpu_display(GPUDisplay *gpu_display,
+                                   PassMode pass_mode,
+                                   int num_samples) override;
+  virtual void destroy_gpu_resources(GPUDisplay *gpu_display) override;

  virtual bool copy_render_buffers_from_device() override;
  virtual bool copy_render_buffers_to_device() override;
--- a/intern/cycles/integrator/path_trace_work_gpu.cpp
+++ b/intern/cycles/integrator/path_trace_work_gpu.cpp
@@ -15,12 +15,12 @@
 */

 #include "integrator/path_trace_work_gpu.h"
-#include "integrator/path_trace_display.h"

 #include "device/device.h"

 #include "integrator/pass_accessor_gpu.h"
 #include "render/buffers.h"
+#include "render/gpu_display.h"
 #include "render/scene.h"
 #include "util/util_logging.h"
 #include "util/util_tbb.h"
@@ -46,7 +46,7 @@ PathTraceWorkGPU::PathTraceWorkGPU(Device *device,
      queued_paths_(device, "queued_paths", MEM_READ_WRITE),
      num_queued_paths_(device, "num_queued_paths", MEM_READ_WRITE),
      work_tiles_(device, "work_tiles", MEM_READ_WRITE),
-      display_rgba_half_(device, "display buffer half", MEM_READ_WRITE),
+      gpu_display_rgba_half_(device, "display buffer half", MEM_READ_WRITE),
      max_num_paths_(queue_->num_concurrent_states(sizeof(IntegratorStateCPU))),
      min_num_active_paths_(queue_->num_concurrent_busy_states()),
      max_active_path_index_(0)
@@ -95,8 +95,8 @@ void PathTraceWorkGPU::alloc_integrator_soa()
 #define KERNEL_STRUCT_END(name) \
  break; \
  }
-#define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \
-  if (array_index == gpu_array_size - 1) { \
+#define KERNEL_STRUCT_END_ARRAY(name, array_size) \
+  if (array_index == array_size - 1) { \
    break; \
  } \
  }
@@ -652,7 +652,7 @@ int PathTraceWorkGPU::get_num_active_paths()
 bool PathTraceWorkGPU::should_use_graphics_interop()
 {
  /* There are few aspects with the graphics interop when using multiple devices caused by the fact
-   * that the PathTraceDisplay has a single texture:
+   * that the GPUDisplay has a single texture:
   *
   *   CUDA will return `CUDA_ERROR_NOT_SUPPORTED` from `cuGraphicsGLRegisterBuffer()` when
   *   attempting to register OpenGL PBO which has been mapped. Which makes sense, because
@@ -678,9 +678,9 @@ bool PathTraceWorkGPU::should_use_graphics_interop()
  return interop_use_;
 }

-void PathTraceWorkGPU::copy_to_display(PathTraceDisplay *display,
-                                       PassMode pass_mode,
-                                       int num_samples)
+void PathTraceWorkGPU::copy_to_gpu_display(GPUDisplay *gpu_display,
+                                           PassMode pass_mode,
+                                           int num_samples)
 {
  if (device_->have_error()) {
    /* Don't attempt to update GPU display if the device has errors: the error state will make
@@ -694,7 +694,7 @@ void PathTraceWorkGPU::copy_to_display(PathTraceDisplay *display,
  }

  if (should_use_graphics_interop()) {
-    if (copy_to_display_interop(display, pass_mode, num_samples)) {
+    if (copy_to_gpu_display_interop(gpu_display, pass_mode, num_samples)) {
      return;
    }

@@ -703,12 +703,12 @@ void PathTraceWorkGPU::copy_to_display(PathTraceDisplay *display,
    interop_use_ = false;
  }

-  copy_to_display_naive(display, pass_mode, num_samples);
+  copy_to_gpu_display_naive(gpu_display, pass_mode, num_samples);
 }

-void PathTraceWorkGPU::copy_to_display_naive(PathTraceDisplay *display,
-                                             PassMode pass_mode,
-                                             int num_samples)
+void PathTraceWorkGPU::copy_to_gpu_display_naive(GPUDisplay *gpu_display,
+                                                 PassMode pass_mode,
+                                                 int num_samples)
 {
  const int full_x = effective_buffer_params_.full_x;
  const int full_y = effective_buffer_params_.full_y;
@@ -725,42 +725,43 @@ void PathTraceWorkGPU::copy_to_display_naive(PathTraceDisplay *display,
   * NOTE: allocation happens to the final resolution so that no re-allocation happens on every
   * change of the resolution divider. However, if the display becomes smaller, shrink the
   * allocated memory as well. */
-  if (display_rgba_half_.data_width != final_width ||
-      display_rgba_half_.data_height != final_height) {
-    display_rgba_half_.alloc(final_width, final_height);
+  if (gpu_display_rgba_half_.data_width != final_width ||
+      gpu_display_rgba_half_.data_height != final_height) {
+    gpu_display_rgba_half_.alloc(final_width, final_height);
    /* TODO(sergey): There should be a way to make sure device-side memory is allocated without
     * transferring zeroes to the device. */
-    queue_->zero_to_device(display_rgba_half_);
+    queue_->zero_to_device(gpu_display_rgba_half_);
  }

  PassAccessor::Destination destination(film_->get_display_pass());
-  destination.d_pixels_half_rgba = display_rgba_half_.device_pointer;
+  destination.d_pixels_half_rgba = gpu_display_rgba_half_.device_pointer;

  get_render_tile_film_pixels(destination, pass_mode, num_samples);

-  queue_->copy_from_device(display_rgba_half_);
-  queue_->synchronize();
+  gpu_display_rgba_half_.copy_from_device();

-  display->copy_pixels_to_texture(display_rgba_half_.data(), texture_x, texture_y, width, height);
+  gpu_display->copy_pixels_to_texture(
+      gpu_display_rgba_half_.data(), texture_x, texture_y, width, height);
 }

-bool PathTraceWorkGPU::copy_to_display_interop(PathTraceDisplay *display,
-                                               PassMode pass_mode,
-                                               int num_samples)
+bool PathTraceWorkGPU::copy_to_gpu_display_interop(GPUDisplay *gpu_display,
+                                                   PassMode pass_mode,
+                                                   int num_samples)
 {
  if (!device_graphics_interop_) {
    device_graphics_interop_ = queue_->graphics_interop_create();
  }

-  const DisplayDriver::GraphicsInterop graphics_interop_dst = display->graphics_interop_get();
-  device_graphics_interop_->set_display_interop(graphics_interop_dst);
+  const DeviceGraphicsInteropDestination graphics_interop_dst =
+      gpu_display->graphics_interop_get();
+  device_graphics_interop_->set_destination(graphics_interop_dst);

  const device_ptr d_rgba_half = device_graphics_interop_->map();
  if (!d_rgba_half) {
    return false;
  }

-  PassAccessor::Destination destination = get_display_destination_template(display);
+  PassAccessor::Destination destination = get_gpu_display_destination_template(gpu_display);
  destination.d_pixels_half_rgba = d_rgba_half;

  get_render_tile_film_pixels(destination, pass_mode, num_samples);
@@ -770,14 +771,14 @@ bool PathTraceWorkGPU::copy_to_display_interop(PathTraceDisplay *display,
  return true;
 }

-void PathTraceWorkGPU::destroy_gpu_resources(PathTraceDisplay *display)
+void PathTraceWorkGPU::destroy_gpu_resources(GPUDisplay *gpu_display)
 {
  if (!device_graphics_interop_) {
    return;
  }
-  display->graphics_interop_activate();
+  gpu_display->graphics_interop_activate();
  device_graphics_interop_ = nullptr;
-  display->graphics_interop_deactivate();
+  gpu_display->graphics_interop_deactivate();
 }

 void PathTraceWorkGPU::get_render_tile_film_pixels(const PassAccessor::Destination &destination,
--- a/intern/cycles/integrator/path_trace_work_gpu.h
+++ b/intern/cycles/integrator/path_trace_work_gpu.h
@@ -48,10 +48,10 @@ class PathTraceWorkGPU : public PathTraceWork {
                              int start_sample,
                              int samples_num) override;

-  virtual void copy_to_display(PathTraceDisplay *display,
-                               PassMode pass_mode,
-                               int num_samples) override;
-  virtual void destroy_gpu_resources(PathTraceDisplay *display) override;
+  virtual void copy_to_gpu_display(GPUDisplay *gpu_display,
+                                   PassMode pass_mode,
+                                   int num_samples) override;
+  virtual void destroy_gpu_resources(GPUDisplay *gpu_display) override;

  virtual bool copy_render_buffers_from_device() override;
  virtual bool copy_render_buffers_to_device() override;
@@ -88,16 +88,16 @@ class PathTraceWorkGPU : public PathTraceWork {

  int get_num_active_paths();

-  /* Check whether graphics interop can be used for the PathTraceDisplay update. */
+  /* Check whether graphics interop can be used for the GPUDisplay update. */
  bool should_use_graphics_interop();

-  /* Naive implementation of the `copy_to_display()` which performs film conversion on the
-   * device, then copies pixels to the host and pushes them to the `display`. */
-  void copy_to_display_naive(PathTraceDisplay *display, PassMode pass_mode, int num_samples);
+  /* Naive implementation of the `copy_to_gpu_display()` which performs film conversion on the
+   * device, then copies pixels to the host and pushes them to the `gpu_display`. */
+  void copy_to_gpu_display_naive(GPUDisplay *gpu_display, PassMode pass_mode, int num_samples);

-  /* Implementation of `copy_to_display()` which uses driver's OpenGL/GPU interoperability
+  /* Implementation of `copy_to_gpu_display()` which uses driver's OpenGL/GPU interoperability
   * functionality, avoiding copy of pixels to the host. */
-  bool copy_to_display_interop(PathTraceDisplay *display, PassMode pass_mode, int num_samples);
+  bool copy_to_gpu_display_interop(GPUDisplay *gpu_display, PassMode pass_mode, int num_samples);

  /* Synchronously run film conversion kernel and store display result in the given destination. */
  void get_render_tile_film_pixels(const PassAccessor::Destination &destination,
@@ -139,9 +139,9 @@ class PathTraceWorkGPU : public PathTraceWork {
  /* Temporary buffer for passing work tiles to kernel. */
  device_vector<KernelWorkTile> work_tiles_;

-  /* Temporary buffer used by the copy_to_display() whenever graphics interoperability is not
+  /* Temporary buffer used by the copy_to_gpu_display() whenever graphics interoperability is not
   * available. Is allocated on-demand. */
-  device_vector<half4> display_rgba_half_;
+  device_vector<half4> gpu_display_rgba_half_;

  unique_ptr<DeviceGraphicsInterop> device_graphics_interop_;

--- a/intern/cycles/integrator/render_scheduler.cpp
+++ b/intern/cycles/integrator/render_scheduler.cpp
@@ -384,7 +384,7 @@ bool RenderScheduler::set_postprocess_render_work(RenderWork *render_work)
  }

  if (denoiser_params_.use && !state_.last_work_tile_was_denoised) {
-    render_work->tile.denoise = !tile_manager_.has_multiple_tiles();
+    render_work->tile.denoise = true;
    any_scheduled = true;
  }

@@ -903,12 +903,6 @@ bool RenderScheduler::work_need_denoise(bool &delayed, bool &ready_to_display)
    return false;
  }

-  /* When multiple tiles are used the full frame will be denoised.
-   * Avoid per-tile denoising to save up render time. */
-  if (tile_manager_.has_multiple_tiles()) {
-    return false;
-  }
-
  if (done()) {
    /* Always denoise at the last sample. */
    return true;
--- a/intern/cycles/integrator/render_scheduler.h
+++ b/intern/cycles/integrator/render_scheduler.h
@@ -344,7 +344,7 @@ class RenderScheduler {
    /* Number of rendered samples on top of the start sample. */
    int num_rendered_samples = 0;

-    /* Point in time the latest PathTraceDisplay work has been scheduled. */
+    /* Point in time the latest GPUDisplay work has been scheduled. */
    double last_display_update_time = 0.0;
    /* Value of -1 means display was never updated. */
    int last_display_update_sample = -1;
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -35,10 +35,6 @@ set(SRC_DEVICE_CUDA
  device/cuda/kernel.cu
 )

-set(SRC_DEVICE_HIP
-  device/hip/kernel.cpp
-)
-
 set(SRC_DEVICE_OPTIX
  device/optix/kernel.cu
  device/optix/kernel_shader_raytrace.cu
@@ -110,12 +106,6 @@ set(SRC_DEVICE_CUDA_HEADERS
  device/cuda/globals.h
 )

-set(SRC_DEVICE_HIP_HEADERS
-  device/hip/compat.h
-  device/hip/config.h
-  device/hip/globals.h
-)
-
 set(SRC_DEVICE_OPTIX_HEADERS
  device/optix/compat.h
  device/optix/globals.h
@@ -468,104 +458,6 @@ if(WITH_CYCLES_CUDA_BINARIES)
  cycles_set_solution_folder(cycles_kernel_cuda)
 endif()

-####################################################### START
-
-# HIP module
-
-if(WITH_CYCLES_HIP_BINARIES)
-  # 64 bit only
-  set(HIP_BITS 64)
-
-  # HIP version
-  execute_process(COMMAND ${HIP_HIPCC_EXECUTABLE} "--version" OUTPUT_VARIABLE HIPCC_OUT)
-  string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" HIP_VERSION_MAJOR "${HIPCC_OUT}")
-  string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" HIP_VERSION_MINOR "${HIPCC_OUT}")
-  set(HIP_VERSION "${HIP_VERSION_MAJOR}${HIP_VERSION_MINOR}")
-
-
-  message(WARNING
-    "HIP version ${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR} detected")
-
-  # build for each arch
-  set(hip_sources device/hip/kernel.cpp
-    ${SRC_HEADERS}
-    ${SRC_DEVICE_HIP_HEADERS}
-    ${SRC_BVH_HEADERS}
-    ${SRC_SVM_HEADERS}
-    ${SRC_GEOM_HEADERS}
-    ${SRC_INTEGRATOR_HEADERS}
-    ${SRC_CLOSURE_HEADERS}
-    ${SRC_UTIL_HEADERS}
-  )
-  set(hip_fatbins)
-
-  macro(CYCLES_HIP_KERNEL_ADD arch prev_arch name flags sources experimental)
-    if(${arch} MATCHES "compute_.*")
-      set(format "ptx")
-    else()
-      set(format "fatbin")
-    endif()
-    set(hip_file ${name}_${arch}.${format})
-
-    set(kernel_sources ${sources})
-    if(NOT ${prev_arch} STREQUAL "none")
-      if(${prev_arch} MATCHES "compute_.*")
-        set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.ptx)
-      else()
-        set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.fatbin)
-      endif()
-    endif()
-
-    set(hip_kernel_src "/device/hip/${name}.cpp")
-
-    set(hip_flags ${flags}
-      -D CCL_NAMESPACE_BEGIN=
-      -D CCL_NAMESPACE_END=
-      -D HIPCC
-      -m ${HIP_BITS}
-      -I ${CMAKE_CURRENT_SOURCE_DIR}/..
-      -I ${CMAKE_CURRENT_SOURCE_DIR}/device/hip
-      --use_fast_math
-      -o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file})
-
-    if(${experimental})
-      set(hip_flags ${hip_flags} -D __KERNEL_EXPERIMENTAL__)
-      set(name ${name}_experimental)
-    endif()
-
-    if(WITH_CYCLES_DEBUG)
-      set(hip_flags ${hip_flags} -D __KERNEL_DEBUG__)
-    endif()
-
-    if(WITH_NANOVDB)
-      set(hip_flags ${hip_flags}
-        -D WITH_NANOVDB
-        -I "${NANOVDB_INCLUDE_DIR}")
-    endif()
-  endmacro()
-
-  set(prev_arch "none")
-  foreach(arch ${CYCLES_HIP_BINARIES_ARCH})
-      set(hip_hipcc_executable ${HIP_HIPCC_EXECUTABLE})
-      set(hip_toolkit_root_dir ${HIP_TOOLKIT_ROOT_DIR})
-    if(DEFINED hip_hipcc_executable AND DEFINED hip_toolkit_root_dir)
-      # Compile regular kernel
-      CYCLES_HIP_KERNEL_ADD(${arch} ${prev_arch} kernel "" "${hip_sources}" FALSE)
-
-      if(WITH_CYCLES_HIP_BUILD_SERIAL)
-        set(prev_arch ${arch})
-      endif()
-
-      unset(hip_hipcc_executable)
-      unset(hip_toolkit_root_dir)
-    endif()
-  endforeach()
-
-  add_custom_target(cycles_kernel_hip ALL DEPENDS ${hip_fatbins})
-  cycles_set_solution_folder(cycles_kernel_hip)
-endif()
-
-####################################################### END
 # OptiX PTX modules

 if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
@@ -710,13 +602,11 @@ endif()
 cycles_add_library(cycles_kernel "${LIB}"
  ${SRC_DEVICE_CPU}
  ${SRC_DEVICE_CUDA}
-  ${SRC_DEVICE_HIP}
  ${SRC_DEVICE_OPTIX}
  ${SRC_HEADERS}
  ${SRC_DEVICE_CPU_HEADERS}
  ${SRC_DEVICE_GPU_HEADERS}
  ${SRC_DEVICE_CUDA_HEADERS}
-  ${SRC_DEVICE_HIP_HEADERS}
  ${SRC_DEVICE_OPTIX_HEADERS}
  ${SRC_BVH_HEADERS}
  ${SRC_CLOSURE_HEADERS}
@@ -731,7 +621,6 @@ source_group("geom" FILES ${SRC_GEOM_HEADERS})
 source_group("integrator" FILES ${SRC_INTEGRATOR_HEADERS})
 source_group("kernel" FILES ${SRC_HEADERS})
 source_group("device\\cpu" FILES ${SRC_DEVICE_CPU} ${SRC_DEVICE_CPU_HEADERS})
-source_group("device\\hip" FILES ${SRC_DEVICE_HIP} ${SRC_DEVICE_HIP_HEADERS})
 source_group("device\\gpu" FILES ${SRC_DEVICE_GPU_HEADERS})
 source_group("device\\cuda" FILES ${SRC_DEVICE_CUDA} ${SRC_DEVICE_CUDA_HEADERS})
 source_group("device\\optix" FILES ${SRC_DEVICE_OPTIX} ${SRC_DEVICE_OPTIX_HEADERS})
@@ -743,19 +632,14 @@ endif()
 if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
  add_dependencies(cycles_kernel cycles_kernel_optix)
 endif()
-if(WITH_CYCLES_HIP)
-  add_dependencies(cycles_kernel cycles_kernel_hip)
-endif()

 # Install kernel source for runtime compilation

 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_DEVICE_CUDA}" ${CYCLES_INSTALL_PATH}/source/kernel/device/cuda)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_DEVICE_HIP}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hip)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_DEVICE_OPTIX}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_DEVICE_GPU_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/gpu)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_DEVICE_CUDA_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/cuda)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_DEVICE_HIP_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hip)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_DEVICE_OPTIX_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_BVH_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/bvh)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CLOSURE_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/closure)
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -167,25 +167,15 @@ ccl_device_intersect bool scene_intersect(const KernelGlobals *kg,
  uint p4 = visibility;
  uint p5 = PRIMITIVE_NONE;

-  uint ray_mask = visibility & 0xFF;
-  uint ray_flags = OPTIX_RAY_FLAG_NONE;
-  if (0 == ray_mask && (visibility & ~0xFF) != 0) {
-    ray_mask = 0xFF;
-    ray_flags = OPTIX_RAY_FLAG_ENFORCE_ANYHIT;
-  }
-  else if (visibility & PATH_RAY_SHADOW_OPAQUE) {
-    ray_flags = OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT;
-  }
-
  optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
             ray->P,
             ray->D,
             0.0f,
             ray->t,
             ray->time,
-             ray_mask,
-             ray_flags,
-             0, /* SBT offset for PG_HITD */
+             0xF,
+             OPTIX_RAY_FLAG_NONE,
+             0,  // SBT offset for PG_HITD
             0,
             0,
             p0,
@@ -261,11 +251,11 @@ ccl_device_intersect bool scene_intersect_local(const KernelGlobals *kg,
  uint p2 = ((uint64_t)local_isect) & 0xFFFFFFFF;
  uint p3 = (((uint64_t)local_isect) >> 32) & 0xFFFFFFFF;
  uint p4 = local_object;
-  /* Is set to zero on miss or if ray is aborted, so can be used as return value. */
+  // Is set to zero on miss or if ray is aborted, so can be used as return value
  uint p5 = max_hits;

  if (local_isect) {
-    local_isect->num_hits = 0; /* Initialize hit count to zero. */
+    local_isect->num_hits = 0;  // Initialize hit count to zero
  }
  optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
             ray->P,
@@ -273,10 +263,11 @@ ccl_device_intersect bool scene_intersect_local(const KernelGlobals *kg,
             0.0f,
             ray->t,
             ray->time,
-             0xFF,
-             /* Need to always call into __anyhit__kernel_optix_local_hit. */
+             // Skip curves
+             0x3,
+             // Need to always call into __anyhit__kernel_optix_local_hit
             OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
-             2, /* SBT offset for PG_HITL */
+             2,  // SBT offset for PG_HITL
             0,
             0,
             p0,
@@ -374,22 +365,17 @@ ccl_device_intersect bool scene_intersect_shadow_all(const KernelGlobals *kg,
  uint p4 = visibility;
  uint p5 = false;

-  uint ray_mask = visibility & 0xFF;
-  if (0 == ray_mask && (visibility & ~0xFF) != 0) {
-    ray_mask = 0xFF;
-  }
-
-  *num_hits = 0; /* Initialize hit count to zero. */
+  *num_hits = 0;  // Initialize hit count to zero
  optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
             ray->P,
             ray->D,
             0.0f,
             ray->t,
             ray->time,
-             ray_mask,
-             /* Need to always call into __anyhit__kernel_optix_shadow_all_hit. */
+             0xF,
+             // Need to always call into __anyhit__kernel_optix_shadow_all_hit
             OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
-             1, /* SBT offset for PG_HITS */
+             1,  // SBT offset for PG_HITS
             0,
             0,
             p0,
@@ -458,21 +444,16 @@ ccl_device_intersect bool scene_intersect_volume(const KernelGlobals *kg,
  uint p4 = visibility;
  uint p5 = PRIMITIVE_NONE;

-  uint ray_mask = visibility & 0xFF;
-  if (0 == ray_mask && (visibility & ~0xFF) != 0) {
-    ray_mask = 0xFF;
-  }
-
  optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
             ray->P,
             ray->D,
             0.0f,
             ray->t,
             ray->time,
-             ray_mask,
-             /* Need to always call into __anyhit__kernel_optix_volume_test. */
-             OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
-             3, /* SBT offset for PG_HITV */
+             // Skip everything but volumes
+             0x2,
+             OPTIX_RAY_FLAG_NONE,
+             0,  // SBT offset for PG_HITD
             0,
             0,
             p0,
--- a/intern/cycles/kernel/device/gpu/parallel_active_index.h
+++ b/intern/cycles/kernel/device/gpu/parallel_active_index.h
@@ -25,11 +25,7 @@ CCL_NAMESPACE_BEGIN

 #include "util/util_atomic.h"

-#ifdef __HIP__
-#  define GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE 1024
-#else
-#  define GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE 512
-#endif
+#define GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE 512

 template<uint blocksize, typename IsActiveOp>
 __device__ void gpu_parallel_active_index_array(const uint num_states,
--- a/intern/cycles/kernel/device/gpu/parallel_prefix_sum.h
+++ b/intern/cycles/kernel/device/gpu/parallel_prefix_sum.h
@@ -27,11 +27,7 @@ CCL_NAMESPACE_BEGIN

 #include "util/util_atomic.h"

-#ifdef __HIP__
-#  define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 1024
-#else
-#  define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 512
-#endif
+#define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 512

 template<uint blocksize> __device__ void gpu_parallel_prefix_sum(int *values, const int num_values)
 {
--- a/intern/cycles/kernel/device/gpu/parallel_reduce.h
+++ b/intern/cycles/kernel/device/gpu/parallel_reduce.h
@@ -26,11 +26,7 @@ CCL_NAMESPACE_BEGIN
 * the overall cost of the algorithm while keeping the work complexity O(n) and
 * the step complexity O(log n). (Brent's Theorem optimization) */

-#ifdef __HIP__
-#  define GPU_PARALLEL_SUM_DEFAULT_BLOCK_SIZE 1024
-#else
-#  define GPU_PARALLEL_SUM_DEFAULT_BLOCK_SIZE 512
-#endif
+#define GPU_PARALLEL_SUM_DEFAULT_BLOCK_SIZE 512

 template<uint blocksize, typename InputT, typename OutputT, typename ConvertOp>
 __device__ void gpu_parallel_sum(
--- a/intern/cycles/kernel/device/gpu/parallel_sorted_index.h
+++ b/intern/cycles/kernel/device/gpu/parallel_sorted_index.h
@@ -26,11 +26,7 @@ CCL_NAMESPACE_BEGIN

 #include "util/util_atomic.h"

-#ifdef __HIP__
-#  define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE 1024
-#else
-#  define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE 512
-#endif
+#define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE 512
 #define GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY (~0)

 template<uint blocksize, typename GetKeyOp>
--- a/intern/cycles/kernel/device/hip/compat.h
+++ b/intern/cycles/kernel/device/hip/compat.h
@@ -1,121 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#define __KERNEL_GPU__
-#define __KERNEL_HIP__
-#define CCL_NAMESPACE_BEGIN
-#define CCL_NAMESPACE_END
-
-#ifndef ATTR_FALLTHROUGH
-#  define ATTR_FALLTHROUGH
-#endif
-
-#ifdef __HIPCC_RTC__
-typedef unsigned int uint32_t;
-typedef unsigned long long uint64_t;
-#else
-#  include <stdint.h>
-#endif
-
-#ifdef CYCLES_HIPBIN_CC
-#  define FLT_MIN 1.175494350822287507969e-38f
-#  define FLT_MAX 340282346638528859811704183484516925440.0f
-#  define FLT_EPSILON 1.192092896e-07F
-#endif
-
-/* Qualifiers */
-
-#define ccl_device __device__ __inline__
-#define ccl_device_inline __device__ __inline__
-#define ccl_device_forceinline __device__ __forceinline__
-#define ccl_device_noinline __device__ __noinline__
-#define ccl_device_noinline_cpu ccl_device
-#define ccl_global
-#define ccl_static_constant __constant__
-#define ccl_device_constant __constant__ __device__
-#define ccl_constant const
-#define ccl_gpu_shared __shared__
-#define ccl_private
-#define ccl_may_alias
-#define ccl_addr_space
-#define ccl_restrict __restrict__
-#define ccl_loop_no_unroll
-#define ccl_align(n) __align__(n)
-#define ccl_optional_struct_init
-
-#define kernel_assert(cond)
-
-/* Types */
-#ifdef __HIP__
-#  include "hip/hip_fp16.h"
-#  include "hip/hip_runtime.h"
-#endif
-
-#ifdef _MSC_VER
-#  include <immintrin.h>
-#endif
-
-#define ccl_gpu_thread_idx_x (threadIdx.x)
-#define ccl_gpu_block_dim_x (blockDim.x)
-#define ccl_gpu_block_idx_x (blockIdx.x)
-#define ccl_gpu_grid_dim_x (gridDim.x)
-#define ccl_gpu_warp_size (warpSize)
-
-#define ccl_gpu_global_id_x() (ccl_gpu_block_idx_x * ccl_gpu_block_dim_x + ccl_gpu_thread_idx_x)
-#define ccl_gpu_global_size_x() (ccl_gpu_grid_dim_x * ccl_gpu_block_dim_x)
-
-/* GPU warp synchronization */
-
-#define ccl_gpu_syncthreads() __syncthreads()
-#define ccl_gpu_ballot(predicate) __ballot(predicate)
-#define ccl_gpu_shfl_down_sync(mask, var, detla) __shfl_down(var, detla)
-#define ccl_gpu_popc(x) __popc(x)
-
-/* GPU texture objects */
-typedef hipTextureObject_t ccl_gpu_tex_object;
-
-template<typename T>
-ccl_device_forceinline T ccl_gpu_tex_object_read_2D(const ccl_gpu_tex_object texobj,
-                                                    const float x,
-                                                    const float y)
-{
-  return tex2D<T>(texobj, x, y);
-}
-
-template<typename T>
-ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object texobj,
-                                                    const float x,
-                                                    const float y,
-                                                    const float z)
-{
-  return tex3D<T>(texobj, x, y, z);
-}
-
-/* Use fast math functions */
-
-#define cosf(x) __cosf(((float)(x)))
-#define sinf(x) __sinf(((float)(x)))
-#define powf(x, y) __powf(((float)(x)), ((float)(y)))
-#define tanf(x) __tanf(((float)(x)))
-#define logf(x) __logf(((float)(x)))
-#define expf(x) __expf(((float)(x)))
-
-/* Types */
-
-#include "util/util_half.h"
-#include "util/util_types.h"
--- a/intern/cycles/kernel/device/hip/config.h
+++ b/intern/cycles/kernel/device/hip/config.h
@@ -1,57 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Device data taken from HIP occupancy calculator.
- *
- * Terminology
- * - HIP GPUs have multiple streaming multiprocessors
- * - Each multiprocessor executes multiple thread blocks
- * - Each thread block contains a number of threads, also known as the block size
- * - Multiprocessors have a fixed number of registers, and the amount of registers
- *   used by each threads limits the number of threads per block.
- */
-
-/* Launch Bound Definitions */
-#define GPU_MULTIPRESSOR_MAX_REGISTERS 65536
-#define GPU_MULTIPROCESSOR_MAX_BLOCKS 64
-#define GPU_BLOCK_MAX_THREADS 1024
-#define GPU_THREAD_MAX_REGISTERS 255
-
-#define GPU_KERNEL_BLOCK_NUM_THREADS 1024
-#define GPU_KERNEL_MAX_REGISTERS 64
-
-/* Compute number of threads per block and minimum blocks per multiprocessor
- * given the maximum number of registers per thread. */
-
-#define ccl_gpu_kernel(block_num_threads, thread_num_registers) \
-  extern "C" __global__ void __launch_bounds__(block_num_threads, \
-                                               GPU_MULTIPRESSOR_MAX_REGISTERS / \
-                                                   (block_num_threads * thread_num_registers))
-
-/* sanity checks */
-
-#if GPU_KERNEL_BLOCK_NUM_THREADS > GPU_BLOCK_MAX_THREADS
-#  error "Maximum number of threads per block exceeded"
-#endif
-
-#if GPU_MULTIPRESSOR_MAX_REGISTERS / (GPU_KERNEL_BLOCK_NUM_THREADS * GPU_KERNEL_MAX_REGISTERS) > \
-    GPU_MULTIPROCESSOR_MAX_BLOCKS
-#  error "Maximum number of blocks per multiprocessor exceeded"
-#endif
-
-#if GPU_KERNEL_MAX_REGISTERS > GPU_THREAD_MAX_REGISTERS
-#  error "Maximum number of registers per thread exceeded"
-#endif
--- a/intern/cycles/kernel/device/hip/globals.h
+++ b/intern/cycles/kernel/device/hip/globals.h
@@ -1,49 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Constant Globals */
-
-#pragma once
-
-#include "kernel/kernel_profiling.h"
-#include "kernel/kernel_types.h"
-
-#include "kernel/integrator/integrator_state.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* Not actually used, just a NULL pointer that gets passed everywhere, which we
- * hope gets optimized out by the compiler. */
-struct KernelGlobals {
-  /* NOTE: Keep the size in sync with SHADOW_STACK_MAX_HITS. */
-  int unused[1];
-};
-
-/* Global scene data and textures */
-__constant__ KernelData __data;
-#define KERNEL_TEX(type, name) __attribute__((used)) const __constant__ __device__ type *name;
-#include "kernel/kernel_textures.h"
-
-/* Integrator state */
-__constant__ IntegratorStateGPU __integrator_state;
-
-/* Abstraction macros */
-#define kernel_data __data
-#define kernel_tex_fetch(t, index) t[(index)]
-#define kernel_tex_array(t) (t)
-#define kernel_integrator_state __integrator_state
-
-CCL_NAMESPACE_END
--- a/intern/cycles/kernel/device/hip/kernel.cpp
+++ b/intern/cycles/kernel/device/hip/kernel.cpp
@@ -1,28 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* HIP kernel entry points */
-
-#ifdef __HIP_DEVICE_COMPILE__
-
-#  include "kernel/device/hip/compat.h"
-#  include "kernel/device/hip/config.h"
-#  include "kernel/device/hip/globals.h"
-
-#  include "kernel/device/gpu/image.h"
-#  include "kernel/device/gpu/kernel.h"
-
-#endif
--- a/intern/cycles/kernel/device/optix/kernel.cu
+++ b/intern/cycles/kernel/device/optix/kernel.cu
@@ -19,7 +19,7 @@
 #include "kernel/device/optix/compat.h"
 #include "kernel/device/optix/globals.h"

-#include "kernel/device/gpu/image.h"  /* Texture lookup uses normal CUDA intrinsics. */
+#include "kernel/device/gpu/image.h"  // Texture lookup uses normal CUDA intrinsics

 #include "kernel/integrator/integrator_state.h"
 #include "kernel/integrator/integrator_state_flow.h"
@@ -44,18 +44,18 @@ template<typename T> ccl_device_forceinline T *get_payload_ptr_2()
 template<bool always = false> ccl_device_forceinline uint get_object_id()
 {
 #ifdef __OBJECT_MOTION__
-  /* Always get the the instance ID from the TLAS.
-   * There might be a motion transform node between TLAS and BLAS which does not have one. */
+  // Always get the the instance ID from the TLAS
+  // There might be a motion transform node between TLAS and BLAS which does not have one
  uint object = optixGetInstanceIdFromHandle(optixGetTransformListHandle(0));
 #else
  uint object = optixGetInstanceId();
 #endif
-  /* Choose between always returning object ID or only for instances. */
+  // Choose between always returning object ID or only for instances
  if (always || (object & 1) == 0)
-    /* Can just remove the low bit since instance always contains object ID. */
+    // Can just remove the low bit since instance always contains object ID
    return object >> 1;
  else
-    /* Set to OBJECT_NONE if this is not an instanced object. */
+    // Set to OBJECT_NONE if this is not an instanced object
    return OBJECT_NONE;
 }

@@ -93,30 +93,23 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_volume_st

 extern "C" __global__ void __miss__kernel_optix_miss()
 {
-  /* 'kernel_path_lamp_emission' checks intersection distance, so need to set it even on a miss. */
+  // 'kernel_path_lamp_emission' checks intersection distance, so need to set it even on a miss
  optixSetPayload_0(__float_as_uint(optixGetRayTmax()));
  optixSetPayload_5(PRIMITIVE_NONE);
 }

 extern "C" __global__ void __anyhit__kernel_optix_local_hit()
 {
-#ifdef __HAIR__
-  if (!optixIsTriangleHit()) {
-    /* Ignore curves. */
-    return optixIgnoreIntersection();
-  }
-#endif
-
 #ifdef __BVH_LOCAL__
  const uint object = get_object_id<true>();
  if (object != optixGetPayload_4() /* local_object */) {
-    /* Only intersect with matching object. */
+    // Only intersect with matching object
    return optixIgnoreIntersection();
  }

  const uint max_hits = optixGetPayload_5();
  if (max_hits == 0) {
-    /* Special case for when no hit information is requested, just report that something was hit */
+    // Special case for when no hit information is requested, just report that something was hit
    optixSetPayload_5(true);
    return optixTerminateRay();
  }
@@ -143,9 +136,8 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit()
  }
  else {
    if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) {
-      /* Record closest intersection only.
-       * Do not terminate ray here, since there is no guarantee about distance ordering in any-hit.
-       */
+      // Record closest intersection only
+      // Do not terminate ray here, since there is no guarantee about distance ordering in any-hit
      return optixIgnoreIntersection();
    }

@@ -162,14 +154,14 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit()
  isect->u = 1.0f - barycentrics.y - barycentrics.x;
  isect->v = barycentrics.x;

-  /* Record geometric normal. */
+  // Record geometric normal
  const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim);
  const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0));
  const float3 tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1));
  const float3 tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2));
  local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));

-  /* Continue tracing (without this the trace call would return after the first hit). */
+  // Continue tracing (without this the trace call would return after the first hit)
  optixIgnoreIntersection();
 #endif
 }
@@ -198,7 +190,7 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
    u = __uint_as_float(optixGetAttribute_0());
    v = __uint_as_float(optixGetAttribute_1());

-    /* Filter out curve endcaps. */
+    // Filter out curve endcaps
    if (u == 0.0f || u == 1.0f) {
      ignore_intersection = true;
    }
@@ -249,10 +241,10 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
    isect->type = kernel_tex_fetch(__prim_type, prim);

 #  ifdef __TRANSPARENT_SHADOWS__
-    /* Detect if this surface has a shader with transparent shadows. */
+    // Detect if this surface has a shader with transparent shadows
    if (!shader_transparent_shadow(NULL, isect) || max_hits == 0) {
 #  endif
-      /* If no transparent shadows, all light is blocked and we can stop immediately. */
+      // If no transparent shadows, all light is blocked and we can stop immediately
      optixSetPayload_5(true);
      return optixTerminateRay();
 #  ifdef __TRANSPARENT_SHADOWS__
@@ -260,39 +252,24 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
 #  endif
  }

-  /* Continue tracing. */
+  // Continue tracing
  optixIgnoreIntersection();
 #endif
 }

-extern "C" __global__ void __anyhit__kernel_optix_volume_test()
+extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
 {
-#ifdef __HAIR__
-  if (!optixIsTriangleHit()) {
-    /* Ignore curves. */
-    return optixIgnoreIntersection();
-  }
-#endif
-
+  uint visibility = optixGetPayload_4();
 #ifdef __VISIBILITY_FLAG__
  const uint prim = optixGetPrimitiveIndex();
-  const uint visibility = optixGetPayload_4();
  if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) {
    return optixIgnoreIntersection();
  }
 #endif

-  const uint object = get_object_id<true>();
-  if ((kernel_tex_fetch(__object_flag, object) & SD_OBJECT_HAS_VOLUME) == 0) {
-    return optixIgnoreIntersection();
-  }
-}
-
-extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
-{
 #ifdef __HAIR__
  if (!optixIsTriangleHit()) {
-    /* Filter out curve endcaps. */
+    // Filter out curve endcaps
    const float u = __uint_as_float(optixGetAttribute_0());
    if (u == 0.0f || u == 1.0f) {
      return optixIgnoreIntersection();
@@ -300,26 +277,18 @@ extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
  }
 #endif

-#ifdef __VISIBILITY_FLAG__
-  const uint prim = optixGetPrimitiveIndex();
-  const uint visibility = optixGetPayload_4();
-  if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) {
-    return optixIgnoreIntersection();
-  }
-
-  /* Shadow ray early termination. */
+  // Shadow ray early termination
  if (visibility & PATH_RAY_SHADOW_OPAQUE) {
    return optixTerminateRay();
  }
-#endif
 }

 extern "C" __global__ void __closesthit__kernel_optix_hit()
 {
-  optixSetPayload_0(__float_as_uint(optixGetRayTmax())); /* Intersection distance */
+  optixSetPayload_0(__float_as_uint(optixGetRayTmax()));  // Intersection distance
  optixSetPayload_3(optixGetPrimitiveIndex());
  optixSetPayload_4(get_object_id());
-  /* Can be PRIMITIVE_TRIANGLE and PRIMITIVE_MOTION_TRIANGLE or curve type and segment index. */
+  // Can be PRIMITIVE_TRIANGLE and PRIMITIVE_MOTION_TRIANGLE or curve type and segment index
  optixSetPayload_5(kernel_tex_fetch(__prim_type, optixGetPrimitiveIndex()));

  if (optixIsTriangleHit()) {
@@ -328,7 +297,7 @@ extern "C" __global__ void __closesthit__kernel_optix_hit()
    optixSetPayload_2(__float_as_uint(barycentrics.x));
  }
  else {
-    optixSetPayload_1(optixGetAttribute_0()); /* Same as 'optixGetCurveParameter()' */
+    optixSetPayload_1(optixGetAttribute_0());  // Same as 'optixGetCurveParameter()'
    optixSetPayload_2(optixGetAttribute_1());
  }
 }
@@ -342,7 +311,7 @@ ccl_device_inline void optix_intersection_curve(const uint prim, const uint type
  float3 P = optixGetObjectRayOrigin();
  float3 dir = optixGetObjectRayDirection();

-  /* The direction is not normalized by default, but the curve intersection routine expects that */
+  // The direction is not normalized by default, but the curve intersection routine expects that
  float len;
  dir = normalize_len(dir, &len);

@@ -354,15 +323,15 @@ ccl_device_inline void optix_intersection_curve(const uint prim, const uint type

  Intersection isect;
  isect.t = optixGetRayTmax();
-  /* Transform maximum distance into object space. */
+  // Transform maximum distance into object space
  if (isect.t != FLT_MAX)
    isect.t *= len;

  if (curve_intersect(NULL, &isect, P, dir, isect.t, visibility, object, prim, time, type)) {
    optixReportIntersection(isect.t / len,
                            type & PRIMITIVE_ALL,
-                            __float_as_int(isect.u),  /* Attribute_0 */
-                            __float_as_int(isect.v)); /* Attribute_1 */
+                            __float_as_int(isect.u),   // Attribute_0
+                            __float_as_int(isect.v));  // Attribute_1
  }
 }

--- a/intern/cycles/kernel/geom/geom_curve_intersect.h
+++ b/intern/cycles/kernel/geom/geom_curve_intersect.h
@@ -713,7 +713,7 @@ ccl_device_inline void curve_shader_setup(const KernelGlobals *kg,

    P = transform_point(&tfm, P);
    D = transform_direction(&tfm, D * t);
-    D = safe_normalize_len(D, &t);
+    D = normalize_len(D, &t);
  }

  int prim = kernel_tex_fetch(__prim_index, isect_prim);
@@ -764,10 +764,8 @@ ccl_device_inline void curve_shader_setup(const KernelGlobals *kg,
    /* Thick curves, compute normal using direction from inside the curve.
     * This could be optimized by recording the normal in the intersection,
     * however for Optix this would go beyond the size of the payload. */
-    /* NOTE: It is possible that P will be the same as P_inside (precision issues, or very small
-     * radius). In this case use the view direction to approximate the normal. */
    const float3 P_inside = float4_to_float3(catmull_rom_basis_eval(P_curve, sd->u));
-    const float3 Ng = (!isequal_float3(P, P_inside)) ? normalize(P - P_inside) : -sd->I;
+    const float3 Ng = normalize(P - P_inside);

    sd->N = Ng;
    sd->Ng = Ng;
--- a/intern/cycles/kernel/geom/geom_motion_triangle.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle.h
@@ -41,18 +41,7 @@ ccl_device_inline int find_attribute_motion(const KernelGlobals *kg,
  uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);

  while (attr_map.x != id) {
-    if (UNLIKELY(attr_map.x == ATTR_STD_NONE)) {
-      if (UNLIKELY(attr_map.y == 0)) {
-        return (int)ATTR_STD_NOT_FOUND;
-      }
-      else {
-        /* Chain jump to a different part of the table. */
-        attr_offset = attr_map.z;
-      }
-    }
-    else {
-      attr_offset += ATTR_PRIM_TYPES;
-    }
+    attr_offset += ATTR_PRIM_TYPES;
    attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
  }

--- a/intern/cycles/kernel/geom/geom_shader_data.h
+++ b/intern/cycles/kernel/geom/geom_shader_data.h
@@ -103,7 +103,7 @@ ccl_device_inline void shader_setup_from_ray(const KernelGlobals *ccl_restrict k

  sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;

-  if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+  if (isect->object != OBJECT_NONE) {
    /* instance transform */
    object_normal_transform_auto(kg, sd, &sd->N);
    object_normal_transform_auto(kg, sd, &sd->Ng);
--- a/intern/cycles/kernel/integrator/integrator_init_from_bake.h
+++ b/intern/cycles/kernel/integrator/integrator_init_from_bake.h
@@ -109,17 +109,9 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
  }

  /* Position and normal on triangle. */
-  const int object = kernel_data.bake.object_index;
  float3 P, Ng;
  int shader;
-  triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
-
-  const int object_flag = kernel_tex_fetch(__object_flag, object);
-  if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
-    Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
-    P = transform_point_auto(&tfm, P);
-  }
-
+  triangle_point_normal(kg, kernel_data.bake.object_index, prim, u, v, &P, &Ng, &shader);
  if (kernel_data.film.pass_background != PASS_UNUSED) {
    /* Environment baking. */

@@ -138,13 +130,8 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
  }
  else {
    /* Surface baking. */
-    float3 N = (shader & SHADER_SMOOTH_NORMAL) ? triangle_smooth_normal(kg, Ng, prim, u, v) : Ng;
-
-    if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
-      Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
-      N = normalize(transform_direction_transposed(&itfm, N));
-      Ng = normalize(transform_direction_transposed(&itfm, Ng));
-    }
+    const float3 N = (shader & SHADER_SMOOTH_NORMAL) ? triangle_smooth_normal(kg, Ng, prim, u, v) :
+                                                       Ng;

    /* Setup ray. */
    Ray ray ccl_optional_struct_init;
@@ -156,12 +143,6 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
    /* Setup differentials. */
    float3 dPdu, dPdv;
    triangle_dPdudv(kg, prim, &dPdu, &dPdv);
-    if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
-      Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
-      dPdu = transform_direction(&tfm, dPdu);
-      dPdv = transform_direction(&tfm, dPdv);
-    }
-
    differential3 dP;
    dP.dx = dPdu * dudx + dPdv * dvdx;
    dP.dy = dPdu * dudy + dPdv * dvdy;
--- a/intern/cycles/kernel/integrator/integrator_intersect_closest.h
+++ b/intern/cycles/kernel/integrator/integrator_intersect_closest.h
@@ -123,7 +123,7 @@ ccl_device_forceinline void integrator_intersect_shader_next_kernel(
 #ifdef __SHADOW_CATCHER__
  const int object_flags = intersection_get_object_flags(kg, isect);
  if (kernel_shadow_catcher_split(INTEGRATOR_STATE_PASS, object_flags)) {
-    if (kernel_data.film.pass_background != PASS_UNUSED && !kernel_data.background.transparent) {
+    if (kernel_data.film.use_approximate_shadow_catcher && !kernel_data.background.transparent) {
      INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND;

      if (use_raytrace_kernel) {
--- a/intern/cycles/kernel/integrator/integrator_state.h
+++ b/intern/cycles/kernel/integrator/integrator_state.h
@@ -60,15 +60,7 @@ CCL_NAMESPACE_BEGIN
 * TODO: these could be made dynamic depending on the features used in the scene. */

 #define INTEGRATOR_VOLUME_STACK_SIZE VOLUME_STACK_SIZE
-
-#define INTEGRATOR_SHADOW_ISECT_SIZE_CPU 1024
-#define INTEGRATOR_SHADOW_ISECT_SIZE_GPU 4
-
-#ifdef __KERNEL_CPU__
-#  define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_CPU
-#else
-#  define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_GPU
-#endif
+#define INTEGRATOR_SHADOW_ISECT_SIZE 4

 /* Data structures */

@@ -82,9 +74,9 @@ typedef struct IntegratorStateCPU {
 #define KERNEL_STRUCT_END(name) \
  } \
  name;
-#define KERNEL_STRUCT_END_ARRAY(name, cpu_size, gpu_size) \
+#define KERNEL_STRUCT_END_ARRAY(name, size) \
  } \
-  name[cpu_size];
+  name[size];
 #include "kernel/integrator/integrator_state_template.h"
 #undef KERNEL_STRUCT_BEGIN
 #undef KERNEL_STRUCT_MEMBER
@@ -111,9 +103,9 @@ typedef struct IntegratorStateGPU {
 #define KERNEL_STRUCT_END(name) \
  } \
  name;
-#define KERNEL_STRUCT_END_ARRAY(name, cpu_size, gpu_size) \
+#define KERNEL_STRUCT_END_ARRAY(name, size) \
  } \
-  name[gpu_size];
+  name[size];
 #include "kernel/integrator/integrator_state_template.h"
 #undef KERNEL_STRUCT_BEGIN
 #undef KERNEL_STRUCT_MEMBER
--- a/intern/cycles/kernel/integrator/integrator_state_template.h
+++ b/intern/cycles/kernel/integrator/integrator_state_template.h
@@ -107,7 +107,7 @@ KERNEL_STRUCT_END(subsurface)
 KERNEL_STRUCT_BEGIN(volume_stack)
 KERNEL_STRUCT_ARRAY_MEMBER(volume_stack, int, object, KERNEL_FEATURE_VOLUME)
 KERNEL_STRUCT_ARRAY_MEMBER(volume_stack, int, shader, KERNEL_FEATURE_VOLUME)
-KERNEL_STRUCT_END_ARRAY(volume_stack, INTEGRATOR_VOLUME_STACK_SIZE, INTEGRATOR_VOLUME_STACK_SIZE)
+KERNEL_STRUCT_END_ARRAY(volume_stack, INTEGRATOR_VOLUME_STACK_SIZE)

 /********************************* Shadow Path State **************************/

@@ -153,15 +153,11 @@ KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, object, KERNEL_FEATURE_PATH_TRACIN
 KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, type, KERNEL_FEATURE_PATH_TRACING)
 /* TODO: exclude for GPU. */
 KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float3, Ng, KERNEL_FEATURE_PATH_TRACING)
-KERNEL_STRUCT_END_ARRAY(shadow_isect,
-                        INTEGRATOR_SHADOW_ISECT_SIZE_CPU,
-                        INTEGRATOR_SHADOW_ISECT_SIZE_GPU)
+KERNEL_STRUCT_END_ARRAY(shadow_isect, INTEGRATOR_SHADOW_ISECT_SIZE)

 /**************************** Shadow Volume Stack *****************************/

 KERNEL_STRUCT_BEGIN(shadow_volume_stack)
 KERNEL_STRUCT_ARRAY_MEMBER(shadow_volume_stack, int, object, KERNEL_FEATURE_VOLUME)
 KERNEL_STRUCT_ARRAY_MEMBER(shadow_volume_stack, int, shader, KERNEL_FEATURE_VOLUME)
-KERNEL_STRUCT_END_ARRAY(shadow_volume_stack,
-                        INTEGRATOR_VOLUME_STACK_SIZE,
-                        INTEGRATOR_VOLUME_STACK_SIZE)
+KERNEL_STRUCT_END_ARRAY(shadow_volume_stack, INTEGRATOR_VOLUME_STACK_SIZE)
--- a/intern/cycles/kernel/integrator/integrator_state_util.h
+++ b/intern/cycles/kernel/integrator/integrator_state_util.h
@@ -217,10 +217,10 @@ ccl_device_inline void integrator_state_copy_only(const IntegratorState to_state
    while (false) \
      ;

-#  define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \
+#  define KERNEL_STRUCT_END_ARRAY(name, array_size) \
    ++index; \
    } \
-    while (index < gpu_array_size) \
+    while (index < array_size) \
      ;

 #  include "kernel/integrator/integrator_state_template.h"
@@ -264,12 +264,7 @@ ccl_device_inline void integrator_state_shadow_catcher_split(INTEGRATOR_STATE_AR

  IntegratorStateCPU *ccl_restrict split_state = state + 1;

-  /* Only copy the required subset, since shadow intersections are big and irrelevant here. */
-  split_state->path = state->path;
-  split_state->ray = state->ray;
-  split_state->isect = state->isect;
-  memcpy(split_state->volume_stack, state->volume_stack, sizeof(state->volume_stack));
-  split_state->shadow_path = state->shadow_path;
+  *split_state = *state;

  split_state->path.flag |= PATH_RAY_SHADOW_CATCHER_PASS;
 #endif
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -386,7 +386,7 @@ ccl_device_inline void kernel_accum_light(INTEGRATOR_STATE_CONST_ARGS,
 {
  /* The throughput for shadow paths already contains the light shader evaluation. */
  float3 contribution = INTEGRATOR_STATE(shadow_path, throughput);
-  kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(shadow_path, bounce));
+  kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(shadow_path, bounce) - 1);

  ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS,
                                                              render_buffer);
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -42,16 +42,6 @@ ccl_device void kernel_displace_evaluate(const KernelGlobals *kg,

  object_inverse_dir_transform(kg, &sd, &D);

-#ifdef __KERNEL_DEBUG_NAN__
-  if (!isfinite3_safe(D)) {
-    kernel_assert(!"Cycles displacement with non-finite value detected");
-  }
-#endif
-
-  /* Ensure finite displacement, preventing BVH from becoming degenerate and avoiding possible
-   * traversal issues caused by non-finite math. */
-  D = ensure_finite3(D);
-
  /* Write output. */
  output[offset] += make_float4(D.x, D.y, D.z, 0.0f);
 }
@@ -76,16 +66,7 @@ ccl_device void kernel_background_evaluate(const KernelGlobals *kg,
  const int path_flag = PATH_RAY_EMISSION;
  shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_LIGHT>(
      INTEGRATOR_STATE_PASS_NULL, &sd, NULL, path_flag);
-  float3 color = shader_background_eval(&sd);
-
-#ifdef __KERNEL_DEBUG_NAN__
-  if (!isfinite3_safe(color)) {
-    kernel_assert(!"Cycles background with non-finite value detected");
-  }
-#endif
-
-  /* Ensure finite color, avoiding possible numerical instabilities in the path tracing kernels. */
-  color = ensure_finite3(color);
+  const float3 color = shader_background_eval(&sd);

  /* Write output. */
  output[offset] += make_float4(color.x, color.y, color.z, 0.0f);
--- a/intern/cycles/kernel/kernel_jitter.h
+++ b/intern/cycles/kernel/kernel_jitter.h
@@ -74,6 +74,10 @@ ccl_device_inline float cmj_randfloat_simple(uint i, uint p)

 ccl_device float pmj_sample_1D(const KernelGlobals *kg, uint sample, uint rng_hash, uint dimension)
 {
+  /* The PMJ sample sets contain a sample with (x,y) with NUM_PMJ_SAMPLES so for 1D
+   *  the x part is used as the sample (TODO(@leesonw): Add using both x and y parts
+   * independently). */
+
  /* Perform Owen shuffle of the sample number to reorder the samples. */
 #ifdef _SIMPLE_HASH_
  const uint rv = cmj_hash_simple(dimension, rng_hash);
@@ -91,10 +95,7 @@ ccl_device float pmj_sample_1D(const KernelGlobals *kg, uint sample, uint rng_ha
  const uint sample_set = s / NUM_PMJ_SAMPLES;
  const uint d = (dimension + sample_set);
  const uint dim = d % NUM_PMJ_PATTERNS;
-
-  /* The PMJ sample sets contain a sample with (x,y) with NUM_PMJ_SAMPLES so for 1D
-   *  the x part is used for even dims and the y for odd. */
-  int index = 2 * ((dim >> 1) * NUM_PMJ_SAMPLES + (s % NUM_PMJ_SAMPLES)) + (dim & 1);
+  int index = 2 * (dim * NUM_PMJ_SAMPLES + (s % NUM_PMJ_SAMPLES));

  float fx = kernel_tex_fetch(__sample_pattern_lut, index);

@@ -103,11 +104,12 @@ ccl_device float pmj_sample_1D(const KernelGlobals *kg, uint sample, uint rng_ha
 #  ifdef _SIMPLE_HASH_
  float dx = cmj_randfloat_simple(d, rng_hash);
 #  else
+  /* Only jitter within the grid interval. */
  float dx = cmj_randfloat(d, rng_hash);
 #  endif
-  /* Jitter sample locations and map back into [0 1]. */
-  fx = fx + dx;
+  fx = fx + dx * (1.0f / NUM_PMJ_SAMPLES);
  fx = fx - floorf(fx);
+
 #else
 #  warning "Not using Cranley-Patterson Rotation."
 #endif
@@ -134,7 +136,7 @@ ccl_device void pmj_sample_2D(
  /* Based on the sample number a sample pattern is selected and offset by the dimension. */
  const uint sample_set = s / NUM_PMJ_SAMPLES;
  const uint d = (dimension + sample_set);
-  uint dim = d % NUM_PMJ_PATTERNS;
+  const uint dim = d % NUM_PMJ_PATTERNS;
  int index = 2 * (dim * NUM_PMJ_SAMPLES + (s % NUM_PMJ_SAMPLES));

  float fx = kernel_tex_fetch(__sample_pattern_lut, index);
@@ -149,17 +151,17 @@ ccl_device void pmj_sample_2D(
  float dx = cmj_randfloat(d, rng_hash);
  float dy = cmj_randfloat(d + 1, rng_hash);
 #  endif
-  /* Jitter sample locations and map back to the unit square [0 1]x[0 1]. */
-  float sx = fx + dx;
-  float sy = fy + dy;
-  sx = sx - floorf(sx);
-  sy = sy - floorf(sy);
+  /* Only jitter within the grid cells. */
+  fx = fx + dx * (1.0f / NUM_PMJ_DIVISIONS);
+  fy = fy + dy * (1.0f / NUM_PMJ_DIVISIONS);
+  fx = fx - floorf(fx);
+  fy = fy - floorf(fy);
 #else
 #  warning "Not using Cranley Patterson Rotation."
 #endif

-  (*x) = sx;
-  (*y) = sy;
+  (*x) = fx;
+  (*y) = fy;
 }

 CCL_NAMESPACE_END
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -186,8 +186,8 @@ ccl_device_inline float _shader_bsdf_multi_eval(const KernelGlobals *kg,
                                                float sum_sample_weight,
                                                const uint light_shader_flags)
 {
-  /* This is the veach one-sample model with balance heuristic,
-   * some PDF factors drop out when using balance heuristic weighting. */
+  /* this is the veach one-sample model with balance heuristic, some pdf
+   * factors drop out when using balance heuristic weighting */
  for (int i = 0; i < sd->num_closure; i++) {
    const ShaderClosure *sc = &sd->closure[i];

--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -360,6 +360,7 @@ typedef enum PassType {
  PASS_MATERIAL_ID,
  PASS_MOTION,
  PASS_MOTION_WEIGHT,
+  PASS_RENDER_TIME,
  PASS_CRYPTOMATTE,
  PASS_AOV_COLOR,
  PASS_AOV_VALUE,
--- a/intern/cycles/kernel/osl/osl_services.cpp
+++ b/intern/cycles/kernel/osl/osl_services.cpp
@@ -110,7 +110,6 @@ ustring OSLRenderServices::u_curve_thickness("geom:curve_thickness");
 ustring OSLRenderServices::u_curve_length("geom:curve_length");
 ustring OSLRenderServices::u_curve_tangent_normal("geom:curve_tangent_normal");
 ustring OSLRenderServices::u_curve_random("geom:curve_random");
-ustring OSLRenderServices::u_normal_map_normal("geom:normal_map_normal");
 ustring OSLRenderServices::u_path_ray_length("path:ray_length");
 ustring OSLRenderServices::u_path_ray_depth("path:ray_depth");
 ustring OSLRenderServices::u_path_diffuse_depth("path:diffuse_depth");
@@ -986,18 +985,8 @@ bool OSLRenderServices::get_object_standard_attribute(const KernelGlobals *kg,
    float3 f = curve_tangent_normal(kg, sd);
    return set_attribute_float3(f, type, derivatives, val);
  }
-  else if (name == u_normal_map_normal) {
-    if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
-      float3 f = triangle_smooth_normal_unnormalized(kg, sd, sd->Ng, sd->prim, sd->u, sd->v);
-      return set_attribute_float3(f, type, derivatives, val);
-    }
-    else {
-      return false;
-    }
-  }
-  else {
+  else
    return false;
-  }
 }

 bool OSLRenderServices::get_background_attribute(const KernelGlobals *kg,
--- a/intern/cycles/kernel/osl/osl_services.h
+++ b/intern/cycles/kernel/osl/osl_services.h
@@ -297,7 +297,6 @@ class OSLRenderServices : public OSL::RendererServices {
  static ustring u_curve_length;
  static ustring u_curve_tangent_normal;
  static ustring u_curve_random;
-  static ustring u_normal_map_normal;
  static ustring u_path_ray_length;
  static ustring u_path_ray_depth;
  static ustring u_path_diffuse_depth;
--- a/intern/cycles/kernel/shaders/CMakeLists.txt
+++ b/intern/cycles/kernel/shaders/CMakeLists.txt
@@ -41,7 +41,6 @@ set(SRC_OSL
  node_vector_displacement.osl
  node_emission.osl
  node_environment_texture.osl
-  node_float_curve.osl
  node_fresnel.osl
  node_gamma.osl
  node_geometry.osl
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Jacques Lucke	3bdd415254	realize non-standard virtual arrays	2021-09-26 15:07:48 +02:00
Jacques Lucke	46fe816fc6	use threads	2021-09-26 15:07:27 +02:00
Jacques Lucke	2dbbbc7e85	add slice method	2021-09-26 14:52:12 +02:00
Jacques Lucke	34ed0de287	improve	2021-09-26 14:11:42 +02:00
Jacques Lucke	3bcb30b13b	cleanup	2021-09-26 13:39:27 +02:00
Jacques Lucke	f55023c82a	progress	2021-09-26 13:23:09 +02:00
Jacques Lucke	d00c68fc7b	progress	2021-09-25 16:26:56 +02:00
Jacques Lucke	ab36a7de75	progress	2021-09-25 15:59:20 +02:00
Jacques Lucke	42d3210681	progress	2021-09-25 15:35:43 +02:00
Jacques Lucke	318e2dd00f	progress	2021-09-25 15:22:07 +02:00
Jacques Lucke	8fdc78678a	progress	2021-09-25 15:10:42 +02:00
Jacques Lucke	ad114f806b	progress	2021-09-25 15:01:09 +02:00
Jacques Lucke	02d9d13a83	initial commit	2021-09-25 14:37:12 +02:00