initial support for node groups

cleanup
improve visualization
2021-09-20 18:08:47 +02:00 · 2021-09-20 17:18:39 +02:00 · 2021-09-20 16:58:15 +02:00 · 2021-09-20 13:30:34 +02:00 · 2021-09-17 13:37:21 +02:00 · 2021-09-17 13:35:54 +02:00
1473 changed files with 53727 additions and 108513 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -403,7 +403,7 @@ option(WITH_CYCLES_CUDA_BINARIES    "Build Cycles CUDA binaries" OFF)
 option(WITH_CYCLES_CUBIN_COMPILER   "Build cubins with nvrtc based compiler instead of nvcc" OFF)
 option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful on machines with limited RAM)" OFF)
 mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
-set(CYCLES_TEST_DEVICES CPU CACHE STRING "Run regression tests on the specified device types (CPU CUDA OPTIX)" )
+set(CYCLES_TEST_DEVICES CPU CACHE STRING "Run regression tests on the specified device types (CPU CUDA OPTIX OPENCL)" )
 set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 compute_75 CACHE STRING "CUDA architectures to build binaries for")
 mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
 unset(PLATFORM_DEFAULT)
@@ -418,10 +418,12 @@ mark_as_advanced(WITH_CYCLES_DEBUG_NAN)
 mark_as_advanced(WITH_CYCLES_NATIVE_ONLY)

 option(WITH_CYCLES_DEVICE_CUDA              "Enable Cycles CUDA compute support" ON)
-option(WITH_CYCLES_DEVICE_OPTIX             "Enable Cycles OptiX support" ON)
-option(WITH_CYCLES_DEVICE_HIP               "Enable Cycles HIP support" OFF)
-mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
+option(WITH_CYCLES_DEVICE_OPTIX             "Enable Cycles OptiX support" OFF)
+option(WITH_CYCLES_DEVICE_OPENCL            "Enable Cycles OpenCL compute support" ON)
+option(WITH_CYCLES_NETWORK              "Enable Cycles compute over network support (EXPERIMENTAL and unfinished)" OFF)
 mark_as_advanced(WITH_CYCLES_DEVICE_CUDA)
+mark_as_advanced(WITH_CYCLES_DEVICE_OPENCL)
+mark_as_advanced(WITH_CYCLES_NETWORK)

 option(WITH_CUDA_DYNLOAD "Dynamically load CUDA libraries at runtime" ON)
 mark_as_advanced(WITH_CUDA_DYNLOAD)
@@ -823,11 +825,6 @@ if(NOT WITH_CUDA_DYNLOAD)
  endif()
 endif()

-if(WITH_CYCLES_DEVICE_HIP)
-  # Currently HIP must be dynamically loaded, this may change in future toolkits
-  set(WITH_HIP_DYNLOAD ON)
-endif()
-
 #-----------------------------------------------------------------------------
 # Check check if submodules are cloned

@@ -1857,9 +1854,6 @@ elseif(WITH_CYCLES_STANDALONE)
  if(WITH_CUDA_DYNLOAD)
    add_subdirectory(extern/cuew)
  endif()
-  if(WITH_HIP_DYNLOAD)
-    add_subdirectory(extern/hipew)
-  endif()
  if(NOT WITH_SYSTEM_GLEW)
    add_subdirectory(extern/glew)
  endif()
--- a/build_files/build_environment/patches/ffmpeg.diff
+++ b/build_files/build_environment/patches/ffmpeg.diff
@@ -70,18 +70,16 @@
 }
 --- a/libavcodec/rl.c
 +++ b/libavcodec/rl.c
-@@ -71,17 +71,19 @@
+@@ -71,7 +71,7 @@ av_cold void ff_rl_init(RLTable *rl,
 av_cold void ff_rl_init_vlc(RLTable *rl, unsigned static_size)
 {
     int i, q;
 -    VLC_TYPE table[1500][2] = {{0}};
 +    VLC_TYPE (*table)[2] = av_calloc(sizeof(VLC_TYPE), 1500 * 2);
     VLC vlc = { .table = table, .table_allocated = static_size };
-    av_assert0(static_size <= FF_ARRAY_ELEMS(table));
-+    av_assert0(static_size < 1500);
+     av_assert0(static_size <= FF_ARRAY_ELEMS(table));
     init_vlc(&vlc, 9, rl->n + 1, &rl->table_vlc[0][1], 4, 2, &rl->table_vlc[0][0], 4, 2, INIT_VLC_USE_NEW_STATIC);
- 
-     for (q = 0; q < 32; q++) {
+@@ -80,8 +80,10 @@ av_cold void ff_rl_init_vlc(RLTable *rl, unsigned static_size)
         int qmul = q * 2;
         int qadd = (q - 1) | 1;
 
@@ -93,7 +91,7 @@
 
         if (q == 0) {
             qmul = 1;
-@@ -113,4 +115,5 @@
+@@ -113,4 +115,5 @@ av_cold void ff_rl_init_vlc(RLTable *rl, unsigned static_size)
             rl->rl_vlc[q][i].run   = run;
         }
     }
--- a/build_files/cmake/Modules/FindOptiX.cmake
+++ b/build_files/cmake/Modules/FindOptiX.cmake
@@ -33,23 +33,11 @@ FIND_PATH(OPTIX_INCLUDE_DIR
    include
 )

-IF(EXISTS "${OPTIX_INCLUDE_DIR}/optix.h")
-  FILE(STRINGS "${OPTIX_INCLUDE_DIR}/optix.h" _optix_version REGEX "^#define OPTIX_VERSION[ \t].*$")
-  STRING(REGEX MATCHALL "[0-9]+" _optix_version ${_optix_version})
-
-  MATH(EXPR _optix_version_major "${_optix_version} / 10000")
-  MATH(EXPR _optix_version_minor "(${_optix_version} % 10000) / 100")
-  MATH(EXPR _optix_version_patch "${_optix_version} % 100")
-
-  SET(OPTIX_VERSION "${_optix_version_major}.${_optix_version_minor}.${_optix_version_patch}")
-ENDIF()
-
 # handle the QUIETLY and REQUIRED arguments and set OPTIX_FOUND to TRUE if
 # all listed variables are TRUE
 INCLUDE(FindPackageHandleStandardArgs)
-FIND_PACKAGE_HANDLE_STANDARD_ARGS(OptiX
-    REQUIRED_VARS OPTIX_INCLUDE_DIR
-    VERSION_VAR OPTIX_VERSION)
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(OptiX DEFAULT_MSG
+    OPTIX_INCLUDE_DIR)

 IF(OPTIX_FOUND)
  SET(OPTIX_INCLUDE_DIRS ${OPTIX_INCLUDE_DIR})
@@ -57,7 +45,6 @@ ENDIF()

 MARK_AS_ADVANCED(
  OPTIX_INCLUDE_DIR
-  OPTIX_VERSION
 )

 UNSET(_optix_SEARCH_DIRS)
--- a/build_files/cmake/cmake_static_check_cppcheck.py
+++ b/build_files/cmake/cmake_static_check_cppcheck.py
@@ -24,7 +24,6 @@ import project_source_info
 import subprocess
 import sys
 import os
-import tempfile

 from typing import (
    Any,
@@ -36,6 +35,7 @@ USE_QUIET = (os.environ.get("QUIET", None) is not None)

 CHECKER_IGNORE_PREFIX = [
    "extern",
+    "intern/moto",
 ]

 CHECKER_BIN = "cppcheck"
@@ -47,19 +47,13 @@ CHECKER_ARGS = [
    "--max-configs=1",  # speeds up execution
    #  "--check-config", # when includes are missing
    "--enable=all",  # if you want sixty hundred pedantic suggestions
-
-    # Quiet output, otherwise all defines/includes are printed (overly verbose).
-    # Only enable this for troubleshooting (if defines are not set as expected for example).
-    "--quiet",
-
-    # NOTE: `--cppcheck-build-dir=<dir>` is added later as a temporary directory.
 ]

 if USE_QUIET:
    CHECKER_ARGS.append("--quiet")


-def cppcheck() -> None:
+def main() -> None:
    source_info = project_source_info.build_info(ignore_prefix_list=CHECKER_IGNORE_PREFIX)
    source_defines = project_source_info.build_defines_as_args()

@@ -84,10 +78,7 @@ def cppcheck() -> None:
            percent_str = "[" + ("%.2f]" % percent).rjust(7) + " %:"

            sys.stdout.flush()
-            sys.stdout.write("%s %s\n" % (
-                percent_str,
-                os.path.relpath(c, project_source_info.SOURCE_DIR)
-            ))
+            sys.stdout.write("%s " % percent_str)

        return subprocess.Popen(cmd)

@@ -99,11 +90,5 @@ def cppcheck() -> None:
    print("Finished!")


-def main() -> None:
-    with tempfile.TemporaryDirectory() as temp_dir:
-        CHECKER_ARGS.append("--cppcheck-build-dir=" + temp_dir)
-        cppcheck()
-
-
 if __name__ == "__main__":
    main()
--- a/build_files/cmake/project_source_info.py
+++ b/build_files/cmake/project_source_info.py
@@ -243,9 +243,7 @@ def build_defines_as_args() -> List[str]:
 # use this module.
 def queue_processes(
        process_funcs: Sequence[Tuple[Callable[..., subprocess.Popen[Any]], Tuple[Any, ...]]],
-        *,
        job_total: int =-1,
-        sleep: float = 0.1,
 ) -> None:
    """ Takes a list of function arg pairs, each function must return a process
    """
@@ -273,20 +271,14 @@ def queue_processes(

                if len(processes) <= job_total:
                    break
-                time.sleep(sleep)
+                else:
+                    time.sleep(0.1)

            sys.stdout.flush()
            sys.stderr.flush()

            processes.append(func(*args))

-        # Don't return until all jobs have finished.
-        while 1:
-            processes[:] = [p for p in processes if p.poll() is None]
-            if not processes:
-                break
-            time.sleep(sleep)
-

 def main() -> None:
    if not os.path.exists(join(CMAKE_DIR, "CMakeCache.txt")):
--- a/build_files/config/pipeline_config.yaml
+++ b/build_files/config/pipeline_config.yaml
@@ -55,7 +55,7 @@ buildbot:
    cuda11:
        version: '11.4.1'
    optix:
-        version: '7.3.0'
+        version: '7.1.0'
    cmake:
        default:
            version: any
--- a/build_files/utils/make_update.py
+++ b/build_files/utils/make_update.py
@@ -200,20 +200,16 @@ def submodules_update(args, release_version, branch):
            if msg:
                skip_msg += submodule_path + " skipped: " + msg + "\n"
            else:
-                # Find a matching branch that exists.
-                call([args.git_command, "fetch", "origin"])
-                if make_utils.git_branch_exists(args.git_command, submodule_branch):
-                    pass
-                elif make_utils.git_branch_exists(args.git_command, submodule_branch_fallback):
-                    submodule_branch = submodule_branch_fallback
-                else:
-                    submodule_branch = None
-
-                # Switch to branch and pull.
-                if submodule_branch:
-                    if make_utils.git_branch(args.git_command) != submodule_branch:
-                        call([args.git_command, "checkout", submodule_branch])
-                    call([args.git_command, "pull", "--rebase", "origin", submodule_branch])
+                # We are using `exit_on_error=False` here because sub-modules are allowed not to have the requested branch,
+                # in which case falling back to the default back-up branch is fine.
+                if make_utils.git_branch(args.git_command) != submodule_branch:
+                    call([args.git_command, "fetch", "origin"])
+                    call([args.git_command, "checkout", submodule_branch], exit_on_error=False)
+                call([args.git_command, "pull", "--rebase", "origin", submodule_branch], exit_on_error=False)
+                # If we cannot find the specified branch for this submodule, fall back to the default one (aka master).
+                if make_utils.git_branch(args.git_command) != submodule_branch:
+                    call([args.git_command, "checkout", submodule_branch_fallback])
+                    call([args.git_command, "pull", "--rebase", "origin", submodule_branch_fallback])
        finally:
            os.chdir(cwd)

--- a/build_files/utils/make_utils.py
+++ b/build_files/utils/make_utils.py
@@ -8,19 +8,14 @@ import subprocess
 import sys


-def call(cmd, exit_on_error=True, silent=False):
-    if not silent:
-        print(" ".join(cmd))
+def call(cmd, exit_on_error=True):
+    print(" ".join(cmd))

    # Flush to ensure correct order output on Windows.
    sys.stdout.flush()
    sys.stderr.flush()

-    if silent:
-        retcode = subprocess.call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
-    else:
-        retcode = subprocess.call(cmd)
-
+    retcode = subprocess.call(cmd)
    if exit_on_error and retcode != 0:
        sys.exit(retcode)
    return retcode
@@ -43,11 +38,6 @@ def check_output(cmd, exit_on_error=True):
    return output.strip()


-def git_branch_exists(git_command, branch):
-    return call([git_command, "rev-parse", "--verify", branch], exit_on_error=False, silent=True) == 0 or \
-           call([git_command, "rev-parse", "--verify", "remotes/origin/" + branch], exit_on_error=False, silent=True) == 0
-
-
 def git_branch(git_command):
    # Get current branch name.
    try:
--- a/doc/python_api/examples/bpy.types.Bone.convert_local_to_pose.py
+++ b/doc/python_api/examples/bpy.types.Bone.convert_local_to_pose.py
@@ -1,40 +0,0 @@
-"""
-This method enables conversions between Local and Pose space for bones in
-the middle of updating the armature without having to update dependencies
-after each change, by manually carrying updated matrices in a recursive walk.
-"""
-
-def set_pose_matrices(obj, matrix_map):
-    "Assign pose space matrices of all bones at once, ignoring constraints."
-
-    def rec(pbone, parent_matrix):
-        matrix = matrix_map[pbone.name]
-
-        ## Instead of:
-        # pbone.matrix = matrix
-        # bpy.context.view_layer.update()
-
-        # Compute and assign local matrix, using the new parent matrix
-        if pbone.parent:
-            pbone.matrix_basis = pbone.bone.convert_local_to_pose(
-                matrix,
-                pbone.bone.matrix_local,
-                parent_matrix=parent_matrix,
-                parent_matrix_local=pbone.parent.bone.matrix_local,
-                invert=True
-            )
-        else:
-            pbone.matrix_basis = pbone.bone.convert_local_to_pose(
-                matrix,
-                pbone.bone.matrix_local,
-                invert=True
-            )
-
-        # Recursively process children, passing the new matrix through
-        for child in pbone.children:
-            rec(child, matrix)
-
-    # Scan all bone trees from their roots
-    for pbone in obj.pose.bones:
-        if not pbone.parent:
-            rec(pbone, None)
--- a/doc/python_api/sphinx_doc_gen.py
+++ b/doc/python_api/sphinx_doc_gen.py
@@ -1101,7 +1101,6 @@ context_type_map = {
    "scene": ("Scene", False),
    "sculpt_object": ("Object", False),
    "selectable_objects": ("Object", True),
-    "selected_asset_files": ("FileSelectEntry", True),
    "selected_bones": ("EditBone", True),
    "selected_editable_bones": ("EditBone", True),
    "selected_editable_fcurves": ("FCurve", True),
--- a/extern/CMakeLists.txt
+++ b/extern/CMakeLists.txt
@@ -67,12 +67,9 @@ endif()

 if(WITH_CYCLES OR WITH_COMPOSITOR OR WITH_OPENSUBDIV)
  add_subdirectory(clew)
-  if((WITH_CYCLES_DEVICE_CUDA OR WITH_CYCLES_DEVICE_OPTIX) AND WITH_CUDA_DYNLOAD)
+  if(WITH_CUDA_DYNLOAD)
    add_subdirectory(cuew)
  endif()
-  if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
-    add_subdirectory(hipew)
-  endif()
 endif()

 if(WITH_GHOST_X11 AND WITH_GHOST_XDND)
--- a/extern/audaspace/CMakeLists.txt
+++ b/extern/audaspace/CMakeLists.txt
@@ -129,7 +129,6 @@ set(SRC
 	src/util/Barrier.cpp
 	src/util/Buffer.cpp
 	src/util/BufferReader.cpp
-	src/util/RingBuffer.cpp
 	src/util/StreamBuffer.cpp
 	src/util/ThreadPool.cpp
 )
@@ -246,7 +245,6 @@ set(PUBLIC_HDR
 	include/util/BufferReader.h
 	include/util/ILockable.h
 	include/util/Math3D.h
-	include/util/RingBuffer.h
 	include/util/StreamBuffer.h
 	include/util/ThreadPool.h
 )
--- a/extern/audaspace/bindings/C/AUD_Sound.cpp
+++ b/extern/audaspace/bindings/C/AUD_Sound.cpp
@@ -102,30 +102,26 @@ AUD_API int AUD_Sound_getFileStreams(AUD_Sound* sound, AUD_StreamInfo **stream_i

 	if(file)
 	{
-		try
-		{
-			auto streams = file->queryStreams();
-
-			size_t size = sizeof(AUD_StreamInfo) * streams.size();
-
-			if(!size)
-			{
-				*stream_infos = nullptr;
-				return 0;
-			}
-
-			*stream_infos = reinterpret_cast<AUD_StreamInfo*>(std::malloc(size));
-			std::memcpy(*stream_infos, streams.data(), size);
-
-			return streams.size();
-		}
-		catch(Exception&)
+		auto streams = file->queryStreams();
+
+		size_t size = sizeof(AUD_StreamInfo) * streams.size();
+
+		if(!size)
 		{
+			*stream_infos = nullptr;
+			return 0;
 		}
+
+		*stream_infos = reinterpret_cast<AUD_StreamInfo*>(std::malloc(size));
+		std::memcpy(*stream_infos, streams.data(), size);
+
+		return streams.size();
+	}
+	else
+	{
+		*stream_infos = nullptr;
+		return 0;
 	}
-
-	*stream_infos = nullptr;
-	return 0;
 }

 AUD_API sample_t* AUD_Sound_data(AUD_Sound* sound, int* length, AUD_Specs* specs)
--- a/extern/audaspace/include/util/RingBuffer.h
+++ b/extern/audaspace/include/util/RingBuffer.h
@@ -1,97 +0,0 @@
-/*******************************************************************************
- * Copyright 2009-2021 Jörg Müller
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
-
-#pragma once
-
-/**
- * @file RingBuffer.h
- * @ingroup util
- * The RingBuffer class.
- */
-
-#include "Audaspace.h"
-#include "Buffer.h"
-
-#include <cstddef>
-
-AUD_NAMESPACE_BEGIN
-
-/**
- * This class is a simple ring buffer in RAM which is 32 Byte aligned and provides
- * functionality for concurrent reading and writting without locks.
- */
-class AUD_API RingBuffer
-{
-private:
-	/// The buffer storing the actual data.
-	Buffer m_buffer;
-
-	/// The reading pointer.
-	volatile size_t m_read;
-
-	/// The writing pointer.
-	volatile size_t m_write;
-
-	// delete copy constructor and operator=
-	RingBuffer(const RingBuffer&) = delete;
-	RingBuffer& operator=(const RingBuffer&) = delete;
-
-public:
-	/**
-	 * Creates a new ring buffer.
-	 * \param size The size of the buffer in bytes.
-	 */
-	RingBuffer(int size = 0);
-
-	/**
-	 * Returns the pointer to the ring buffer in memory.
-	 */
-	sample_t* getBuffer() const;
-
-	/**
-	 * Returns the size of the ring buffer in bytes.
-	 */
-	int getSize() const;
-
-	size_t getReadSize() const;
-
-	size_t getWriteSize() const;
-
-	size_t read(data_t* target, size_t size);
-
-	size_t write(data_t* source, size_t size);
-
-	/**
-	 * Resets the ring buffer to a state where nothing has been written or read.
-	 */
-	void reset();
-
-	/**
-	 * Resizes the ring buffer.
-	 * \param size The new size of the ring buffer, measured in bytes.
-	 */
-	void resize(int size);
-
-	/**
-	 * Makes sure the ring buffer has a minimum size.
-	 * If size is >= current size, nothing will happen.
-	 * Otherwise the ring buffer is resized with keep as parameter.
-	 * \param size The new minimum size of the ring buffer, measured in bytes.
-	 */
-	void assureSize(int size);
-};
-
-AUD_NAMESPACE_END
--- a/extern/audaspace/plugins/pulseaudio/PulseAudioDevice.cpp
+++ b/extern/audaspace/plugins/pulseaudio/PulseAudioDevice.cpp
@@ -23,121 +23,95 @@

 AUD_NAMESPACE_BEGIN

-PulseAudioDevice::PulseAudioSynchronizer::PulseAudioSynchronizer(PulseAudioDevice *device) :
-	m_device(device)
-{
-}
-
-double PulseAudioDevice::PulseAudioSynchronizer::getPosition(std::shared_ptr<IHandle> handle)
-{
-	pa_usec_t latency;
-	int negative;
-	AUD_pa_stream_get_latency(m_device->m_stream, &latency, &negative);
-
-	double delay = m_device->m_ring_buffer.getReadSize() / (AUD_SAMPLE_SIZE(m_device->m_specs) * m_device->m_specs.rate) + latency * 1.0e-6;
-
-	return handle->getPosition() - delay;
-}
-
-void PulseAudioDevice::updateRingBuffer()
-{
-	unsigned int samplesize = AUD_SAMPLE_SIZE(m_specs);
-
-	std::unique_lock<std::mutex> lock(m_mixingLock);
-
-	Buffer buffer;
-
-	while(m_valid)
-	{
-		size_t size = m_ring_buffer.getWriteSize();
-
-		size_t sample_count = size / samplesize;
-
-		if(sample_count > 0)
-		{
-			size = sample_count * samplesize;
-
-			buffer.assureSize(size);
-
-			mix(reinterpret_cast<data_t*>(buffer.getBuffer()), sample_count);
-
-			m_ring_buffer.write(reinterpret_cast<data_t*>(buffer.getBuffer()), size);
-		}
-
-		m_mixingCondition.wait(lock);
-	}
-}
-
 void PulseAudioDevice::PulseAudio_state_callback(pa_context *context, void *data)
 {
 	PulseAudioDevice* device = (PulseAudioDevice*)data;

-	device->m_state = AUD_pa_context_get_state(context);
+	std::lock_guard<ILockable> lock(*device);

-	AUD_pa_threaded_mainloop_signal(device->m_mainloop, 0);
+	device->m_state = AUD_pa_context_get_state(context);
 }

 void PulseAudioDevice::PulseAudio_request(pa_stream *stream, size_t total_bytes, void *data)
 {
 	PulseAudioDevice* device = (PulseAudioDevice*)data;

-	data_t* buffer;
-
-	size_t sample_size = AUD_DEVICE_SAMPLE_SIZE(device->m_specs);
+	void* buffer;

 	while(total_bytes > 0)
 	{
 		size_t num_bytes = total_bytes;

-		AUD_pa_stream_begin_write(stream, reinterpret_cast<void**>(&buffer), &num_bytes);
+		AUD_pa_stream_begin_write(stream, &buffer, &num_bytes);

-		size_t readsamples = device->m_ring_buffer.getReadSize();
+		device->mix((data_t*)buffer, num_bytes / AUD_DEVICE_SAMPLE_SIZE(device->m_specs));

-		readsamples = std::min(readsamples, size_t(num_bytes)) / sample_size;
-
-		device->m_ring_buffer.read(buffer, readsamples * sample_size);
-
-		if(readsamples * sample_size < num_bytes)
-			std::memset(buffer + readsamples * sample_size, 0, num_bytes - readsamples * sample_size);
-
-		if(device->m_mixingLock.try_lock())
-		{
-			device->m_mixingCondition.notify_all();
-			device->m_mixingLock.unlock();
-		}
-
-		AUD_pa_stream_write(stream, reinterpret_cast<void*>(buffer), num_bytes, nullptr, 0, PA_SEEK_RELATIVE);
+		AUD_pa_stream_write(stream, buffer, num_bytes, nullptr, 0, PA_SEEK_RELATIVE);

 		total_bytes -= num_bytes;
 	}
 }

-void PulseAudioDevice::playing(bool playing)
+void PulseAudioDevice::PulseAudio_underflow(pa_stream *stream, void *data)
 {
-	m_playback = playing;
+	PulseAudioDevice* device = (PulseAudioDevice*)data;

-	AUD_pa_threaded_mainloop_lock(m_mainloop);
-	AUD_pa_stream_cork(m_stream, playing ? 0 : 1, nullptr, nullptr);
-	AUD_pa_threaded_mainloop_unlock(m_mainloop);
+	DeviceSpecs specs = device->getSpecs();
+
+	if(++device->m_underflows > 4 && device->m_buffersize < AUD_DEVICE_SAMPLE_SIZE(specs) * specs.rate * 2)
+	{
+		device->m_buffersize <<= 1;
+		device->m_underflows = 0;
+
+		pa_buffer_attr buffer_attr;
+
+		buffer_attr.fragsize = -1U;
+		buffer_attr.maxlength = -1U;
+		buffer_attr.minreq = -1U;
+		buffer_attr.prebuf = -1U;
+		buffer_attr.tlength = device->m_buffersize;
+
+		AUD_pa_stream_set_buffer_attr(stream, &buffer_attr, nullptr, nullptr);
+	}
+}
+
+void PulseAudioDevice::runMixingThread()
+{
+	for(;;)
+	{
+		{
+			std::lock_guard<ILockable> lock(*this);
+
+			if(shouldStop())
+			{
+				AUD_pa_stream_cork(m_stream, 1, nullptr, nullptr);
+				AUD_pa_stream_flush(m_stream, nullptr, nullptr);
+				doStop();
+				return;
+			}
+		}
+
+		if(AUD_pa_stream_is_corked(m_stream))
+			AUD_pa_stream_cork(m_stream, 0, nullptr, nullptr);
+
+		// similar to AUD_pa_mainloop_iterate(m_mainloop, false, nullptr); except with a longer timeout
+		AUD_pa_mainloop_prepare(m_mainloop, 1 << 14);
+		AUD_pa_mainloop_poll(m_mainloop);
+		AUD_pa_mainloop_dispatch(m_mainloop);
+	}
 }

 PulseAudioDevice::PulseAudioDevice(std::string name, DeviceSpecs specs, int buffersize) :
-	m_synchronizer(this),
-	m_playback(false),
 	m_state(PA_CONTEXT_UNCONNECTED),
-	m_valid(true),
 	m_underflows(0)
 {
-	m_mainloop = AUD_pa_threaded_mainloop_new();
+	m_mainloop = AUD_pa_mainloop_new();

-	AUD_pa_threaded_mainloop_lock(m_mainloop);
-
-	m_context = AUD_pa_context_new(AUD_pa_threaded_mainloop_get_api(m_mainloop), name.c_str());
+	m_context = AUD_pa_context_new(AUD_pa_mainloop_get_api(m_mainloop), name.c_str());

 	if(!m_context)
 	{
-		AUD_pa_threaded_mainloop_unlock(m_mainloop);
-		AUD_pa_threaded_mainloop_free(m_mainloop);
+		AUD_pa_mainloop_free(m_mainloop);

 		AUD_THROW(DeviceException, "Could not connect to PulseAudio.");
 	}
@@ -146,26 +120,21 @@ PulseAudioDevice::PulseAudioDevice(std::string name, DeviceSpecs specs, int buff

 	AUD_pa_context_connect(m_context, nullptr, PA_CONTEXT_NOFLAGS, nullptr);

-	AUD_pa_threaded_mainloop_start(m_mainloop);
-
 	while(m_state != PA_CONTEXT_READY)
 	{
 		switch(m_state)
 		{
 		case PA_CONTEXT_FAILED:
 		case PA_CONTEXT_TERMINATED:
-			AUD_pa_threaded_mainloop_unlock(m_mainloop);
-			AUD_pa_threaded_mainloop_stop(m_mainloop);
-
 			AUD_pa_context_disconnect(m_context);
 			AUD_pa_context_unref(m_context);

-			AUD_pa_threaded_mainloop_free(m_mainloop);
+			AUD_pa_mainloop_free(m_mainloop);

 			AUD_THROW(DeviceException, "Could not connect to PulseAudio.");
 			break;
 		default:
-			AUD_pa_threaded_mainloop_wait(m_mainloop);
+			AUD_pa_mainloop_iterate(m_mainloop, true, nullptr);
 			break;
 		}
 	}
@@ -213,18 +182,16 @@ PulseAudioDevice::PulseAudioDevice(std::string name, DeviceSpecs specs, int buff

 	if(!m_stream)
 	{
-		AUD_pa_threaded_mainloop_unlock(m_mainloop);
-		AUD_pa_threaded_mainloop_stop(m_mainloop);
-
 		AUD_pa_context_disconnect(m_context);
 		AUD_pa_context_unref(m_context);

-		AUD_pa_threaded_mainloop_free(m_mainloop);
+		AUD_pa_mainloop_free(m_mainloop);

 		AUD_THROW(DeviceException, "Could not create PulseAudio stream.");
 	}

 	AUD_pa_stream_set_write_callback(m_stream, PulseAudio_request, this);
+	AUD_pa_stream_set_underflow_callback(m_stream, PulseAudio_underflow, this);

 	buffersize *= AUD_DEVICE_SAMPLE_SIZE(m_specs);
 	m_buffersize = buffersize;
@@ -237,53 +204,31 @@ PulseAudioDevice::PulseAudioDevice(std::string name, DeviceSpecs specs, int buff
 	buffer_attr.prebuf = -1U;
 	buffer_attr.tlength = buffersize;

-	m_ring_buffer.resize(buffersize);
-
-	if(AUD_pa_stream_connect_playback(m_stream, nullptr, &buffer_attr, static_cast<pa_stream_flags_t>(PA_STREAM_INTERPOLATE_TIMING | PA_STREAM_ADJUST_LATENCY | PA_STREAM_AUTO_TIMING_UPDATE), nullptr, nullptr) < 0)
+	if(AUD_pa_stream_connect_playback(m_stream, nullptr, &buffer_attr, static_cast<pa_stream_flags_t>(PA_STREAM_START_CORKED | PA_STREAM_INTERPOLATE_TIMING | PA_STREAM_ADJUST_LATENCY | PA_STREAM_AUTO_TIMING_UPDATE), nullptr, nullptr) < 0)
 	{
-		AUD_pa_threaded_mainloop_unlock(m_mainloop);
-		AUD_pa_threaded_mainloop_stop(m_mainloop);
-
 		AUD_pa_context_disconnect(m_context);
 		AUD_pa_context_unref(m_context);

-		AUD_pa_threaded_mainloop_free(m_mainloop);
+		AUD_pa_mainloop_free(m_mainloop);

 		AUD_THROW(DeviceException, "Could not connect PulseAudio stream.");
 	}

-	AUD_pa_threaded_mainloop_unlock(m_mainloop);
-
 	create();
-
-	m_mixingThread = std::thread(&PulseAudioDevice::updateRingBuffer, this);
 }

 PulseAudioDevice::~PulseAudioDevice()
 {
-	m_valid = false;
-
-	m_mixingLock.lock();
-	m_mixingCondition.notify_all();
-	m_mixingLock.unlock();
-
-	m_mixingThread.join();
-
-	AUD_pa_threaded_mainloop_stop(m_mainloop);
+	stopMixingThread();

 	AUD_pa_context_disconnect(m_context);
 	AUD_pa_context_unref(m_context);

-	AUD_pa_threaded_mainloop_free(m_mainloop);
+	AUD_pa_mainloop_free(m_mainloop);

 	destroy();
 }

-ISynchronizer *PulseAudioDevice::getSynchronizer()
-{
-	return &m_synchronizer;
-}
-
 class PulseAudioDeviceFactory : public IDeviceFactory
 {
 private:
--- a/extern/audaspace/plugins/pulseaudio/PulseAudioDevice.h
+++ b/extern/audaspace/plugins/pulseaudio/PulseAudioDevice.h
@@ -26,11 +26,7 @@
 * The PulseAudioDevice class.
 */

-#include "devices/SoftwareDevice.h"
-#include "util/RingBuffer.h"
-
-#include <condition_variable>
-#include <thread>
+#include "devices/ThreadedDevice.h"

 #include <pulse/pulseaudio.h>

@@ -39,65 +35,17 @@ AUD_NAMESPACE_BEGIN
 /**
 * This device plays back through PulseAudio, the simple direct media layer.
 */
-class AUD_PLUGIN_API PulseAudioDevice : public SoftwareDevice
+class AUD_PLUGIN_API PulseAudioDevice : public ThreadedDevice
 {
 private:
-	class PulseAudioSynchronizer : public DefaultSynchronizer
-	{
-		PulseAudioDevice* m_device;
-
-	public:
-		PulseAudioSynchronizer(PulseAudioDevice* device);
-
-		virtual double getPosition(std::shared_ptr<IHandle> handle);
-	};
-
-	/// Synchronizer.
-	PulseAudioSynchronizer m_synchronizer;
-
-	/**
-	 * Whether there is currently playback.
-	 */
-	volatile bool m_playback;
-
-	pa_threaded_mainloop* m_mainloop;
+	pa_mainloop* m_mainloop;
 	pa_context* m_context;
 	pa_stream* m_stream;
 	pa_context_state_t m_state;

-	/**
-	 * The mixing ring buffer.
-	 */
-	RingBuffer m_ring_buffer;
-
-	/**
-	 * Whether the device is valid.
-	 */
-	bool m_valid;
-
 	int m_buffersize;
 	uint32_t m_underflows;

-	/**
-	 * The mixing thread.
-	 */
-	std::thread m_mixingThread;
-
-	/**
-	 * Mutex for mixing.
-	 */
-	std::mutex m_mixingLock;
-
-	/**
-	 * Condition for mixing.
-	 */
-	std::condition_variable m_mixingCondition;
-
-	/**
-	 * Updates the ring buffer.
-	 */
-	AUD_LOCAL void updateRingBuffer();
-
 	/**
 	 * Reports the state of the PulseAudio server connection.
 	 * \param context The PulseAudio context.
@@ -113,13 +61,23 @@ private:
 	 */
 	AUD_LOCAL static void PulseAudio_request(pa_stream* stream, size_t total_bytes, void* data);

+	/**
+	 * Reports an underflow from the PulseAudio server.
+	 * Automatically adjusts the latency if this happens too often.
+	 * \param stream The PulseAudio stream.
+	 * \param data The PulseAudio device.
+	 */
+	AUD_LOCAL static void PulseAudio_underflow(pa_stream* stream, void* data);
+
+	/**
+	 * Streaming thread main function.
+	 */
+	AUD_LOCAL void runMixingThread();
+
 	// delete copy constructor and operator=
 	PulseAudioDevice(const PulseAudioDevice&) = delete;
 	PulseAudioDevice& operator=(const PulseAudioDevice&) = delete;

-protected:
-	virtual void playing(bool playing);
-
 public:
 	/**
 	 * Opens the PulseAudio audio device for playback.
@@ -135,8 +93,6 @@ public:
 	 */
 	virtual ~PulseAudioDevice();

-	virtual ISynchronizer* getSynchronizer();
-
 	/**
 	 * Registers this plugin.
 	 */
--- a/extern/audaspace/plugins/pulseaudio/PulseAudioSymbols.h
+++ b/extern/audaspace/plugins/pulseaudio/PulseAudioSymbols.h
@@ -25,7 +25,6 @@ PULSEAUDIO_SYMBOL(pa_stream_begin_write);
 PULSEAUDIO_SYMBOL(pa_stream_connect_playback);
 PULSEAUDIO_SYMBOL(pa_stream_cork);
 PULSEAUDIO_SYMBOL(pa_stream_flush);
-PULSEAUDIO_SYMBOL(pa_stream_get_latency);
 PULSEAUDIO_SYMBOL(pa_stream_is_corked);
 PULSEAUDIO_SYMBOL(pa_stream_new);
 PULSEAUDIO_SYMBOL(pa_stream_set_buffer_attr);
@@ -40,13 +39,3 @@ PULSEAUDIO_SYMBOL(pa_mainloop_iterate);
 PULSEAUDIO_SYMBOL(pa_mainloop_prepare);
 PULSEAUDIO_SYMBOL(pa_mainloop_poll);
 PULSEAUDIO_SYMBOL(pa_mainloop_dispatch);
-
-PULSEAUDIO_SYMBOL(pa_threaded_mainloop_free);
-PULSEAUDIO_SYMBOL(pa_threaded_mainloop_get_api);
-PULSEAUDIO_SYMBOL(pa_threaded_mainloop_lock);
-PULSEAUDIO_SYMBOL(pa_threaded_mainloop_new);
-PULSEAUDIO_SYMBOL(pa_threaded_mainloop_signal);
-PULSEAUDIO_SYMBOL(pa_threaded_mainloop_start);
-PULSEAUDIO_SYMBOL(pa_threaded_mainloop_stop);
-PULSEAUDIO_SYMBOL(pa_threaded_mainloop_unlock);
-PULSEAUDIO_SYMBOL(pa_threaded_mainloop_wait);
--- a/extern/audaspace/src/util/RingBuffer.cpp
+++ b/extern/audaspace/src/util/RingBuffer.cpp
@@ -1,137 +0,0 @@
-/*******************************************************************************
- * Copyright 2009-2021 Jörg Müller
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
-
-#include "util/RingBuffer.h"
-
-#include <algorithm>
-#include <cstring>
-#include <cstdlib>
-
-#define ALIGNMENT 32
-#define ALIGN(a) (a + ALIGNMENT - ((long long)a & (ALIGNMENT-1)))
-
-AUD_NAMESPACE_BEGIN
-
-RingBuffer::RingBuffer(int size) :
-	m_buffer(size),
-	m_read(0),
-	m_write(0)
-{
-}
-
-sample_t* RingBuffer::getBuffer() const
-{
-	return m_buffer.getBuffer();
-}
-
-int RingBuffer::getSize() const
-{
-	return m_buffer.getSize();
-}
-
-size_t RingBuffer::getReadSize() const
-{
-	size_t read = m_read;
-	size_t write = m_write;
-
-	if(read > write)
-		return write + getSize() - read;
-	else
-		return write - read;
-}
-
-size_t RingBuffer::getWriteSize() const
-{
-	size_t read = m_read;
-	size_t write = m_write;
-
-	if(read > write)
-		return read - write - 1;
-	else
-		return read + getSize() - write - 1;
-}
-
-size_t RingBuffer::read(data_t* target, size_t size)
-{
-	size = std::min(size, getReadSize());
-
-	data_t* buffer = reinterpret_cast<data_t*>(m_buffer.getBuffer());
-
-	if(m_read + size > m_buffer.getSize())
-	{
-		size_t read_first = m_buffer.getSize() - m_read;
-		size_t read_second = size - read_first;
-
-		std::memcpy(target, buffer + m_read, read_first);
-		std::memcpy(target + read_first, buffer, read_second);
-
-		m_read = read_second;
-	}
-	else
-	{
-		std::memcpy(target, buffer + m_read, size);
-
-		m_read += size;
-	}
-
-	return size;
-}
-
-size_t RingBuffer::write(data_t* source, size_t size)
-{
-	size = std::min(size, getWriteSize());
-
-	data_t* buffer = reinterpret_cast<data_t*>(m_buffer.getBuffer());
-
-	if(m_write + size > m_buffer.getSize())
-	{
-		size_t write_first = m_buffer.getSize() - m_write;
-		size_t write_second = size - write_first;
-
-		std::memcpy(buffer + m_write, source, write_first);
-		std::memcpy(buffer, source + write_first, write_second);
-
-		m_write = write_second;
-	}
-	else
-	{
-		std::memcpy(buffer + m_write, source, size);
-
-		m_write += size;
-	}
-
-	return size;
-}
-
-void RingBuffer::reset()
-{
-	m_read = 0;
-	m_write = 0;
-}
-
-void RingBuffer::resize(int size)
-{
-	m_buffer.resize(size);
-	reset();
-}
-
-void RingBuffer::assureSize(int size)
-{
-	m_buffer.assureSize(size);
-	reset();
-}
-
-AUD_NAMESPACE_END
--- a/extern/cuew/include/cuew.h
+++ b/extern/cuew/include/cuew.h
@@ -645,8 +645,7 @@ typedef enum CUdevice_P2PAttribute_enum {
  CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK = 0x01,
  CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED = 0x02,
  CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED = 0x03,
-  CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED = 0x04,
-  CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED = 0x04,
+  CU_DEVICE_P2P_ATTRIBUTE_ARRAY_ACCESS_ACCESS_SUPPORTED = 0x04,
 } CUdevice_P2PAttribute;

 typedef void (CUDA_CB *CUstreamCallback)(CUstream hStream, CUresult status, void* userData);
--- a/extern/hipew/CMakeLists.txt
+++ b/extern/hipew/CMakeLists.txt
@@ -1,39 +0,0 @@
-# ***** BEGIN GPL LICENSE BLOCK *****
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software Foundation,
-# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-#
-# The Original Code is Copyright (C) 2021, Blender Foundation
-# All rights reserved.
-# ***** END GPL LICENSE BLOCK *****
-
-set(INC
-  .
-  include
-)
-
-set(INC_SYS
-
-)
-
-set(SRC
-  src/hipew.c
-
-  include/hipew.h
-)
-
-set(LIB
-)
-
-blender_add_lib(extern_hipew "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
--- a/extern/hipew/include/hipew.h
+++ b/extern/hipew/include/hipew.h
--- a/extern/hipew/src/hipew.c
+++ b/extern/hipew/src/hipew.c
@@ -1,533 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License
- */
-#ifdef _MSC_VER
-#  if _MSC_VER < 1900
-#    define snprintf _snprintf
-#  endif
-#  define popen _popen
-#  define pclose _pclose
-#  define _CRT_SECURE_NO_WARNINGS
-#endif
-
-#include <hipew.h>
-#include <assert.h>
-#include <stdio.h>
-#include <string.h>
-#include <sys/stat.h>
-
-#ifdef _WIN32
-#  define WIN32_LEAN_AND_MEAN
-#  define VC_EXTRALEAN
-#  include <windows.h>
-
-/* Utility macros. */
-
-typedef HMODULE DynamicLibrary;
-
-#  define dynamic_library_open(path)         LoadLibraryA(path)
-#  define dynamic_library_close(lib)         FreeLibrary(lib)
-#  define dynamic_library_find(lib, symbol)  GetProcAddress(lib, symbol)
-#else
-#  include <dlfcn.h>
-
-typedef void* DynamicLibrary;
-
-#  define dynamic_library_open(path)         dlopen(path, RTLD_NOW)
-#  define dynamic_library_close(lib)         dlclose(lib)
-#  define dynamic_library_find(lib, symbol)  dlsym(lib, symbol)
-#endif
-
-#define _LIBRARY_FIND_CHECKED(lib, name) \
-        name = (t##name *)dynamic_library_find(lib, #name); \
-        assert(name);
-
-#define _LIBRARY_FIND(lib, name) \
-        name = (t##name *)dynamic_library_find(lib, #name);
-
-#define HIP_LIBRARY_FIND_CHECKED(name) \
-        _LIBRARY_FIND_CHECKED(hip_lib, name)
-#define HIP_LIBRARY_FIND(name) _LIBRARY_FIND(hip_lib, name)
-
-
-static DynamicLibrary hip_lib;
-
-/* Function definitions. */
-thipGetErrorName *hipGetErrorName;
-thipInit *hipInit;
-thipDriverGetVersion *hipDriverGetVersion;
-thipGetDevice *hipGetDevice;
-thipGetDeviceCount *hipGetDeviceCount;
-thipDeviceGetName *hipDeviceGetName;
-thipDeviceGetAttribute *hipDeviceGetAttribute;
-thipDeviceComputeCapability *hipDeviceComputeCapability;
-thipDevicePrimaryCtxRetain *hipDevicePrimaryCtxRetain;
-thipDevicePrimaryCtxRelease *hipDevicePrimaryCtxRelease;
-thipDevicePrimaryCtxSetFlags *hipDevicePrimaryCtxSetFlags;
-thipDevicePrimaryCtxGetState *hipDevicePrimaryCtxGetState;
-thipDevicePrimaryCtxReset *hipDevicePrimaryCtxReset;
-thipCtxCreate *hipCtxCreate;
-thipCtxDestroy *hipCtxDestroy;
-thipCtxPushCurrent *hipCtxPushCurrent;
-thipCtxPopCurrent *hipCtxPopCurrent;
-thipCtxSetCurrent *hipCtxSetCurrent;
-thipCtxGetCurrent *hipCtxGetCurrent;
-thipCtxGetDevice *hipCtxGetDevice;
-thipCtxGetFlags *hipCtxGetFlags;
-thipCtxSynchronize *hipCtxSynchronize;
-thipDeviceSynchronize *hipDeviceSynchronize;
-thipCtxGetCacheConfig *hipCtxGetCacheConfig;
-thipCtxSetCacheConfig *hipCtxSetCacheConfig;
-thipCtxGetSharedMemConfig *hipCtxGetSharedMemConfig;
-thipCtxSetSharedMemConfig *hipCtxSetSharedMemConfig;
-thipCtxGetApiVersion *hipCtxGetApiVersion;
-thipModuleLoad *hipModuleLoad;
-thipModuleLoadData *hipModuleLoadData;
-thipModuleLoadDataEx *hipModuleLoadDataEx;
-thipModuleUnload *hipModuleUnload;
-thipModuleGetFunction *hipModuleGetFunction;
-thipModuleGetGlobal *hipModuleGetGlobal;
-thipModuleGetTexRef *hipModuleGetTexRef;
-thipMemGetInfo *hipMemGetInfo;
-thipMalloc *hipMalloc;
-thipMemAllocPitch *hipMemAllocPitch;
-thipFree *hipFree;
-thipMemGetAddressRange *hipMemGetAddressRange;
-thipHostMalloc *hipHostMalloc;
-thipHostFree *hipHostFree;
-thipHostGetDevicePointer *hipHostGetDevicePointer;
-thipHostGetFlags *hipHostGetFlags;
-thipMallocManaged *hipMallocManaged;
-thipDeviceGetByPCIBusId *hipDeviceGetByPCIBusId;
-thipDeviceGetPCIBusId *hipDeviceGetPCIBusId;
-thipMemcpyPeer *hipMemcpyPeer;
-thipMemcpyHtoD *hipMemcpyHtoD;
-thipMemcpyDtoH *hipMemcpyDtoH;
-thipMemcpyDtoD *hipMemcpyDtoD;
-thipDrvMemcpy2DUnaligned *hipDrvMemcpy2DUnaligned;
-thipMemcpyParam2D *hipMemcpyParam2D;
-thipDrvMemcpy3D *hipDrvMemcpy3D;
-thipMemcpyHtoDAsync *hipMemcpyHtoDAsync;
-thipMemcpyDtoHAsync *hipMemcpyDtoHAsync;
-thipMemcpyParam2DAsync *hipMemcpyParam2DAsync;
-thipDrvMemcpy3DAsync *hipDrvMemcpy3DAsync;
-thipMemsetD8 *hipMemsetD8;
-thipMemsetD16 *hipMemsetD16;
-thipMemsetD32 *hipMemsetD32;
-thipMemsetD8Async *hipMemsetD8Async;
-thipMemsetD16Async *hipMemsetD16Async;
-thipMemsetD32Async *hipMemsetD32Async;
-thipArrayCreate *hipArrayCreate;
-thipArrayDestroy *hipArrayDestroy;
-thipArray3DCreate *hipArray3DCreate;
-thipStreamCreateWithFlags *hipStreamCreateWithFlags;
-thipStreamCreateWithPriority *hipStreamCreateWithPriority;
-thipStreamGetPriority *hipStreamGetPriority;
-thipStreamGetFlags *hipStreamGetFlags;
-thipStreamWaitEvent *hipStreamWaitEvent;
-thipStreamAddCallback *hipStreamAddCallback;
-thipStreamQuery *hipStreamQuery;
-thipStreamSynchronize *hipStreamSynchronize;
-thipStreamDestroy *hipStreamDestroy;
-thipEventCreateWithFlags *hipEventCreateWithFlags;
-thipEventRecord *hipEventRecord;
-thipEventQuery *hipEventQuery;
-thipEventSynchronize *hipEventSynchronize;
-thipEventDestroy *hipEventDestroy;
-thipEventElapsedTime *hipEventElapsedTime;
-thipFuncGetAttribute *hipFuncGetAttribute;
-thipFuncSetCacheConfig *hipFuncSetCacheConfig;
-thipModuleLaunchKernel *hipModuleLaunchKernel;
-thipDrvOccupancyMaxActiveBlocksPerMultiprocessor *hipDrvOccupancyMaxActiveBlocksPerMultiprocessor;
-thipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags *hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
-thipModuleOccupancyMaxPotentialBlockSize *hipModuleOccupancyMaxPotentialBlockSize;
-thipTexRefSetArray *hipTexRefSetArray;
-thipTexRefSetAddress *hipTexRefSetAddress;
-thipTexRefSetAddress2D *hipTexRefSetAddress2D;
-thipTexRefSetFormat *hipTexRefSetFormat;
-thipTexRefSetAddressMode *hipTexRefSetAddressMode;
-thipTexRefSetFilterMode *hipTexRefSetFilterMode;
-thipTexRefSetFlags *hipTexRefSetFlags;
-thipTexRefGetAddress *hipTexRefGetAddress;
-thipTexRefGetArray *hipTexRefGetArray;
-thipTexRefGetAddressMode *hipTexRefGetAddressMode;
-thipTexObjectCreate *hipTexObjectCreate;
-thipTexObjectDestroy *hipTexObjectDestroy;
-thipDeviceCanAccessPeer *hipDeviceCanAccessPeer;
-
-thipCtxEnablePeerAccess *hipCtxEnablePeerAccess;
-thipCtxDisablePeerAccess *hipCtxDisablePeerAccess;
-thipDeviceGetP2PAttribute *hipDeviceGetP2PAttribute;
-thipGraphicsUnregisterResource *hipGraphicsUnregisterResource;
-thipGraphicsMapResources *hipGraphicsMapResources;
-thipGraphicsUnmapResources *hipGraphicsUnmapResources;
-thipGraphicsResourceGetMappedPointer *hipGraphicsResourceGetMappedPointer;
-
-thipGraphicsGLRegisterBuffer *hipGraphicsGLRegisterBuffer;
-thipGLGetDevices *hipGLGetDevices;
-
-
-
-static DynamicLibrary dynamic_library_open_find(const char **paths) {
-  int i = 0;
-  while (paths[i] != NULL) {
-      DynamicLibrary lib = dynamic_library_open(paths[i]);
-      if (lib != NULL) {
-        return lib;
-      }
-      ++i;
-  }
-  return NULL;
-}
-
-/* Implementation function. */
-static void hipewHipExit(void) {
-  if (hip_lib != NULL) {
-    /*  Ignore errors. */
-    dynamic_library_close(hip_lib);
-    hip_lib = NULL;
-  }
-}
-
-static int hipewHipInit(void) {
-  /* Library paths. */
-#ifdef _WIN32
-  /* Expected in c:/windows/system or similar, no path needed. */
-  const char *hip_paths[] = {"amdhip64.dll", NULL};
-#elif defined(__APPLE__)
-  /* Default installation path. */
-  const char *hip_paths[] = {"", NULL};
-#else
-  const char *hip_paths[] = {"/opt/rocm/hip/lib/libamdhip64.so", NULL};
-#endif
-  static int initialized = 0;
-  static int result = 0;
-  int error, driver_version;
-
-  if (initialized) {
-    return result;
-  }
-
-  initialized = 1;
-
-  error = atexit(hipewHipExit);
-  if (error) {
-    result = HIPEW_ERROR_ATEXIT_FAILED;
-    return result;
-  }
-
-  /* Load library. */
-  hip_lib = dynamic_library_open_find(hip_paths);
-
-  if (hip_lib == NULL) {
-    result = HIPEW_ERROR_OPEN_FAILED;
-    return result;
-  }
-
-  /* Fetch all function pointers. */
-  HIP_LIBRARY_FIND_CHECKED(hipGetErrorName);
-  HIP_LIBRARY_FIND_CHECKED(hipInit);
-  HIP_LIBRARY_FIND_CHECKED(hipDriverGetVersion);
-  HIP_LIBRARY_FIND_CHECKED(hipGetDevice);
-  HIP_LIBRARY_FIND_CHECKED(hipGetDeviceCount);
-  HIP_LIBRARY_FIND_CHECKED(hipDeviceGetName);
-  HIP_LIBRARY_FIND_CHECKED(hipDeviceGetAttribute);
-  HIP_LIBRARY_FIND_CHECKED(hipDeviceComputeCapability);
-  HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxRetain);
-  HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxRelease);
-  HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxSetFlags);
-  HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxGetState);
-  HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxReset);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxCreate);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxDestroy);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxPushCurrent);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxPopCurrent);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxSetCurrent);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxGetCurrent);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxGetDevice);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxGetFlags);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxSynchronize);
-  HIP_LIBRARY_FIND_CHECKED(hipDeviceSynchronize);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxGetCacheConfig);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxSetCacheConfig);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxGetSharedMemConfig);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxSetSharedMemConfig);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxGetApiVersion);
-  HIP_LIBRARY_FIND_CHECKED(hipModuleLoad);
-  HIP_LIBRARY_FIND_CHECKED(hipModuleLoadData);
-  HIP_LIBRARY_FIND_CHECKED(hipModuleLoadDataEx);
-  HIP_LIBRARY_FIND_CHECKED(hipModuleUnload);
-  HIP_LIBRARY_FIND_CHECKED(hipModuleGetFunction);
-  HIP_LIBRARY_FIND_CHECKED(hipModuleGetGlobal);
-  HIP_LIBRARY_FIND_CHECKED(hipModuleGetTexRef);
-  HIP_LIBRARY_FIND_CHECKED(hipMemGetInfo);
-  HIP_LIBRARY_FIND_CHECKED(hipMalloc);
-  HIP_LIBRARY_FIND_CHECKED(hipMemAllocPitch);
-  HIP_LIBRARY_FIND_CHECKED(hipFree);
-  HIP_LIBRARY_FIND_CHECKED(hipMemGetAddressRange);
-  HIP_LIBRARY_FIND_CHECKED(hipHostMalloc);
-  HIP_LIBRARY_FIND_CHECKED(hipHostFree);
-  HIP_LIBRARY_FIND_CHECKED(hipHostGetDevicePointer);
-  HIP_LIBRARY_FIND_CHECKED(hipHostGetFlags);
-  HIP_LIBRARY_FIND_CHECKED(hipMallocManaged);
-  HIP_LIBRARY_FIND_CHECKED(hipDeviceGetByPCIBusId);
-  HIP_LIBRARY_FIND_CHECKED(hipDeviceGetPCIBusId);
-  HIP_LIBRARY_FIND_CHECKED(hipMemcpyPeer);
-  HIP_LIBRARY_FIND_CHECKED(hipMemcpyHtoD);
-  HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoH);
-  HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoD);
-  HIP_LIBRARY_FIND_CHECKED(hipMemcpyParam2D);
-  HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy3D);
-  HIP_LIBRARY_FIND_CHECKED(hipMemcpyHtoDAsync);
-  HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoHAsync);
-  HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy2DUnaligned);
-  HIP_LIBRARY_FIND_CHECKED(hipMemcpyParam2DAsync);
-  HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy3DAsync);
-  HIP_LIBRARY_FIND_CHECKED(hipMemsetD8);
-  HIP_LIBRARY_FIND_CHECKED(hipMemsetD16);
-  HIP_LIBRARY_FIND_CHECKED(hipMemsetD32);
-  HIP_LIBRARY_FIND_CHECKED(hipMemsetD8Async);
-  HIP_LIBRARY_FIND_CHECKED(hipMemsetD16Async);
-  HIP_LIBRARY_FIND_CHECKED(hipMemsetD32Async);
-  HIP_LIBRARY_FIND_CHECKED(hipArrayCreate);
-  HIP_LIBRARY_FIND_CHECKED(hipArrayDestroy);
-  HIP_LIBRARY_FIND_CHECKED(hipArray3DCreate);
-  HIP_LIBRARY_FIND_CHECKED(hipStreamCreateWithFlags);
-  HIP_LIBRARY_FIND_CHECKED(hipStreamCreateWithPriority);
-  HIP_LIBRARY_FIND_CHECKED(hipStreamGetPriority);
-  HIP_LIBRARY_FIND_CHECKED(hipStreamGetFlags);
-  HIP_LIBRARY_FIND_CHECKED(hipStreamWaitEvent);
-  HIP_LIBRARY_FIND_CHECKED(hipStreamAddCallback);
-  HIP_LIBRARY_FIND_CHECKED(hipStreamQuery);
-  HIP_LIBRARY_FIND_CHECKED(hipStreamSynchronize);
-  HIP_LIBRARY_FIND_CHECKED(hipStreamDestroy);
-  HIP_LIBRARY_FIND_CHECKED(hipEventCreateWithFlags);
-  HIP_LIBRARY_FIND_CHECKED(hipEventRecord);
-  HIP_LIBRARY_FIND_CHECKED(hipEventQuery);
-  HIP_LIBRARY_FIND_CHECKED(hipEventSynchronize);
-  HIP_LIBRARY_FIND_CHECKED(hipEventDestroy);
-  HIP_LIBRARY_FIND_CHECKED(hipEventElapsedTime);
-  HIP_LIBRARY_FIND_CHECKED(hipFuncGetAttribute);
-  HIP_LIBRARY_FIND_CHECKED(hipFuncSetCacheConfig);
-  HIP_LIBRARY_FIND_CHECKED(hipModuleLaunchKernel);
-  HIP_LIBRARY_FIND_CHECKED(hipModuleOccupancyMaxPotentialBlockSize);
-  HIP_LIBRARY_FIND_CHECKED(hipTexRefSetArray);
-  HIP_LIBRARY_FIND_CHECKED(hipTexRefSetAddress);
-  HIP_LIBRARY_FIND_CHECKED(hipTexRefSetAddress2D);
-  HIP_LIBRARY_FIND_CHECKED(hipTexRefSetFormat);
-  HIP_LIBRARY_FIND_CHECKED(hipTexRefSetAddressMode);
-  HIP_LIBRARY_FIND_CHECKED(hipTexRefSetFilterMode);
-  HIP_LIBRARY_FIND_CHECKED(hipTexRefSetFlags);
-  HIP_LIBRARY_FIND_CHECKED(hipTexRefGetAddress);
-  HIP_LIBRARY_FIND_CHECKED(hipTexRefGetAddressMode);
-  HIP_LIBRARY_FIND_CHECKED(hipTexObjectCreate);
-  HIP_LIBRARY_FIND_CHECKED(hipTexObjectDestroy);
-  HIP_LIBRARY_FIND_CHECKED(hipDeviceCanAccessPeer);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxEnablePeerAccess);
-  HIP_LIBRARY_FIND_CHECKED(hipCtxDisablePeerAccess);
-  HIP_LIBRARY_FIND_CHECKED(hipDeviceGetP2PAttribute);
-#ifdef _WIN32
-  HIP_LIBRARY_FIND_CHECKED(hipGraphicsUnregisterResource);
-  HIP_LIBRARY_FIND_CHECKED(hipGraphicsMapResources);
-  HIP_LIBRARY_FIND_CHECKED(hipGraphicsUnmapResources);
-  HIP_LIBRARY_FIND_CHECKED(hipGraphicsResourceGetMappedPointer);
-  HIP_LIBRARY_FIND_CHECKED(hipGraphicsGLRegisterBuffer);
-  HIP_LIBRARY_FIND_CHECKED(hipGLGetDevices);
-#endif
-  result = HIPEW_SUCCESS;
-  return result;
-}
-
-
-
-int hipewInit(hipuint32_t flags) {
-  int result = HIPEW_SUCCESS;
-
-  if (flags & HIPEW_INIT_HIP) {
-    result = hipewHipInit();
-    if (result != HIPEW_SUCCESS) {
-      return result;
-    }
-  }
-
-  return result;
-}
-
-
-const char *hipewErrorString(hipError_t result) {
-  switch (result) {
-    case hipSuccess: return "No errors";
-    case hipErrorInvalidValue: return "Invalid value";
-    case hipErrorOutOfMemory: return "Out of memory";
-    case hipErrorNotInitialized: return "Driver not initialized";
-    case hipErrorDeinitialized: return "Driver deinitialized";
-    case hipErrorProfilerDisabled: return "Profiler disabled";
-    case hipErrorProfilerNotInitialized: return "Profiler not initialized";
-    case hipErrorProfilerAlreadyStarted: return "Profiler already started";
-    case hipErrorProfilerAlreadyStopped: return "Profiler already stopped";
-    case hipErrorNoDevice: return "No HIP-capable device available";
-    case hipErrorInvalidDevice: return "Invalid device";
-    case hipErrorInvalidImage: return "Invalid kernel image";
-    case hipErrorInvalidContext: return "Invalid context";
-    case hipErrorContextAlreadyCurrent: return "Context already current";
-    case hipErrorMapFailed: return "Map failed";
-    case hipErrorUnmapFailed: return "Unmap failed";
-    case hipErrorArrayIsMapped: return "Array is mapped";
-    case hipErrorAlreadyMapped: return "Already mapped";
-    case hipErrorNoBinaryForGpu: return "No binary for GPU";
-    case hipErrorAlreadyAcquired: return "Already acquired";
-    case hipErrorNotMapped: return "Not mapped";
-    case hipErrorNotMappedAsArray: return "Mapped resource not available for access as an array";
-    case hipErrorNotMappedAsPointer: return "Mapped resource not available for access as a pointer";
-    case hipErrorECCNotCorrectable: return "Uncorrectable ECC error detected";
-    case hipErrorUnsupportedLimit: return "hipLimit_t not supported by device";
-    case hipErrorContextAlreadyInUse: return "Context already in use";
-    case hipErrorPeerAccessUnsupported: return "Peer access unsupported";
-    case hipErrorInvalidKernelFile: return "Invalid ptx";
-    case hipErrorInvalidGraphicsContext: return "Invalid graphics context";
-    case hipErrorInvalidSource: return "Invalid source";
-    case hipErrorFileNotFound: return "File not found";
-    case hipErrorSharedObjectSymbolNotFound: return "Link to a shared object failed to resolve";
-    case hipErrorSharedObjectInitFailed: return "Shared object initialization failed";
-    case hipErrorOperatingSystem: return "Operating system";
-    case hipErrorInvalidHandle: return "Invalid handle";
-    case hipErrorNotFound: return "Not found";
-    case hipErrorNotReady: return "HIP not ready";
-    case hipErrorIllegalAddress: return "Illegal address";
-    case hipErrorLaunchOutOfResources: return "Launch exceeded resources";
-    case hipErrorLaunchTimeOut: return "Launch exceeded timeout";
-    case hipErrorPeerAccessAlreadyEnabled: return "Peer access already enabled";
-    case hipErrorPeerAccessNotEnabled: return "Peer access not enabled";
-    case hipErrorSetOnActiveProcess: return "Primary context active";
-    case hipErrorAssert: return "Assert";
-    case hipErrorHostMemoryAlreadyRegistered: return "Host memory already registered";
-    case hipErrorHostMemoryNotRegistered: return "Host memory not registered";
-    case hipErrorLaunchFailure: return "Launch failed";
-    case hipErrorCooperativeLaunchTooLarge: return "Cooperative launch too large";
-    case hipErrorNotSupported: return "Not supported";
-    case hipErrorUnknown: return "Unknown error";
-    default: return "Unknown HIP error value";
-  }
-}
-
-static void path_join(const char *path1,
-                      const char *path2,
-                      int maxlen,
-                      char *result) {
-#if defined(WIN32) || defined(_WIN32)
-  const char separator = '\\';
-#else
-  const char separator = '/';
-#endif
-  int n = snprintf(result, maxlen, "%s%c%s", path1, separator, path2);
-  if (n != -1 && n < maxlen) {
-    result[n] = '\0';
-  }
-  else {
-    result[maxlen - 1] = '\0';
-  }
-}
-
-static int path_exists(const char *path) {
-  struct stat st;
-  if (stat(path, &st)) {
-    return 0;
-  }
-  return 1;
-}
-
-const char *hipewCompilerPath(void) {
-    #ifdef _WIN32
-    const char *hipPath = getenv("HIP_ROCCLR_HOME");
-    const char *windowsCommand = "perl ";
-    const char *executable = "bin/hipcc";
-
-    static char hipcc[65536];
-    static char finalCommand[65536];
-    if(hipPath) {
-      path_join(hipPath, executable, sizeof(hipcc), hipcc);
-      if(path_exists(hipcc)) {
-        snprintf(finalCommand, sizeof(hipcc), "%s %s", windowsCommand, hipcc);
-        return finalCommand;
-      } else {
-        printf("Could not find hipcc. Make sure HIP_ROCCLR_HOME points to the directory holding /bin/hipcc");
-      }
-    }
-    #else
-    const char *hipPath =  "opt/rocm/hip/bin";
-    const char *executable = "hipcc";
-
-    static char hipcc[65536];
-    if(hipPath) {
-      path_join(hipPath, executable, sizeof(hipcc), hipcc);
-      if(path_exists(hipcc)){
-        return hipcc;
-      }
-    }
-    #endif
-
-  {
-#ifdef _WIN32
-    FILE *handle = popen("where hipcc", "r");
-#else
-    FILE *handle = popen("which hipcc", "r");
-#endif
-    if (handle) {
-      char buffer[4096] = {0};
-      int len = fread(buffer, 1, sizeof(buffer) - 1, handle);
-      buffer[len] = '\0';
-      pclose(handle);
-      if (buffer[0]) {
-        return "hipcc";
-      }
-    }
-  }
-
-  return NULL;
-}
-
-int hipewCompilerVersion(void) {
-  const char *path = hipewCompilerPath();
-  const char *marker = "Hip compilation tools, release ";
-  FILE *pipe;
-  int major, minor;
-  char *versionstr;
-  char buf[128];
-  char output[65536] = "\0";
-  char command[65536] = "\0";
-
-  if (path == NULL) {
-    return 0;
-  }
-
-  /* get --version output */
-  strcat(command, "\"");
-  strncat(command, path, sizeof(command) - 1);
-  strncat(command, "\" --version", sizeof(command) - strlen(path) - 1);
-  pipe = popen(command, "r");
-  if (!pipe) {
-    fprintf(stderr, "HIP: failed to run compiler to retrieve version");
-    return 0;
-  }
-
-  while (!feof(pipe)) {
-    if (fgets(buf, sizeof(buf), pipe) != NULL) {
-      strncat(output, buf, sizeof(output) - strlen(output) - 1);
-    }
-  }
-
-  pclose(pipe);
-  return 40;
-}
--- a/extern/json/README.blender
+++ b/extern/json/README.blender
@@ -1,5 +0,0 @@
-Project: JSON
-URL: https://github.com/nlohmann/json/
-License: MIT License
-Upstream version: 3.10.2
-Local modifications: None
--- a/extern/json/include/json.hpp
+++ b/extern/json/include/json.hpp
--- a/extern/tinygltf/README.blender
+++ b/extern/tinygltf/README.blender
@@ -1,6 +0,0 @@
-Project: TinyGLTF
-URL: https://github.com/syoyo/tinygltf
-License: MIT
-Upstream version: 2.5.0, 19a41d20ec0
-Local modifications: 
-* Silence "enum value not handled in switch" warnings due to JSON dependency.
--- a/extern/tinygltf/tiny_gltf.h
+++ b/extern/tinygltf/tiny_gltf.h
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -247,7 +247,7 @@ if(WITH_CYCLES_OSL)
 endif()

 if(WITH_CYCLES_DEVICE_OPTIX)
-  find_package(OptiX 7.3.0)
+  find_package(OptiX)

  if(OPTIX_FOUND)
    add_definitions(-DWITH_OPTIX)
@@ -286,18 +286,11 @@ if(WITH_OPENSUBDIV)
  )
 endif()

-if(WITH_OPENIMAGEDENOISE)
-  add_definitions(-DWITH_OPENIMAGEDENOISE)
-  add_definitions(-DOIDN_STATIC_LIB)
-  include_directories(
-    SYSTEM
-    ${OPENIMAGEDENOISE_INCLUDE_DIRS}
-  )
-endif()
-
 if(WITH_CYCLES_STANDALONE)
+  set(WITH_CYCLES_DEVICE_OPENCL TRUE)
  set(WITH_CYCLES_DEVICE_CUDA TRUE)
-  set(WITH_CYCLES_DEVICE_HIP TRUE)
+  # Experimental and unfinished.
+  set(WITH_CYCLES_NETWORK FALSE)
 endif()
 # TODO(sergey): Consider removing it, only causes confusion in interface.
 set(WITH_CYCLES_DEVICE_MULTI TRUE)
@@ -393,12 +386,18 @@ if(WITH_CYCLES_BLENDER)
  add_subdirectory(blender)
 endif()

-add_subdirectory(app)
+if(WITH_CYCLES_NETWORK)
+  add_definitions(-DWITH_NETWORK)
+endif()
+
+if(WITH_CYCLES_STANDALONE OR WITH_CYCLES_NETWORK OR WITH_CYCLES_CUBIN_COMPILER)
+  add_subdirectory(app)
+endif()
+
 add_subdirectory(bvh)
 add_subdirectory(device)
 add_subdirectory(doc)
 add_subdirectory(graph)
-add_subdirectory(integrator)
 add_subdirectory(kernel)
 add_subdirectory(render)
 add_subdirectory(subd)
--- a/intern/cycles/app/CMakeLists.txt
+++ b/intern/cycles/app/CMakeLists.txt
@@ -64,8 +64,6 @@ if(WITH_CYCLES_STANDALONE)
    cycles_standalone.cpp
    cycles_xml.cpp
    cycles_xml.h
-    oiio_output_driver.cpp
-    oiio_output_driver.h
  )
  add_executable(cycles ${SRC} ${INC} ${INC_SYS})
  unset(SRC)
@@ -75,7 +73,7 @@ if(WITH_CYCLES_STANDALONE)

  if(APPLE)
    if(WITH_OPENCOLORIO)
-      set_property(TARGET cycles APPEND_STRING PROPERTY LINK_FLAGS " -framework IOKit -framework Carbon")
+      set_property(TARGET cycles APPEND_STRING PROPERTY LINK_FLAGS " -framework IOKit")
    endif()
    if(WITH_OPENIMAGEDENOISE AND "${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64")
      # OpenImageDenoise uses BNNS from the Accelerate framework.
@@ -92,6 +90,24 @@ if(WITH_CYCLES_STANDALONE)
  endif()
 endif()

+#####################################################################
+# Cycles network server executable
+#####################################################################
+
+if(WITH_CYCLES_NETWORK)
+  set(SRC
+    cycles_server.cpp
+  )
+  add_executable(cycles_server ${SRC})
+  target_link_libraries(cycles_server ${LIBRARIES})
+  cycles_target_link_libraries(cycles_server)
+
+  if(UNIX AND NOT APPLE)
+    set_target_properties(cycles_server PROPERTIES INSTALL_RPATH $ORIGIN/lib)
+  endif()
+  unset(SRC)
+endif()
+
 #####################################################################
 # Cycles cubin compiler executable
 #####################################################################
--- a/intern/cycles/app/cycles_standalone.cpp
+++ b/intern/cycles/app/cycles_standalone.cpp
@@ -36,9 +36,6 @@
 #include "util/util_unique_ptr.h"
 #include "util/util_version.h"

-#include "app/cycles_xml.h"
-#include "app/oiio_output_driver.h"
-
 #ifdef WITH_CYCLES_STANDALONE_GUI
 #  include "util/util_view.h"
 #endif
@@ -56,8 +53,7 @@ struct Options {
  SessionParams session_params;
  bool quiet;
  bool show_help, interactive, pause;
-  string output_filepath;
-  string output_pass;
+  string output_path;
 } options;

 static void session_print(const string &str)
@@ -93,6 +89,30 @@ static void session_print_status()
  session_print(status);
 }

+static bool write_render(const uchar *pixels, int w, int h, int channels)
+{
+  string msg = string_printf("Writing image %s", options.output_path.c_str());
+  session_print(msg);
+
+  unique_ptr<ImageOutput> out = unique_ptr<ImageOutput>(ImageOutput::create(options.output_path));
+  if (!out) {
+    return false;
+  }
+
+  ImageSpec spec(w, h, channels, TypeDesc::UINT8);
+  if (!out->open(options.output_path, spec)) {
+    return false;
+  }
+
+  /* conversion for different top/bottom convention */
+  out->write_image(
+      TypeDesc::UINT8, pixels + (h - 1) * w * channels, AutoStride, -w * channels, AutoStride);
+
+  out->close();
+
+  return true;
+}
+
 static BufferParams &session_buffer_params()
 {
  static BufferParams buffer_params;
@@ -106,7 +126,7 @@ static BufferParams &session_buffer_params()

 static void scene_init()
 {
-  options.scene = options.session->scene;
+  options.scene = new Scene(options.scene_params, options.session->device);

  /* Read XML */
  xml_read_file(options.scene, options.filepath.c_str());
@@ -127,13 +147,8 @@ static void scene_init()

 static void session_init()
 {
-  options.output_pass = "combined";
-  options.session = new Session(options.session_params, options.scene_params);
-
-  if (!options.output_filepath.empty()) {
-    options.session->set_output_driver(make_unique<OIIOOutputDriver>(
-        options.output_filepath, options.output_pass, session_print));
-  }
+  options.session_params.write_render_cb = write_render;
+  options.session = new Session(options.session_params);

  if (options.session_params.background && !options.quiet)
    options.session->progress.set_update_callback(function_bind(&session_print_status));
@@ -144,13 +159,9 @@ static void session_init()

  /* load scene */
  scene_init();
+  options.session->scene = options.scene;

-  /* add pass for output. */
-  Pass *pass = options.scene->create_node<Pass>();
-  pass->set_name(ustring(options.output_pass.c_str()));
-  pass->set_type(PASS_COMBINED);
-
-  options.session->reset(options.session_params, session_buffer_params());
+  options.session->reset(session_buffer_params(), options.session_params.samples);
  options.session->start();
 }

@@ -212,7 +223,9 @@ static void display_info(Progress &progress)

 static void display()
 {
-  options.session->draw();
+  static DeviceDrawParams draw_params = DeviceDrawParams();
+
+  options.session->draw(session_buffer_params(), draw_params);

  display_info(options.session->progress);
 }
@@ -242,7 +255,7 @@ static void motion(int x, int y, int button)
    options.session->scene->camera->need_flags_update = true;
    options.session->scene->camera->need_device_update = true;

-    options.session->reset(options.session_params, session_buffer_params());
+    options.session->reset(session_buffer_params(), options.session_params.samples);
  }
 }

@@ -259,7 +272,7 @@ static void resize(int width, int height)
    options.session->scene->camera->need_flags_update = true;
    options.session->scene->camera->need_device_update = true;

-    options.session->reset(options.session_params, session_buffer_params());
+    options.session->reset(session_buffer_params(), options.session_params.samples);
  }
 }

@@ -271,7 +284,7 @@ static void keyboard(unsigned char key)

  /* Reset */
  else if (key == 'r')
-    options.session->reset(options.session_params, session_buffer_params());
+    options.session->reset(session_buffer_params(), options.session_params.samples);

  /* Cancel */
  else if (key == 27)  // escape
@@ -308,7 +321,7 @@ static void keyboard(unsigned char key)
    options.session->scene->camera->need_flags_update = true;
    options.session->scene->camera->need_device_update = true;

-    options.session->reset(options.session_params, session_buffer_params());
+    options.session->reset(session_buffer_params(), options.session_params.samples);
  }

  /* Set Max Bounces */
@@ -334,7 +347,7 @@ static void keyboard(unsigned char key)

    options.session->scene->integrator->set_max_bounce(bounce);

-    options.session->reset(options.session_params, session_buffer_params());
+    options.session->reset(session_buffer_params(), options.session_params.samples);
  }
 }
 #endif
@@ -349,13 +362,11 @@ static int files_parse(int argc, const char *argv[])

 static void options_parse(int argc, const char **argv)
 {
-  options.width = 1024;
-  options.height = 512;
+  options.width = 0;
+  options.height = 0;
  options.filepath = "";
  options.session = NULL;
  options.quiet = false;
-  options.session_params.use_auto_tile = false;
-  options.session_params.tile_size = 0;

  /* device names */
  string device_names = "";
@@ -401,7 +412,7 @@ static void options_parse(int argc, const char **argv)
             &options.session_params.samples,
             "Number of samples to render",
             "--output %s",
-             &options.output_filepath,
+             &options.output_path,
             "File path to write output image",
             "--threads %d",
             &options.session_params.threads,
@@ -412,9 +423,12 @@ static void options_parse(int argc, const char **argv)
             "--height %d",
             &options.height,
             "Window height in pixel",
-             "--tile-size %d",
-             &options.session_params.tile_size,
-             "Tile size in pixels",
+             "--tile-width %d",
+             &options.session_params.tile_size.x,
+             "Tile width in pixels",
+             "--tile-height %d",
+             &options.session_params.tile_size.y,
+             "Tile height in pixels",
             "--list-devices",
             &list,
             "List information about all available devices",
@@ -476,9 +490,8 @@ static void options_parse(int argc, const char **argv)
  options.session_params.background = true;
 #endif

-  if (options.session_params.tile_size > 0) {
-    options.session_params.use_auto_tile = true;
-  }
+  /* Use progressive rendering */
+  options.session_params.progressive = true;

  /* find matching device */
  DeviceType device_type = Device::type_from_string(devicename.c_str());
@@ -514,6 +527,9 @@ static void options_parse(int argc, const char **argv)
    fprintf(stderr, "No file path specified\n");
    exit(EXIT_FAILURE);
  }
+
+  /* For smoother Viewport */
+  options.session_params.start_resolution = 64;
 }

 CCL_NAMESPACE_END
--- a/intern/cycles/app/cycles_xml.cpp
+++ b/intern/cycles/app/cycles_xml.cpp
@@ -333,7 +333,6 @@ static void xml_read_shader_graph(XMLReadState &state, Shader *shader, xml_node
      }

      snode = (ShaderNode *)node_type->create(node_type);
-      snode->set_owner(graph);
    }

    xml_read_node(graph_reader, snode, node);
@@ -704,7 +703,7 @@ void xml_read_file(Scene *scene, const char *filepath)

  xml_read_include(state, path_filename(filepath));

-  scene->params.bvh_type = BVH_TYPE_STATIC;
+  scene->params.bvh_type = SceneParams::BVH_STATIC;
 }

 CCL_NAMESPACE_END
--- a/intern/cycles/app/oiio_output_driver.cpp
+++ b/intern/cycles/app/oiio_output_driver.cpp
@@ -1,71 +0,0 @@
-/*
- * Copyright 2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "app/oiio_output_driver.h"
-
-CCL_NAMESPACE_BEGIN
-
-OIIOOutputDriver::OIIOOutputDriver(const string_view filepath,
-                                   const string_view pass,
-                                   LogFunction log)
-    : filepath_(filepath), pass_(pass), log_(log)
-{
-}
-
-OIIOOutputDriver::~OIIOOutputDriver()
-{
-}
-
-void OIIOOutputDriver::write_render_tile(const Tile &tile)
-{
-  /* Only write the full buffer, no intermediate tiles. */
-  if (!(tile.size == tile.full_size)) {
-    return;
-  }
-
-  log_(string_printf("Writing image %s", filepath_.c_str()));
-
-  unique_ptr<ImageOutput> image_output(ImageOutput::create(filepath_));
-  if (image_output == nullptr) {
-    log_("Failed to create image file");
-    return;
-  }
-
-  const int width = tile.size.x;
-  const int height = tile.size.y;
-
-  ImageSpec spec(width, height, 4, TypeDesc::FLOAT);
-  if (!image_output->open(filepath_, spec)) {
-    log_("Failed to create image file");
-    return;
-  }
-
-  vector<float> pixels(width * height * 4);
-  if (!tile.get_pass_pixels(pass_, 4, pixels.data())) {
-    log_("Failed to read render pass pixels");
-    return;
-  }
-
-  /* Manipulate offset and stride to convert from bottom-up to top-down convention. */
-  image_output->write_image(TypeDesc::FLOAT,
-                            pixels.data() + (height - 1) * width * 4,
-                            AutoStride,
-                            -width * 4 * sizeof(float),
-                            AutoStride);
-  image_output->close();
-}
-
-CCL_NAMESPACE_END
--- a/intern/cycles/app/oiio_output_driver.h
+++ b/intern/cycles/app/oiio_output_driver.h
@@ -1,42 +0,0 @@
-/*
- * Copyright 2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "render/output_driver.h"
-
-#include "util/util_function.h"
-#include "util/util_image.h"
-#include "util/util_string.h"
-#include "util/util_unique_ptr.h"
-#include "util/util_vector.h"
-
-CCL_NAMESPACE_BEGIN
-
-class OIIOOutputDriver : public OutputDriver {
- public:
-  typedef function<void(const string &)> LogFunction;
-
-  OIIOOutputDriver(const string_view filepath, const string_view pass, LogFunction log);
-  virtual ~OIIOOutputDriver();
-
-  void write_render_tile(const Tile &tile) override;
-
- protected:
-  string filepath_;
-  string pass_;
-  LogFunction log_;
-};
-
-CCL_NAMESPACE_END
--- a/intern/cycles/blender/CMakeLists.txt
+++ b/intern/cycles/blender/CMakeLists.txt
@@ -31,14 +31,12 @@ set(INC_SYS
 set(SRC
  blender_camera.cpp
  blender_device.cpp
-  blender_display_driver.cpp
  blender_image.cpp
  blender_geometry.cpp
  blender_light.cpp
  blender_mesh.cpp
  blender_object.cpp
  blender_object_cull.cpp
-  blender_output_driver.cpp
  blender_particles.cpp
  blender_curves.cpp
  blender_logging.cpp
@@ -52,11 +50,9 @@ set(SRC

  CCL_api.h
  blender_device.h
-  blender_display_driver.h
  blender_id_map.h
  blender_image.h
  blender_object_cull.h
-  blender_output_driver.h
  blender_sync.h
  blender_session.h
  blender_texture.h
@@ -97,9 +93,14 @@ set(ADDON_FILES

 add_definitions(${GL_DEFINITIONS})

-if(WITH_CYCLES_DEVICE_HIP)
-  add_definitions(-DWITH_HIP)
+if(WITH_CYCLES_DEVICE_OPENCL)
+  add_definitions(-DWITH_OPENCL)
 endif()
+
+if(WITH_CYCLES_NETWORK)
+  add_definitions(-DWITH_NETWORK)
+endif()
+
 if(WITH_MOD_FLUID)
  add_definitions(-DWITH_FLUID)
 endif()
--- a/intern/cycles/blender/addon/init.py
+++ b/intern/cycles/blender/addon/init.py
@@ -58,6 +58,7 @@ class CyclesRender(bpy.types.RenderEngine):
    bl_use_eevee_viewport = True
    bl_use_preview = True
    bl_use_exclude_layers = True
+    bl_use_save_buffers = True
    bl_use_spherical_stereo = True
    bl_use_custom_freestyle = True
    bl_use_alembic_procedural = True
@@ -84,12 +85,6 @@ class CyclesRender(bpy.types.RenderEngine):
    def render(self, depsgraph):
        engine.render(self, depsgraph)

-    def render_frame_finish(self):
-        engine.render_frame_finish(self)
-
-    def draw(self, context, depsgraph):
-        engine.draw(self, depsgraph, context.space_data)
-
    def bake(self, depsgraph, obj, pass_type, pass_filter, width, height):
        engine.bake(self, depsgraph, obj, pass_type, pass_filter, width, height)

@@ -103,7 +98,7 @@ class CyclesRender(bpy.types.RenderEngine):
        engine.sync(self, depsgraph, context.blend_data)

    def view_draw(self, context, depsgraph):
-        engine.view_draw(self, depsgraph, context.region, context.space_data, context.region_data)
+        engine.draw(self, depsgraph, context.region, context.space_data, context.region_data)

    def update_script_node(self, node):
        if engine.with_osl():
--- a/intern/cycles/blender/addon/engine.py
+++ b/intern/cycles/blender/addon/engine.py
@@ -18,17 +18,62 @@
 from __future__ import annotations


+def _is_using_buggy_driver():
+    import gpu
+    # We need to be conservative here because in multi-GPU systems the display
+    # card might be quite old while the other cards are perfectly fine.
+    #
+    # So we shouldn't disable possibly good dedicated cards just because the
+    # display card seems weak. Instead, we only blacklist configurations which
+    # are proven to cause problems.
+    if gpu.platform.vendor_get() == "ATI Technologies Inc.":
+        import re
+        version = gpu.platform.version_get()
+        if version.endswith("Compatibility Profile Context"):
+            # Old HD 4xxx and 5xxx series drivers did not have the driver version
+            # in the version string, but those cards do not quite work and
+            # cause crashes.
+            return True
+        regex = re.compile(".*Compatibility Profile Context ([0-9]+(\\.[0-9]+)+)$")
+        if not regex.match(version):
+            # Skip cards like FireGL
+            return False
+        version = regex.sub("\\1", version).split('.')
+        return int(version[0]) == 8
+    return False
+
+
+def _workaround_buggy_drivers():
+    if _is_using_buggy_driver():
+        import _cycles
+        if hasattr(_cycles, "opencl_disable"):
+            print("Cycles: OpenGL driver known to be buggy, disabling OpenCL platform.")
+            _cycles.opencl_disable()
+
+
 def _configure_argument_parser():
    import argparse
    # No help because it conflicts with general Python scripts argument parsing
    parser = argparse.ArgumentParser(description="Cycles Addon argument parser",
                                     add_help=False)
+    parser.add_argument("--cycles-resumable-num-chunks",
+                        help="Number of chunks to split sample range into",
+                        default=None)
+    parser.add_argument("--cycles-resumable-current-chunk",
+                        help="Current chunk of samples range to render",
+                        default=None)
+    parser.add_argument("--cycles-resumable-start-chunk",
+                        help="Start chunk to render",
+                        default=None)
+    parser.add_argument("--cycles-resumable-end-chunk",
+                        help="End chunk to render",
+                        default=None)
    parser.add_argument("--cycles-print-stats",
                        help="Print rendering statistics to stderr",
                        action='store_true')
    parser.add_argument("--cycles-device",
                        help="Set the device to use for Cycles, overriding user preferences and the scene setting."
-                             "Valid options are 'CPU', 'CUDA', 'OPTIX', or 'HIP'"
+                             " Valid options are 'CPU', 'CUDA', 'OPTIX' or 'OPENCL'. "  # adjacent literals concatenate; pad with spaces
                             "Additionally, you can append '+CPU' to any GPU type for hybrid rendering.",
                        default=None)
    return parser
@@ -44,6 +89,21 @@ def _parse_command_line():
    parser = _configure_argument_parser()
    args, _ = parser.parse_known_args(argv[argv.index("--") + 1:])

+    if args.cycles_resumable_num_chunks is not None:
+        if args.cycles_resumable_current_chunk is not None:
+            import _cycles
+            _cycles.set_resumable_chunk(
+                int(args.cycles_resumable_num_chunks),
+                int(args.cycles_resumable_current_chunk),
+            )
+        elif (args.cycles_resumable_start_chunk is not None and
+              args.cycles_resumable_end_chunk is not None):  # explicit None test, matching the checks above
+            import _cycles
+            _cycles.set_resumable_chunk_range(
+                int(args.cycles_resumable_num_chunks),
+                int(args.cycles_resumable_start_chunk),
+                int(args.cycles_resumable_end_chunk),
+            )
    if args.cycles_print_stats:
        import _cycles
        _cycles.enable_print_stats()
@@ -58,11 +118,23 @@ def init():
    import _cycles
    import os.path

+    # Work around possibly buggy legacy drivers which crash on the OpenCL
+    # device enumeration.
+    #
+    # These checks are not really correct because they might still fail
+    # in the case of multiple GPUs. However, currently buggy drivers
+    # are really old and likely to be used in single GPU systems only
+    # anyway.
+    #
+    # Can't do it in the background mode, so we hope OpenCL is not enabled
+    # in the user preferences.
+    if not bpy.app.background:
+        _workaround_buggy_drivers()
+
    path = os.path.dirname(__file__)
    user_path = os.path.dirname(os.path.abspath(bpy.utils.user_resource('CONFIG', path='')))
-    temp_path = bpy.app.tempdir

-    _cycles.init(path, user_path, temp_path, bpy.app.background)
+    _cycles.init(path, user_path, bpy.app.background)
    _parse_command_line()


@@ -105,25 +177,6 @@ def render(engine, depsgraph):
        _cycles.render(engine.session, depsgraph.as_pointer())


-def render_frame_finish(engine):
-    if not engine.session:
-        return
-
-    import _cycles
-    _cycles.render_frame_finish(engine.session)
-
-def draw(engine, depsgraph, space_image):
-    if not engine.session:
-        return
-
-    depsgraph_ptr = depsgraph.as_pointer()
-    space_image_ptr = space_image.as_pointer()
-    screen_ptr = space_image.id_data.as_pointer()
-
-    import _cycles
-    _cycles.draw(engine.session, depsgraph_ptr, screen_ptr, space_image_ptr)
-
-
 def bake(engine, depsgraph, obj, pass_type, pass_filter, width, height):
    import _cycles
    session = getattr(engine, "session", None)
@@ -151,14 +204,14 @@ def sync(engine, depsgraph, data):
    _cycles.sync(engine.session, depsgraph.as_pointer())


-def view_draw(engine, depsgraph, region, v3d, rv3d):
+def draw(engine, depsgraph, region, v3d, rv3d):
    import _cycles
    depsgraph = depsgraph.as_pointer()
    v3d = v3d.as_pointer()
    rv3d = rv3d.as_pointer()

    # draw render image
-    _cycles.view_draw(engine.session, depsgraph, v3d, rv3d)
+    _cycles.draw(engine.session, depsgraph, v3d, rv3d)


 def available_devices():
@@ -171,6 +224,11 @@ def with_osl():
    return _cycles.with_osl


+def with_network():
+    import _cycles
+    return _cycles.with_network
+
+
 def system_info():
    import _cycles
    return _cycles.system_info()
@@ -185,7 +243,6 @@ def list_render_passes(scene, srl):
    # Data passes.
    if srl.use_pass_z:                     yield ("Depth",         "Z",    'VALUE')
    if srl.use_pass_mist:                  yield ("Mist",          "Z",    'VALUE')
-    if srl.use_pass_position:              yield ("Position",      "XYZ",  'VECTOR')
    if srl.use_pass_normal:                yield ("Normal",        "XYZ",  'VECTOR')
    if srl.use_pass_vector:                yield ("Vector",        "XYZW", 'VECTOR')
    if srl.use_pass_uv:                    yield ("UV",            "UVA",  'VECTOR')
@@ -208,9 +265,9 @@ def list_render_passes(scene, srl):
    if srl.use_pass_environment:           yield ("Env",           "RGB",  'COLOR')
    if srl.use_pass_shadow:                yield ("Shadow",        "RGB",  'COLOR')
    if srl.use_pass_ambient_occlusion:     yield ("AO",            "RGB",  'COLOR')
-    if crl.use_pass_shadow_catcher:        yield ("Shadow Catcher",      "RGB",  'COLOR')

    # Debug passes.
+    if crl.pass_debug_render_time:             yield ("Debug Render Time",             "X",   'VALUE')
    if crl.pass_debug_sample_count:            yield ("Debug Sample Count",            "X",   'VALUE')

    # Cryptomatte passes.
@@ -226,20 +283,30 @@ def list_render_passes(scene, srl):
            yield ("CryptoAsset" + '{:02d}'.format(i), "RGBA", 'COLOR')

    # Denoising passes.
-    if scene.cycles.use_denoising and crl.use_denoising:
+    if (scene.cycles.use_denoising and crl.use_denoising) or crl.denoising_store_passes:
        yield ("Noisy Image", "RGBA", 'COLOR')
-        if crl.use_pass_shadow_catcher:
-            yield ("Noisy Shadow Catcher", "RGBA", 'COLOR')
-    if crl.denoising_store_passes:
-        yield ("Denoising Normal",          "XYZ", 'VECTOR')
-        yield ("Denoising Albedo",          "RGB", 'COLOR')
+        if crl.denoising_store_passes:
+            yield ("Denoising Normal",          "XYZ", 'VECTOR')
+            yield ("Denoising Albedo",          "RGB", 'COLOR')
+            yield ("Denoising Depth",           "Z",   'VALUE')
+
+            if scene.cycles.denoiser == 'NLM':
+                yield ("Denoising Shadowing",       "X",   'VALUE')
+                yield ("Denoising Variance",        "RGB", 'COLOR')
+                yield ("Denoising Intensity",       "X",   'VALUE')
+
+                clean_options = ("denoising_diffuse_direct", "denoising_diffuse_indirect",
+                                 "denoising_glossy_direct", "denoising_glossy_indirect",
+                                 "denoising_transmission_direct", "denoising_transmission_indirect")
+                if any(getattr(crl, option) for option in clean_options):
+                    yield ("Denoising Clean", "RGB", 'COLOR')

    # Custom AOV passes.
    for aov in srl.aovs:
        if aov.type == 'VALUE':
            yield (aov.name, "X", 'VALUE')
        else:
-            yield (aov.name, "RGB", 'COLOR')
+            yield (aov.name, "RGBA", 'COLOR')


 def register_passes(engine, scene, view_layer):
--- a/intern/cycles/blender/addon/presets.py
+++ b/intern/cycles/blender/addon/presets.py
@@ -60,48 +60,32 @@ class AddPresetSampling(AddPresetBase, Operator):
    ]

    preset_values = [
-        "cycles.use_adaptive_sampling",
        "cycles.samples",
-        "cycles.adaptive_threshold",
-        "cycles.adaptive_min_samples",
-        "cycles.time_limit",
-        "cycles.use_denoising",
-        "cycles.denoiser",
-        "cycles.denoising_input_passes",
-        "cycles.denoising_prefilter",
+        "cycles.preview_samples",
+        "cycles.aa_samples",
+        "cycles.preview_aa_samples",
+        "cycles.diffuse_samples",
+        "cycles.glossy_samples",
+        "cycles.transmission_samples",
+        "cycles.ao_samples",
+        "cycles.mesh_light_samples",
+        "cycles.subsurface_samples",
+        "cycles.volume_samples",
+        "cycles.use_square_samples",
+        "cycles.progressive",
+        "cycles.seed",
+        "cycles.sample_clamp_direct",
+        "cycles.sample_clamp_indirect",
+        "cycles.sample_all_lights_direct",
+        "cycles.sample_all_lights_indirect",
    ]

    preset_subdir = "cycles/sampling"


-class AddPresetViewportSampling(AddPresetBase, Operator):
-    '''Add a Viewport Sampling Preset'''
-    bl_idname = "render.cycles_viewport_sampling_preset_add"
-    bl_label = "Add Viewport Sampling Preset"
-    preset_menu = "CYCLES_PT_viewport_sampling_presets"
-
-    preset_defines = [
-        "cycles = bpy.context.scene.cycles"
-    ]
-
-    preset_values = [
-        "cycles.use_preview_adaptive_sampling",
-        "cycles.preview_samples",
-        "cycles.preview_adaptive_threshold",
-        "cycles.preview_adaptive_min_samples",
-        "cycles.use_preview_denoising",
-        "cycles.preview_denoiser",
-        "cycles.preview_denoising_input_passes",
-        "cycles.preview_denoising_prefilter",
-        "cycles.preview_denoising_start_sample",
-    ]
-
-    preset_subdir = "cycles/viewport_sampling"
-
 classes = (
    AddPresetIntegrator,
    AddPresetSampling,
-    AddPresetViewportSampling,
 )


--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -39,6 +39,11 @@ enum_devices = (
    ('GPU', "GPU Compute", "Use GPU compute device for rendering, configured in the system tab in the user preferences"),
 )

+from _cycles import with_network
+if with_network:
+    enum_devices += (('NETWORK', "Networked Device", "Use networked device for rendering"),)
+del with_network
+
 enum_feature_set = (
    ('SUPPORTED', "Supported", "Only use finished and supported features"),
    ('EXPERIMENTAL', "Experimental", "Use experimental and incomplete features that might be broken or change in the future", 'ERROR', 1),
@@ -79,6 +84,15 @@ enum_curve_shape = (
    ('THICK', "3D Curves", "Render hair as 3D curve, for accurate results when viewing hair close up"),
 )

+enum_tile_order = (
+    ('CENTER', "Center", "Render from center to the edges"),
+    ('RIGHT_TO_LEFT', "Right to Left", "Render from right to left"),
+    ('LEFT_TO_RIGHT', "Left to Right", "Render from left to right"),
+    ('TOP_TO_BOTTOM', "Top to Bottom", "Render from top to bottom"),
+    ('BOTTOM_TO_TOP', "Bottom to Top", "Render from bottom to top"),
+    ('HILBERT_SPIRAL', "Hilbert Spiral", "Render in a Hilbert Spiral"),
+)
+
 enum_use_layer_samples = (
    ('USE', "Use", "Per render layer number of samples override scene samples"),
    ('BOUNDED', "Bounded", "Bound per render layer number of samples by global samples"),
@@ -87,9 +101,15 @@ enum_use_layer_samples = (

 enum_sampling_pattern = (
    ('SOBOL', "Sobol", "Use Sobol random sampling pattern"),
+    ('CORRELATED_MUTI_JITTER', "Correlated Multi-Jitter", "Use Correlated Multi-Jitter random sampling pattern"),
    ('PROGRESSIVE_MUTI_JITTER', "Progressive Multi-Jitter", "Use Progressive Multi-Jitter random sampling pattern"),
 )

+enum_integrator = (
+    ('BRANCHED_PATH', "Branched Path Tracing", "Path tracing integrator that branches on the first bounce, giving more control over the number of light and material samples"),
+    ('PATH', "Path Tracing", "Pure path tracing integrator"),
+)
+
 enum_volume_sampling = (
    ('DISTANCE', "Distance", "Use distance sampling, best for dense volumes with lights far away"),
    ('EQUIANGULAR', "Equiangular", "Use equiangular sampling, best for volumes with low density with light inside or near the volume"),
@@ -111,7 +131,7 @@ enum_device_type = (
    ('CPU', "CPU", "CPU", 0),
    ('CUDA', "CUDA", "CUDA", 1),
    ('OPTIX', "OptiX", "OptiX", 3),
-    ("HIP", "HIP", "HIP", 4)
+    ('OPENCL', "OpenCL", "OpenCL", 2)
 )

 enum_texture_limit = (
@@ -125,45 +145,38 @@ enum_texture_limit = (
    ('8192', "8192", "Limit texture size to 8192 pixels", 7),
 )

-# NOTE: Identifiers are expected to be an upper case version of identifiers from  `Pass::get_type_enum()`
 enum_view3d_shading_render_pass = (
    ('', "General", ""),

-    ('COMBINED', "Combined", "Show the Combined Render pass"),
-    ('EMISSION', "Emission", "Show the Emission render pass"),
-    ('BACKGROUND', "Background", "Show the Background render pass"),
-    ('AO', "Ambient Occlusion", "Show the Ambient Occlusion render pass"),
-    ('SHADOW', "Shadow", "Show the Shadow render pass"),
-    ('SHADOW_CATCHER', "Shadow Catcher", "Show the Shadow Catcher render pass"),
+    ('COMBINED', "Combined", "Show the Combined Render pass", 1),
+    ('EMISSION', "Emission", "Show the Emission render pass", 33),
+    ('BACKGROUND', "Background", "Show the Background render pass", 34),
+    ('AO', "Ambient Occlusion", "Show the Ambient Occlusion render pass", 35),

    ('', "Light", ""),

-    ('DIFFUSE_DIRECT', "Diffuse Direct", "Show the Diffuse Direct render pass"),
-    ('DIFFUSE_INDIRECT', "Diffuse Indirect", "Show the Diffuse Indirect render pass"),
-    ('DIFFUSE_COLOR', "Diffuse Color", "Show the Diffuse Color render pass"),
+    ('DIFFUSE_DIRECT', "Diffuse Direct", "Show the Diffuse Direct render pass", 38),
+    ('DIFFUSE_INDIRECT', "Diffuse Indirect", "Show the Diffuse Indirect render pass", 39),
+    ('DIFFUSE_COLOR', "Diffuse Color", "Show the Diffuse Color render pass", 40),

-    ('GLOSSY_DIRECT', "Glossy Direct", "Show the Glossy Direct render pass"),
-    ('GLOSSY_INDIRECT', "Glossy Indirect", "Show the Glossy Indirect render pass"),
-    ('GLOSSY_COLOR', "Glossy Color", "Show the Glossy Color render pass"),
+    ('GLOSSY_DIRECT', "Glossy Direct", "Show the Glossy Direct render pass", 41),
+    ('GLOSSY_INDIRECT', "Glossy Indirect", "Show the Glossy Indirect render pass", 42),
+    ('GLOSSY_COLOR', "Glossy Color", "Show the Glossy Color render pass", 43),

    ('', "", ""),

-    ('TRANSMISSION_DIRECT', "Transmission Direct", "Show the Transmission Direct render pass"),
-    ('TRANSMISSION_INDIRECT', "Transmission Indirect", "Show the Transmission Indirect render pass"),
-    ('TRANSMISSION_COLOR', "Transmission Color", "Show the Transmission Color render pass"),
+    ('TRANSMISSION_DIRECT', "Transmission Direct", "Show the Transmission Direct render pass", 44),
+    ('TRANSMISSION_INDIRECT', "Transmission Indirect", "Show the Transmission Indirect render pass", 45),
+    ('TRANSMISSION_COLOR', "Transmission Color", "Show the Transmission Color render pass", 46),

-    ('VOLUME_DIRECT', "Volume Direct", "Show the Volume Direct render pass"),
-    ('VOLUME_INDIRECT', "Volume Indirect", "Show the Volume Indirect render pass"),
+    ('VOLUME_DIRECT', "Volume Direct", "Show the Volume Direct render pass", 50),
+    ('VOLUME_INDIRECT', "Volume Indirect", "Show the Volume Indirect render pass", 51),

    ('', "Data", ""),

-    ('POSITION', "Position", "Show the Position render pass"),
-    ('NORMAL', "Normal", "Show the Normal render pass"),
-    ('UV', "UV", "Show the UV render pass"),
-    ('MIST', "Mist", "Show the Mist render pass"),
-    ('DENOISING_ALBEDO', "Denoising Albedo", "Albedo pass used by denoiser"),
-    ('DENOISING_NORMAL', "Denoising Normal", "Normal pass used by denoiser"),
-    ('SAMPLE_COUNT', "Sample Count", "Per-pixel number of samples"),
+    ('NORMAL', "Normal", "Show the Normal render pass", 3),
+    ('UV', "UV", "Show the UV render pass", 4),
+    ('MIST', "Mist", "Show the Mist render pass", 32),
 )


@@ -195,23 +208,18 @@ def enum_preview_denoiser(self, context):


 def enum_denoiser(self, context):
-    items = []
+    items = [('NLM', "NLM", "Cycles native non-local means denoiser, running on any compute device", 1)]
    items += enum_optix_denoiser(self, context)
    items += enum_openimagedenoise_denoiser(self, context)
    return items


 enum_denoising_input_passes = (
-    ('RGB', "None", "Don't use utility passes for denoising", 1),
-    ('RGB_ALBEDO', "Albedo", "Use albedo pass for denoising", 2),
-    ('RGB_ALBEDO_NORMAL', "Albedo and Normal", "Use albedo and normal passes for denoising", 3),
+    ('RGB', "Color", "Use only color as input", 1),
+    ('RGB_ALBEDO', "Color + Albedo", "Use color and albedo data as input", 2),
+    ('RGB_ALBEDO_NORMAL', "Color + Albedo + Normal", "Use color, albedo and normal data as input", 3),
 )

-enum_denoising_prefilter = (
-    ('NONE', "None", "No prefiltering, use when guiding passes are noise-free", 1),
-    ('FAST', "Fast", "Denoise color and guiding passes together. Improves quality when guiding passes are noisy using least amount of extra processing time", 2),
-    ('ACCURATE', "Accurate", "Prefilter noisy guiding passes before denoising color. Improves quality when guiding passes are noisy using extra processing time", 3),
-)

 def update_render_passes(self, context):
    scene = context.scene
@@ -244,6 +252,13 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
        description="Use Open Shading Language (CPU rendering only)",
    )

+    progressive: EnumProperty(
+        name="Integrator",
+        description="Method to sample lights and materials",
+        items=enum_integrator,
+        default='PATH',
+    )
+
    preview_pause: BoolProperty(
        name="Pause Preview",
        description="Pause all viewport preview renders",
@@ -253,88 +268,110 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
    use_denoising: BoolProperty(
        name="Use Denoising",
        description="Denoise the rendered image",
-        default=True,
+        default=False,
        update=update_render_passes,
    )
-    denoiser: EnumProperty(
-        name="Denoiser",
-        description="Denoise the image with the selected denoiser. "
-        "For denoising the image after rendering",
-        items=enum_denoiser,
-        default=4, # Use integer to avoid error in builds without OpenImageDenoise.
-        update=update_render_passes,
-    )
-    denoising_prefilter: EnumProperty(
-        name="Denoising Prefilter",
-        description="Prefilter noisy guiding (albedo and normal) passes to improve denoising quality when using OpenImageDenoiser",
-        items=enum_denoising_prefilter,
-        default='ACCURATE',
-    )
-    denoising_input_passes: EnumProperty(
-        name="Denoising Input Passes",
-        description="Passes used by the denoiser to distinguish noise from shader and geometry detail",
-        items=enum_denoising_input_passes,
-        default='RGB_ALBEDO_NORMAL',
-    )
-
    use_preview_denoising: BoolProperty(
        name="Use Viewport Denoising",
        description="Denoise the image in the 3D viewport",
        default=False,
    )
+
+    denoiser: EnumProperty(
+        name="Denoiser",
+        description="Denoise the image with the selected denoiser. "
+        "For denoising the image after rendering, denoising data render passes "
+        "also adapt to the selected denoiser",
+        items=enum_denoiser,
+        default=1,
+        update=update_render_passes,
+    )
    preview_denoiser: EnumProperty(
        name="Viewport Denoiser",
        description="Denoise the image after each preview update with the selected denoiser",
        items=enum_preview_denoiser,
        default=0,
    )
-    preview_denoising_prefilter: EnumProperty(
-        name="Viewport Denoising Prefilter",
-        description="Prefilter noisy guiding (albedo and normal) passes to improve denoising quality when using OpenImageDenoiser",
-        items=enum_denoising_prefilter,
-        default='FAST',
-    )
-    preview_denoising_input_passes: EnumProperty(
-        name="Viewport Denoising Input Passes",
-        description="Passes used by the denoiser to distinguish noise from shader and geometry detail",
-        items=enum_denoising_input_passes,
-        default='RGB_ALBEDO',
-    )
-    preview_denoising_start_sample: IntProperty(
-        name="Start Denoising",
-        description="Sample to start denoising the preview at",
-        min=0, max=(1 << 24),
-        default=1,
+
+    use_square_samples: BoolProperty(
+        name="Square Samples",
+        description="Square sampling values for easier artist control",
+        default=False,
    )

    samples: IntProperty(
        name="Samples",
        description="Number of samples to render for each pixel",
        min=1, max=(1 << 24),
-        default=4096,
+        default=128,
    )
    preview_samples: IntProperty(
        name="Viewport Samples",
        description="Number of samples to render in the viewport, unlimited if 0",
        min=0, max=(1 << 24),
-        default=1024,
+        default=32,
+    )
+    aa_samples: IntProperty(
+        name="AA Samples",
+        description="Number of antialiasing samples to render for each pixel",
+        min=1, max=2097151,
+        default=128,
+    )
+    preview_aa_samples: IntProperty(
+        name="AA Samples",
+        description="Number of antialiasing samples to render in the viewport, unlimited if 0",
+        min=0, max=2097151,
+        default=32,
    )

-    time_limit: FloatProperty(
-        name="Time Limit",
-        description="Limit the render time (excluding synchronization time)."
-        "Zero disables the limit",
-        min=0.0,
-        default=0.0,
-        step=100.0,
-        unit='TIME_ABSOLUTE',
+    diffuse_samples: IntProperty(
+        name="Diffuse Samples",
+        description="Number of diffuse bounce samples to render for each AA sample",
+        min=1, max=1024,
+        default=1,
+    )
+    glossy_samples: IntProperty(
+        name="Glossy Samples",
+        description="Number of glossy bounce samples to render for each AA sample",
+        min=1, max=1024,
+        default=1,
+    )
+    transmission_samples: IntProperty(
+        name="Transmission Samples",
+        description="Number of transmission bounce samples to render for each AA sample",
+        min=1, max=1024,
+        default=1,
+    )
+    ao_samples: IntProperty(
+        name="Ambient Occlusion Samples",
+        description="Number of ambient occlusion samples to render for each AA sample",
+        min=1, max=1024,
+        default=1,
+    )
+    mesh_light_samples: IntProperty(
+        name="Mesh Light Samples",
+        description="Number of mesh emission light samples to render for each AA sample",
+        min=1, max=1024,
+        default=1,
+    )
+    subsurface_samples: IntProperty(
+        name="Subsurface Samples",
+        description="Number of subsurface scattering samples to render for each AA sample",
+        min=1, max=1024,
+        default=1,
+    )
+    volume_samples: IntProperty(
+        name="Volume Samples",
+        description="Number of volume scattering samples to render for each AA sample",
+        min=1, max=1024,
+        default=1,
    )

    sampling_pattern: EnumProperty(
        name="Sampling Pattern",
        description="Random sampling pattern used by the integrator",
        items=enum_sampling_pattern,
-        default='PROGRESSIVE_MUTI_JITTER',
+        default='SOBOL',
    )

    use_layer_samples: EnumProperty(
@@ -344,6 +381,17 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
        default='USE',
    )

+    sample_all_lights_direct: BoolProperty(
+        name="Sample All Direct Lights",
+        description="Sample all lights (for direct samples), rather than randomly picking one",
+        default=True,
+    )
+
+    sample_all_lights_indirect: BoolProperty(
+        name="Sample All Indirect Lights",
+        description="Sample all lights (for indirect samples), rather than randomly picking one",
+        default=True,
+    )
    light_sampling_threshold: FloatProperty(
        name="Light Sampling Threshold",
        description="Probabilistically terminate light samples when the light contribution is below this threshold (more noise but faster rendering). "
@@ -355,39 +403,19 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
    use_adaptive_sampling: BoolProperty(
        name="Use Adaptive Sampling",
        description="Automatically reduce the number of samples per pixel based on estimated noise level",
-        default=True,
+        default=False,
    )
+
    adaptive_threshold: FloatProperty(
        name="Adaptive Sampling Threshold",
        description="Noise level step to stop sampling at, lower values reduce noise at the cost of render time. Zero for automatic setting based on number of AA samples",
        min=0.0, max=1.0,
-        soft_min=0.001,
-        default=0.01,
+        default=0.0,
        precision=4,
    )
    adaptive_min_samples: IntProperty(
        name="Adaptive Min Samples",
-        description="Minimum AA samples for adaptive sampling, to discover noisy features before stopping sampling. Zero for automatic setting based on noise threshold",
-        min=0, max=4096,
-        default=0,
-    )
-
-    use_preview_adaptive_sampling: BoolProperty(
-        name="Use Adaptive Sampling",
-        description="Automatically reduce the number of samples per pixel based on estimated noise level, for viewport renders",
-        default=True,
-    )
-    preview_adaptive_threshold: FloatProperty(
-        name="Adaptive Sampling Threshold",
-        description="Noise level step to stop sampling at, lower values reduce noise at the cost of render time. Zero for automatic setting based on number of AA samples, for viewport renders",
-        min=0.0, max=1.0,
-        soft_min=0.001,
-        default=0.1,
-        precision=4,
-    )
-    preview_adaptive_min_samples: IntProperty(
-        name="Adaptive Min Samples",
-        description="Minimum AA samples for adaptive sampling, to discover noisy features before stopping sampling. Zero for automatic setting based on noise threshold, for viewport renders",
+        description="Minimum AA samples for adaptive sampling, to discover noisy features before stopping sampling. Zero for automatic setting based on number of AA samples",
        min=0, max=4096,
        default=0,
    )
@@ -604,6 +632,53 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
        default=10.0,
    )

+    debug_tile_size: IntProperty(
+        name="Tile Size",
+        description="",
+        min=1, max=4096,
+        default=1024,
+    )
+
+    preview_start_resolution: IntProperty(
+        name="Start Resolution",
+        description="Resolution to start rendering preview at, "
+        "progressively increasing it to the full viewport size",
+        min=8, max=16384,
+        default=64,
+        subtype='PIXEL'
+    )
+    preview_denoising_start_sample: IntProperty(
+        name="Start Denoising",
+        description="Sample to start denoising the preview at",
+        min=0, max=(1 << 24),
+        default=1,
+    )
+    preview_denoising_input_passes: EnumProperty(
+        name="Viewport Input Passes",
+        description="Passes used by the denoiser to distinguish noise from shader and geometry detail",
+        items=enum_denoising_input_passes,
+        default='RGB_ALBEDO',
+    )
+
+    debug_reset_timeout: FloatProperty(
+        name="Reset timeout",
+        description="",
+        min=0.01, max=10.0,
+        default=0.1,
+    )
+    debug_cancel_timeout: FloatProperty(
+        name="Cancel timeout",
+        description="",
+        min=0.01, max=10.0,
+        default=0.1,
+    )
+    debug_text_timeout: FloatProperty(
+        name="Text timeout",
+        description="",
+        min=0.01, max=10.0,
+        default=1.0,
+    )
+
    debug_bvh_type: EnumProperty(
        name="Viewport BVH Type",
        description="Choose between faster updates, or faster render",
@@ -626,24 +701,38 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
        default=0,
        min=0, max=16,
    )
+    tile_order: EnumProperty(
+        name="Tile Order",
+        description="Tile order for rendering",
+        items=enum_tile_order,
+        default='HILBERT_SPIRAL',
+        options=set(),  # Not animatable!
+    )
+    use_progressive_refine: BoolProperty(
+        name="Progressive Refine",
+        description="Instead of rendering each tile until it is finished, "
+        "refine the whole image progressively "
+        "(this renders somewhat slower, "
+        "but time can be saved by manually stopping the render when the noise is low enough)",
+        default=False,
+    )

    bake_type: EnumProperty(
        name="Bake Type",
        default='COMBINED',
        description="Type of pass to bake",
        items=(
-            ('COMBINED', "Combined", "", 0),
-            ('AO', "Ambient Occlusion", "", 1),
-            ('SHADOW', "Shadow", "", 2),
-            ('POSITION', "Position", "", 11),
-            ('NORMAL', "Normal", "", 3),
-            ('UV', "UV", "", 4),
-            ('ROUGHNESS', "Roughness", "", 5),
-            ('EMIT', "Emit", "", 6),
-            ('ENVIRONMENT', "Environment", "", 7),
-            ('DIFFUSE', "Diffuse", "", 8),
-            ('GLOSSY', "Glossy", "", 9),
-            ('TRANSMISSION', "Transmission", "", 10),
+            ('COMBINED', "Combined", ""),
+            ('AO', "Ambient Occlusion", ""),
+            ('SHADOW', "Shadow", ""),
+            ('NORMAL', "Normal", ""),
+            ('UV', "UV", ""),
+            ('ROUGHNESS', "Roughness", ""),
+            ('EMIT', "Emit", ""),
+            ('ENVIRONMENT', "Environment", ""),
+            ('DIFFUSE', "Diffuse", ""),
+            ('GLOSSY', "Glossy", ""),
+            ('TRANSMISSION', "Transmission", ""),
        ),
    )

@@ -738,18 +827,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
        min=0, max=1024,
    )

-    use_auto_tile: BoolProperty(
-        name="Auto Tiles",
-        description="Automatically render high resolution images in tiles to reduce memory usage, using the specified tile size. Tiles are cached to disk while rendering to save memory",
-        default=True,
-    )
-    tile_size: IntProperty(
-        name="Tile Size",
-        default=2048,
-        description="",
-        min=8, max=16384,
-    )
-
    # Various fine-tuning debug flags

    def _devices_update_callback(self, context):
@@ -767,13 +844,45 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
        items=enum_bvh_layouts,
        default='EMBREE',
    )
+    debug_use_cpu_split_kernel: BoolProperty(name="Split Kernel", default=False)

    debug_use_cuda_adaptive_compile: BoolProperty(name="Adaptive Compile", default=False)
+    debug_use_cuda_split_kernel: BoolProperty(name="Split Kernel", default=False)

-    debug_use_optix_debug: BoolProperty(
-        name="OptiX Module Debug",
-        description="Load OptiX module in debug mode: lower logging verbosity level, enable validations, and lower optimization level",
-        default=False
+    debug_optix_cuda_streams: IntProperty(name="CUDA Streams", default=1, min=1)
+    debug_optix_curves_api: BoolProperty(name="Native OptiX Curve Primitive", default=False)
+
+    debug_opencl_kernel_type: EnumProperty(
+        name="OpenCL Kernel Type",
+        default='DEFAULT',
+        items=(
+            ('DEFAULT', "Default", ""),
+            ('MEGA', "Mega", ""),
+            ('SPLIT', "Split", ""),
+        ),
+        update=CyclesRenderSettings._devices_update_callback
+    )
+
+    debug_opencl_device_type: EnumProperty(
+        name="OpenCL Device Type",
+        default='ALL',
+        items=(
+            ('NONE', "None", ""),
+            ('ALL', "All", ""),
+            ('DEFAULT', "Default", ""),
+            ('CPU', "CPU", ""),
+            ('GPU', "GPU", ""),
+            ('ACCELERATOR', "Accelerator", ""),
+        ),
+        update=CyclesRenderSettings._devices_update_callback
+    )
+
+    debug_use_opencl_debug: BoolProperty(name="Debug OpenCL", default=False)
+
+    debug_opencl_mem_limit: IntProperty(
+        name="Memory limit",
+        default=0,
+        description="Artificial limit on OpenCL memory usage in MB (0 to disable limit)"
    )

    @classmethod
@@ -922,6 +1031,12 @@ class CyclesLightSettings(bpy.types.PropertyGroup):
        description="Light casts shadows",
        default=True,
    )
+    samples: IntProperty(
+        name="Samples",
+        description="Number of light samples to render for each AA sample",
+        min=1, max=10000,
+        default=1,
+    )
    max_bounces: IntProperty(
        name="Max Bounces",
        description="Maximum number of bounces the light will contribute to the render",
@@ -969,6 +1084,12 @@ class CyclesWorldSettings(bpy.types.PropertyGroup):
        min=4, max=8192,
        default=1024,
    )
+    samples: IntProperty(
+        name="Samples",
+        description="Number of light samples to render for each AA sample",
+        min=1, max=10000,
+        default=1,
+    )
    max_bounces: IntProperty(
        name="Max Bounces",
        description="Maximum number of bounces the background light will contribute to the render",
@@ -1197,6 +1318,12 @@ class CyclesCurveRenderSettings(bpy.types.PropertyGroup):

 class CyclesRenderLayerSettings(bpy.types.PropertyGroup):

+    pass_debug_render_time: BoolProperty(
+        name="Debug Render Time",
+        description="Render time in milliseconds per sample and pixel",
+        default=False,
+        update=update_render_passes,
+    )
    pass_debug_sample_count: BoolProperty(
        name="Debug Sample Count",
        description="Number of samples/camera rays per pixel",
@@ -1216,25 +1343,91 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
        update=update_render_passes,
    )

-    use_pass_shadow_catcher: BoolProperty(
-        name="Shadow Catcher",
-        description="Pass containing shadows and light which is to be multiplied into backdrop",
-        default=False,
-        update=update_render_passes,
-    )
-
    use_denoising: BoolProperty(
        name="Use Denoising",
        description="Denoise the rendered image",
        default=True,
        update=update_render_passes,
    )
+    denoising_diffuse_direct: BoolProperty(
+        name="Diffuse Direct",
+        description="Denoise the direct diffuse lighting",
+        default=True,
+    )
+    denoising_diffuse_indirect: BoolProperty(
+        name="Diffuse Indirect",
+        description="Denoise the indirect diffuse lighting",
+        default=True,
+    )
+    denoising_glossy_direct: BoolProperty(
+        name="Glossy Direct",
+        description="Denoise the direct glossy lighting",
+        default=True,
+    )
+    denoising_glossy_indirect: BoolProperty(
+        name="Glossy Indirect",
+        description="Denoise the indirect glossy lighting",
+        default=True,
+    )
+    denoising_transmission_direct: BoolProperty(
+        name="Transmission Direct",
+        description="Denoise the direct transmission lighting",
+        default=True,
+    )
+    denoising_transmission_indirect: BoolProperty(
+        name="Transmission Indirect",
+        description="Denoise the indirect transmission lighting",
+        default=True,
+    )
+    denoising_strength: FloatProperty(
+        name="Denoising Strength",
+        description="Controls neighbor pixel weighting for the denoising filter (lower values preserve more detail, but aren't as smooth)",
+        min=0.0, max=1.0,
+        default=0.5,
+    )
+    denoising_feature_strength: FloatProperty(
+        name="Denoising Feature Strength",
+        description="Controls removal of noisy image feature passes (lower values preserve more detail, but aren't as smooth)",
+        min=0.0, max=1.0,
+        default=0.5,
+    )
+    denoising_radius: IntProperty(
+        name="Denoising Radius",
+        description="Size of the image area that's used to denoise a pixel (higher values are smoother, but might lose detail and are slower)",
+        min=1, max=25,
+        default=8,
+        subtype="PIXEL",
+    )
+    denoising_relative_pca: BoolProperty(
+        name="Relative Filter",
+        description="When removing pixels that don't carry information, use a relative threshold instead of an absolute one (can help to reduce artifacts, but might cause detail loss around edges)",
+        default=False,
+    )
    denoising_store_passes: BoolProperty(
        name="Store Denoising Passes",
        description="Store the denoising feature passes and the noisy image. The passes adapt to the denoiser selected for rendering",
        default=False,
        update=update_render_passes,
    )
+    denoising_neighbor_frames: IntProperty(
+        name="Neighbor Frames",
+        description="Number of neighboring frames to use for denoising animations (more frames produce smoother results at the cost of performance)",
+        min=0, max=7,
+        default=0,
+    )
+
+    denoising_optix_input_passes: EnumProperty(
+        name="Input Passes",
+        description="Passes used by the denoiser to distinguish noise from shader and geometry detail",
+        items=enum_denoising_input_passes,
+        default='RGB_ALBEDO',
+    )
+    denoising_openimagedenoise_input_passes: EnumProperty(
+        name="Input Passes",
+        description="Passes used by the denoiser to distinguish noise from shader and geometry detail",
+        items=enum_denoising_input_passes,
+        default='RGB_ALBEDO_NORMAL',
+    )

    @classmethod
    def register(cls):
@@ -1261,16 +1454,14 @@ class CyclesPreferences(bpy.types.AddonPreferences):

    def get_device_types(self, context):
        import _cycles
-        has_cuda, has_optix, has_hip = _cycles.get_device_types()
-
+        has_cuda, has_optix, has_opencl = _cycles.get_device_types()
        list = [('NONE', "None", "Don't use compute device", 0)]
        if has_cuda:
            list.append(('CUDA', "CUDA", "Use CUDA for GPU acceleration", 1))
        if has_optix:
            list.append(('OPTIX', "OptiX", "Use OptiX for GPU acceleration", 3))
-        if has_hip:
-            list.append(('HIP', "HIP", "Use HIP for GPU acceleration", 4))
-
+        if has_opencl:
+            list.append(('OPENCL', "OpenCL", "Use OpenCL for GPU acceleration", 2))
        return list

    compute_device_type: EnumProperty(
@@ -1295,7 +1486,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):

    def update_device_entries(self, device_list):
        for device in device_list:
-            if not device[1] in {'CUDA', 'OPTIX', 'CPU', 'HIP'}:
+            if not device[1] in {'CUDA', 'OPTIX', 'OPENCL', 'CPU'}:
                continue
            # Try to find existing Device entry
            entry = self.find_existing_device_entry(device)
@@ -1329,23 +1520,22 @@ class CyclesPreferences(bpy.types.AddonPreferences):
            elif entry.type == 'CPU':
                cpu_devices.append(entry)
        # Extend all GPU devices with CPU.
-        if compute_device_type != 'CPU' and compute_device_type != 'HIP':
+        if compute_device_type in {'CUDA', 'OPTIX', 'OPENCL'}:
            devices.extend(cpu_devices)
        return devices

-    # Refresh device list. This does not happen automatically on Blender
-    # startup due to unstable OpenCL implementations that can cause crashes.
-    def refresh_devices(self):
+    # For backwards compatibility, only returns CUDA and OpenCL but still
+    # refreshes all devices.
+    def get_devices(self, compute_device_type=''):
        import _cycles
        # Ensure `self.devices` is not re-allocated when the second call to
        # get_devices_for_type is made, freeing items from the first list.
-        for device_type in ('CUDA', 'OPTIX', 'HIP'):
+        for device_type in ('CUDA', 'OPTIX', 'OPENCL'):
            self.update_device_entries(_cycles.available_devices(device_type))

-    # Deprecated: use refresh_devices instead.
-    def get_devices(self, compute_device_type=''):
-        self.refresh_devices()
-        return None
+        cuda_devices = self.get_devices_for_type('CUDA')
+        opencl_devices = self.get_devices_for_type('OPENCL')
+        return cuda_devices, opencl_devices

    def get_num_gpu_devices(self):
        import _cycles
@@ -1411,10 +1601,6 @@ class CyclesView3DShadingSettings(bpy.types.PropertyGroup):
        items=enum_view3d_shading_render_pass,
        default='COMBINED',
    )
-    show_active_pixels: BoolProperty(
-        name="Show Active Pixels",
-        description="When using adaptive sampling highlight pixels which are being sampled",
-    )


 def register():
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -34,12 +34,6 @@ class CYCLES_PT_sampling_presets(PresetPanel, Panel):
    preset_add_operator = "render.cycles_sampling_preset_add"
    COMPAT_ENGINES = {'CYCLES'}

-class CYCLES_PT_viewport_sampling_presets(PresetPanel, Panel):
-    bl_label = "Viewport Sampling Presets"
-    preset_subdir = "cycles/viewport_sampling"
-    preset_operator = "script.execute_preset"
-    preset_add_operator = "render.cycles_viewport_sampling_preset_add"
-    COMPAT_ENGINES = {'CYCLES'}

 class CYCLES_PT_integrator_presets(PresetPanel, Panel):
    bl_label = "Integrator Presets"
@@ -60,15 +54,6 @@ class CyclesButtonsPanel:
        return context.engine in cls.COMPAT_ENGINES


-class CyclesDebugButtonsPanel(CyclesButtonsPanel):
-    @classmethod
-    def poll(cls, context):
-        prefs = bpy.context.preferences
-        return (CyclesButtonsPanel.poll(context)
-                and prefs.experimental.use_cycles_debug
-                and prefs.view.show_developer_ui)
-
-
 # Adapt properties editor panel to display in node editor. We have to
 # copy the class rather than inherit due to the way bpy registration works.
 def node_panel(cls):
@@ -93,23 +78,30 @@ def use_cpu(context):
    return (get_device_type(context) == 'NONE' or cscene.device == 'CPU')


+def use_opencl(context):
+    cscene = context.scene.cycles
+
+    return (get_device_type(context) == 'OPENCL' and cscene.device == 'GPU')
+
+
 def use_cuda(context):
    cscene = context.scene.cycles

    return (get_device_type(context) == 'CUDA' and cscene.device == 'GPU')


-def use_hip(context):
-    cscene = context.scene.cycles
-
-    return (get_device_type(context) == 'HIP' and cscene.device == 'GPU')
-
 def use_optix(context):
    cscene = context.scene.cycles

    return (get_device_type(context) == 'OPTIX' and cscene.device == 'GPU')


+def use_branched_path(context):
+    cscene = context.scene.cycles
+
+    return (cscene.progressive == 'BRANCHED_PATH' and not use_optix(context))
+
+
 def use_sample_all_lights(context):
    cscene = context.scene.cycles

@@ -123,93 +115,55 @@ def show_device_active(context):
    return context.preferences.addons[__package__].preferences.has_active_device()


-def get_effective_preview_denoiser(context):
-    scene = context.scene
-    cscene = scene.cycles
+def draw_samples_info(layout, context):
+    cscene = context.scene.cycles
+    integrator = cscene.progressive

-    if cscene.preview_denoiser != "AUTO":
-        return cscene.preview_denoiser
+    # Calculate sample values
+    if integrator == 'PATH':
+        aa = cscene.samples
+        if cscene.use_square_samples:
+            aa = aa * aa
+    else:
+        aa = cscene.aa_samples
+        d = cscene.diffuse_samples
+        g = cscene.glossy_samples
+        t = cscene.transmission_samples
+        ao = cscene.ao_samples
+        ml = cscene.mesh_light_samples
+        sss = cscene.subsurface_samples
+        vol = cscene.volume_samples

-    if context.preferences.addons[__package__].preferences.get_devices_for_type('OPTIX'):
-        return 'OPTIX'
-
-    return 'OIDN'
+        if cscene.use_square_samples:
+            aa = aa * aa
+            d = d * d
+            g = g * g
+            t = t * t
+            ao = ao * ao
+            ml = ml * ml
+            sss = sss * sss
+            vol = vol * vol

+    # Draw interface
+    # Do not draw for progressive, when Square Samples are disabled
+    if use_branched_path(context) or (cscene.use_square_samples and integrator == 'PATH'):
+        col = layout.column(align=True)
+        col.scale_y = 0.6
+        col.label(text="Total Samples:")
+        col.separator()
+        if integrator == 'PATH':
+            col.label(text="%s AA" % aa)
+        else:
+            col.label(text="%s AA, %s Diffuse, %s Glossy, %s Transmission" %
+                      (aa, d * aa, g * aa, t * aa))
+            col.separator()
+            col.label(text="%s AO, %s Mesh Light, %s Subsurface, %s Volume" %
+                      (ao * aa, ml * aa, sss * aa, vol * aa))


 class CYCLES_RENDER_PT_sampling(CyclesButtonsPanel, Panel):
    bl_label = "Sampling"

-    def draw(self, context):
-        pass
-
-
-class CYCLES_RENDER_PT_sampling_viewport(CyclesButtonsPanel, Panel):
-    bl_label = "Viewport"
-    bl_parent_id = "CYCLES_RENDER_PT_sampling"
-
-    def draw_header_preset(self, context):
-        CYCLES_PT_viewport_sampling_presets.draw_panel_header(self.layout)
-
-    def draw(self, context):
-        layout = self.layout
-
-        scene = context.scene
-        cscene = scene.cycles
-
-        layout.use_property_split = True
-        layout.use_property_decorate = False
-
-        heading = layout.column(align=True, heading="Noise Threshold")
-        row = heading.row(align=True)
-        row.prop(cscene, "use_preview_adaptive_sampling", text="")
-        sub = row.row()
-        sub.active = cscene.use_preview_adaptive_sampling
-        sub.prop(cscene, "preview_adaptive_threshold", text="")
-
-        if cscene.use_preview_adaptive_sampling:
-            col = layout.column(align=True)
-            col.prop(cscene, "preview_samples", text=" Max Samples")
-            col.prop(cscene, "preview_adaptive_min_samples", text="Min Samples")
-        else:
-            layout.prop(cscene, "preview_samples", text="Samples")
-
-
-class CYCLES_RENDER_PT_sampling_viewport_denoise(CyclesButtonsPanel, Panel):
-    bl_label = "Denoise"
-    bl_parent_id = 'CYCLES_RENDER_PT_sampling_viewport'
-    bl_options = {'DEFAULT_CLOSED'}
-
-    def draw_header(self, context):
-        scene = context.scene
-        cscene = scene.cycles
-
-        self.layout.prop(context.scene.cycles, "use_preview_denoising", text="")
-
-    def draw(self, context):
-        layout = self.layout
-        layout.use_property_split = True
-        layout.use_property_decorate = False
-
-        scene = context.scene
-        cscene = scene.cycles
-
-        col = layout.column()
-        col.active = cscene.use_preview_denoising
-        col.prop(cscene, "preview_denoiser", text="Denoiser")
-        col.prop(cscene, "preview_denoising_input_passes", text="Passes")
-
-        effective_preview_denoiser = get_effective_preview_denoiser(context)
-        if effective_preview_denoiser == 'OPENIMAGEDENOISE':
-            col.prop(cscene, "preview_denoising_prefilter", text="Prefilter")
-
-        col.prop(cscene, "preview_denoising_start_sample", text="Start Sample")
-
-
-class CYCLES_RENDER_PT_sampling_render(CyclesButtonsPanel, Panel):
-    bl_label = "Render"
-    bl_parent_id = "CYCLES_RENDER_PT_sampling"
-
    def draw_header_preset(self, context):
        CYCLES_PT_sampling_presets.draw_panel_header(self.layout)

@@ -222,32 +176,29 @@ class CYCLES_RENDER_PT_sampling_render(CyclesButtonsPanel, Panel):
        layout.use_property_split = True
        layout.use_property_decorate = False

-        heading = layout.column(align=True, heading="Noise Threshold")
-        row = heading.row(align=True)
-        row.prop(cscene, "use_adaptive_sampling", text="")
-        sub = row.row()
-        sub.active = cscene.use_adaptive_sampling
-        sub.prop(cscene, "adaptive_threshold", text="")
+        if not use_optix(context):
+            layout.prop(cscene, "progressive")

-        col = layout.column(align=True)
-        if cscene.use_adaptive_sampling:
-            col.prop(cscene, "samples", text=" Max Samples")
-            col.prop(cscene, "adaptive_min_samples", text="Min Samples")
+        if not use_branched_path(context):
+            col = layout.column(align=True)
+            col.prop(cscene, "samples", text="Render")
+            col.prop(cscene, "preview_samples", text="Viewport")
        else:
-            col.prop(cscene, "samples", text="Samples")
-        col.prop(cscene, "time_limit")
+            col = layout.column(align=True)
+            col.prop(cscene, "aa_samples", text="Render")
+            col.prop(cscene, "preview_aa_samples", text="Viewport")
+
+        if not use_branched_path(context):
+            draw_samples_info(layout, context)


-class CYCLES_RENDER_PT_sampling_render_denoise(CyclesButtonsPanel, Panel):
-    bl_label = "Denoise"
-    bl_parent_id = 'CYCLES_RENDER_PT_sampling_render'
-    bl_options = {'DEFAULT_CLOSED'}
+class CYCLES_RENDER_PT_sampling_sub_samples(CyclesButtonsPanel, Panel):
+    bl_label = "Sub Samples"
+    bl_parent_id = "CYCLES_RENDER_PT_sampling"

-    def draw_header(self, context):
-        scene = context.scene
-        cscene = scene.cycles
-
-        self.layout.prop(context.scene.cycles, "use_denoising", text="")
+    @classmethod
+    def poll(cls, context):
+        return use_branched_path(context)

    def draw(self, context):
        layout = self.layout
@@ -257,12 +208,88 @@ class CYCLES_RENDER_PT_sampling_render_denoise(CyclesButtonsPanel, Panel):
        scene = context.scene
        cscene = scene.cycles

-        col = layout.column()
-        col.active = cscene.use_denoising
-        col.prop(cscene, "denoiser", text="Denoiser")
-        col.prop(cscene, "denoising_input_passes", text="Passes")
-        if cscene.denoiser == 'OPENIMAGEDENOISE':
-            col.prop(cscene, "denoising_prefilter", text="Prefilter")
+        col = layout.column(align=True)
+        col.prop(cscene, "diffuse_samples", text="Diffuse")
+        col.prop(cscene, "glossy_samples", text="Glossy")
+        col.prop(cscene, "transmission_samples", text="Transmission")
+        col.prop(cscene, "ao_samples", text="AO")
+
+        sub = col.row(align=True)
+        sub.active = use_sample_all_lights(context)
+        sub.prop(cscene, "mesh_light_samples", text="Mesh Light")
+        col.prop(cscene, "subsurface_samples", text="Subsurface")
+        col.prop(cscene, "volume_samples", text="Volume")
+
+        draw_samples_info(layout, context)
+
+
+class CYCLES_RENDER_PT_sampling_adaptive(CyclesButtonsPanel, Panel):
+    bl_label = "Adaptive Sampling"
+    bl_parent_id = "CYCLES_RENDER_PT_sampling"
+    bl_options = {'DEFAULT_CLOSED'}
+
+    def draw_header(self, context):
+        layout = self.layout
+        scene = context.scene
+        cscene = scene.cycles
+
+        layout.prop(cscene, "use_adaptive_sampling", text="")
+
+    def draw(self, context):
+        layout = self.layout
+        layout.use_property_split = True
+        layout.use_property_decorate = False
+
+        scene = context.scene
+        cscene = scene.cycles
+
+        layout.active = cscene.use_adaptive_sampling
+
+        col = layout.column(align=True)
+        col.prop(cscene, "adaptive_threshold", text="Noise Threshold")
+        col.prop(cscene, "adaptive_min_samples", text="Min Samples")
+
+
+class CYCLES_RENDER_PT_sampling_denoising(CyclesButtonsPanel, Panel):
+    bl_label = "Denoising"
+    bl_parent_id = "CYCLES_RENDER_PT_sampling"
+    bl_options = {'DEFAULT_CLOSED'}
+
+    def draw(self, context):
+        layout = self.layout
+        layout.use_property_split = True
+        layout.use_property_decorate = False
+
+        scene = context.scene
+        cscene = scene.cycles
+
+        heading = layout.column(align=True, heading="Render")
+        row = heading.row(align=True)
+        row.prop(cscene, "use_denoising", text="")
+        sub = row.row()
+
+        sub.active = cscene.use_denoising
+        for view_layer in scene.view_layers:
+            if view_layer.cycles.denoising_store_passes:
+                sub.active = True
+
+        sub.prop(cscene, "denoiser", text="")
+
+        layout.separator()
+
+        heading = layout.column(align=False, heading="Viewport")
+        row = heading.row(align=True)
+        row.prop(cscene, "use_preview_denoising", text="")
+        sub = row.row()
+        sub.active = cscene.use_preview_denoising
+        sub.prop(cscene, "preview_denoiser", text="")
+
+        sub = heading.row(align=True)
+        sub.active = cscene.use_preview_denoising
+        sub.prop(cscene, "preview_denoising_start_sample", text="Start Sample")
+        sub = heading.row(align=True)
+        sub.active = cscene.use_preview_denoising
+        sub.prop(cscene, "preview_denoising_input_passes", text="Input Passes")


 class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
@@ -286,6 +313,8 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
        col.active = not(cscene.use_adaptive_sampling)
        col.prop(cscene, "sampling_pattern", text="Pattern")

+        layout.prop(cscene, "use_square_samples")
+
        layout.separator()

        col = layout.column(align=True)
@@ -293,6 +322,11 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
        col.prop(cscene, "min_transparent_bounces")
        col.prop(cscene, "light_sampling_threshold", text="Light Threshold")

+        if cscene.progressive != 'PATH' and use_branched_path(context):
+            col = layout.column(align=True)
+            col.prop(cscene, "sample_all_lights_direct")
+            col.prop(cscene, "sample_all_lights_indirect")
+
        for view_layer in scene.view_layers:
            if view_layer.samples > 0:
                layout.separator()
@@ -300,6 +334,62 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
                break


+class CYCLES_RENDER_PT_sampling_total(CyclesButtonsPanel, Panel):
+    bl_label = "Total Samples"
+    bl_parent_id = "CYCLES_RENDER_PT_sampling"
+
+    @classmethod
+    def poll(cls, context):
+        scene = context.scene
+        cscene = scene.cycles
+
+        if cscene.use_square_samples:
+            return True
+
+        return cscene.progressive != 'PATH' and use_branched_path(context)
+
+    def draw(self, context):
+        layout = self.layout
+        cscene = context.scene.cycles
+        integrator = cscene.progressive
+
+        # Calculate sample values
+        if integrator == 'PATH':
+            aa = cscene.samples
+            if cscene.use_square_samples:
+                aa = aa * aa
+        else:
+            aa = cscene.aa_samples
+            d = cscene.diffuse_samples
+            g = cscene.glossy_samples
+            t = cscene.transmission_samples
+            ao = cscene.ao_samples
+            ml = cscene.mesh_light_samples
+            sss = cscene.subsurface_samples
+            vol = cscene.volume_samples
+
+            if cscene.use_square_samples:
+                aa = aa * aa
+                d = d * d
+                g = g * g
+                t = t * t
+                ao = ao * ao
+                ml = ml * ml
+                sss = sss * sss
+                vol = vol * vol
+
+        col = layout.column(align=True)
+        col.scale_y = 0.6
+        if integrator == 'PATH':
+            col.label(text="%s AA" % aa)
+        else:
+            col.label(text="%s AA, %s Diffuse, %s Glossy, %s Transmission" %
+                      (aa, d * aa, g * aa, t * aa))
+            col.separator()
+            col.label(text="%s AO, %s Mesh Light, %s Subsurface, %s Volume" %
+                      (ao * aa, ml * aa, sss * aa, vol * aa))
+
+
 class CYCLES_RENDER_PT_subdivision(CyclesButtonsPanel, Panel):
    bl_label = "Subdivision"
    bl_options = {'DEFAULT_CLOSED'}
@@ -458,8 +548,6 @@ class CYCLES_RENDER_PT_light_paths_fast_gi(CyclesButtonsPanel, Panel):
        layout.use_property_split = True
        layout.use_property_decorate = False

-        layout.active = cscene.use_fast_gi
-
        col = layout.column(align=True)
        col.prop(cscene, "ao_bounces", text="Viewport Bounces")
        col.prop(cscene, "ao_bounces_render", text="Render Bounces")
@@ -618,8 +706,8 @@ class CYCLES_RENDER_PT_performance_threads(CyclesButtonsPanel, Panel):
        sub.prop(rd, "threads")


-class CYCLES_RENDER_PT_performance_memory(CyclesButtonsPanel, Panel):
-    bl_label = "Memory"
+class CYCLES_RENDER_PT_performance_tiles(CyclesButtonsPanel, Panel):
+    bl_label = "Tiles"
    bl_parent_id = "CYCLES_RENDER_PT_performance"

    def draw(self, context):
@@ -628,13 +716,19 @@ class CYCLES_RENDER_PT_performance_memory(CyclesButtonsPanel, Panel):
        layout.use_property_decorate = False

        scene = context.scene
+        rd = scene.render
        cscene = scene.cycles

        col = layout.column()
-        col.prop(cscene, "use_auto_tile")
+
+        sub = col.column(align=True)
+        sub.prop(rd, "tile_x", text="Tiles X")
+        sub.prop(rd, "tile_y", text="Y")
+        col.prop(cscene, "tile_order", text="Order")
+
        sub = col.column()
-        sub.active = cscene.use_auto_tile
-        sub.prop(cscene, "tile_size")
+        sub.active = not rd.use_save_buffers and not cscene.use_adaptive_sampling
+        sub.prop(cscene, "use_progressive_refine")


 class CYCLES_RENDER_PT_performance_acceleration_structure(CyclesButtonsPanel, Panel):
@@ -684,6 +778,7 @@ class CYCLES_RENDER_PT_performance_final_render(CyclesButtonsPanel, Panel):

        col = layout.column()

+        col.prop(rd, "use_save_buffers")
        col.prop(rd, "use_persistent_data", text="Persistent Data")


@@ -702,6 +797,7 @@ class CYCLES_RENDER_PT_performance_viewport(CyclesButtonsPanel, Panel):

        col = layout.column()
        col.prop(rd, "preview_pixel_size", text="Pixel Size")
+        col.prop(cscene, "preview_start_resolution", text="Start Pixels")


 class CYCLES_RENDER_PT_filter(CyclesButtonsPanel, Panel):
@@ -722,6 +818,7 @@ class CYCLES_RENDER_PT_filter(CyclesButtonsPanel, Panel):

        col = layout.column(heading="Include")
        col.prop(view_layer, "use_sky", text="Environment")
+        col.prop(view_layer, "use_ao", text="Ambient Occlusion")
        col.prop(view_layer, "use_solid", text="Surfaces")
        col.prop(view_layer, "use_strand", text="Hair")
        col.prop(view_layer, "use_volumes", text="Volumes")
@@ -730,9 +827,6 @@ class CYCLES_RENDER_PT_filter(CyclesButtonsPanel, Panel):
        sub = col.row()
        sub.prop(view_layer, "use_motion_blur", text="Motion Blur")
        sub.active = rd.use_motion_blur
-        sub = col.row()
-        sub.prop(view_layer.cycles, 'use_denoising', text='Denoising')
-        sub.active = scene.cycles.use_denoising


 class CYCLES_RENDER_PT_override(CyclesButtonsPanel, Panel):
@@ -778,7 +872,6 @@ class CYCLES_RENDER_PT_passes_data(CyclesButtonsPanel, Panel):
        col.prop(view_layer, "use_pass_combined")
        col.prop(view_layer, "use_pass_z")
        col.prop(view_layer, "use_pass_mist")
-        col.prop(view_layer, "use_pass_position")
        col.prop(view_layer, "use_pass_normal")
        sub = col.column()
        sub.active = not rd.use_motion_blur
@@ -792,6 +885,7 @@ class CYCLES_RENDER_PT_passes_data(CyclesButtonsPanel, Panel):
        col.prop(view_layer, "use_pass_material_index")

        col = layout.column(heading="Debug", align=True)
+        col.prop(cycles_view_layer, "pass_debug_render_time", text="Render Time")
        col.prop(cycles_view_layer, "pass_debug_sample_count", text="Sample Count")

        layout.prop(view_layer, "pass_alpha_threshold")
@@ -834,7 +928,6 @@ class CYCLES_RENDER_PT_passes_light(CyclesButtonsPanel, Panel):
        col.prop(view_layer, "use_pass_environment")
        col.prop(view_layer, "use_pass_shadow")
        col.prop(view_layer, "use_pass_ambient_occlusion", text="Ambient Occlusion")
-        col.prop(cycles_view_layer, "use_pass_shadow_catcher")


 class CYCLES_RENDER_PT_passes_crypto(CyclesButtonsPanel, ViewLayerCryptomattePanel, Panel):
@@ -849,6 +942,70 @@ class CYCLES_RENDER_PT_passes_aov(CyclesButtonsPanel, ViewLayerAOVPanel):
    bl_parent_id = "CYCLES_RENDER_PT_passes"


+class CYCLES_RENDER_PT_denoising(CyclesButtonsPanel, Panel):
+    bl_label = "Denoising"
+    bl_context = "view_layer"
+    bl_options = {'DEFAULT_CLOSED'}
+
+    @classmethod
+    def poll(cls, context):
+        cscene = context.scene.cycles
+        return CyclesButtonsPanel.poll(context) and cscene.use_denoising
+
+    def draw_header(self, context):
+        scene = context.scene
+        view_layer = context.view_layer
+        cycles_view_layer = view_layer.cycles
+
+        layout = self.layout
+        layout.prop(cycles_view_layer, "use_denoising", text="")
+
+    def draw(self, context):
+        layout = self.layout
+        layout.use_property_split = True
+        layout.use_property_decorate = False
+
+        scene = context.scene
+        view_layer = context.view_layer
+        cycles_view_layer = view_layer.cycles
+        denoiser = scene.cycles.denoiser
+
+        layout.active = denoiser != 'NONE' and cycles_view_layer.use_denoising
+
+        col = layout.column()
+
+        if denoiser == 'OPTIX':
+            col.prop(cycles_view_layer, "denoising_optix_input_passes")
+            return
+        elif denoiser == 'OPENIMAGEDENOISE':
+            col.prop(cycles_view_layer, "denoising_openimagedenoise_input_passes")
+            return
+
+        col.prop(cycles_view_layer, "denoising_radius", text="Radius")
+
+        col = layout.column()
+        col.prop(cycles_view_layer, "denoising_strength", slider=True, text="Strength")
+        col.prop(cycles_view_layer, "denoising_feature_strength", slider=True, text="Feature Strength")
+        col.prop(cycles_view_layer, "denoising_relative_pca")
+
+        layout.separator()
+
+        col = layout.column()
+        col.active = cycles_view_layer.use_denoising or cycles_view_layer.denoising_store_passes
+
+        row = col.row(heading="Diffuse", align=True)
+        row.prop(cycles_view_layer, "denoising_diffuse_direct", text="Direct", toggle=True)
+        row.prop(cycles_view_layer, "denoising_diffuse_indirect", text="Indirect", toggle=True)
+
+        row = col.row(heading="Glossy", align=True)
+        row.prop(cycles_view_layer, "denoising_glossy_direct", text="Direct", toggle=True)
+        row.prop(cycles_view_layer, "denoising_glossy_indirect", text="Indirect", toggle=True)
+
+        row = col.row(heading="Transmission", align=True)
+        row.prop(cycles_view_layer, "denoising_transmission_direct", text="Direct", toggle=True)
+        row.prop(cycles_view_layer, "denoising_transmission_indirect", text="Indirect", toggle=True)
+
+
 class CYCLES_PT_post_processing(CyclesButtonsPanel, Panel):
    bl_label = "Post Processing"
    bl_options = {'DEFAULT_CLOSED'}
@@ -1260,6 +1417,10 @@ class CYCLES_LIGHT_PT_light(CyclesButtonsPanel, Panel):

        if not (light.type == 'AREA' and clamp.is_portal):
            sub = col.column()
+            if use_branched_path(context):
+                subsub = sub.row(align=True)
+                subsub.active = use_sample_all_lights(context)
+                subsub.prop(clamp, "samples")
            sub.prop(clamp, "max_bounces")

        sub = col.column(align=True)
@@ -1365,6 +1526,34 @@ class CYCLES_WORLD_PT_volume(CyclesButtonsPanel, Panel):
        panel_node_draw(layout, world, 'OUTPUT_WORLD', 'Volume')


+class CYCLES_WORLD_PT_ambient_occlusion(CyclesButtonsPanel, Panel):
+    bl_label = "Ambient Occlusion"
+    bl_context = "world"
+    bl_options = {'DEFAULT_CLOSED'}
+
+    @classmethod
+    def poll(cls, context):
+        return context.world and CyclesButtonsPanel.poll(context)
+
+    def draw_header(self, context):
+        light = context.world.light_settings
+        self.layout.prop(light, "use_ambient_occlusion", text="")
+
+    def draw(self, context):
+        layout = self.layout
+        layout.use_property_split = True
+        layout.use_property_decorate = False
+
+        light = context.world.light_settings
+        scene = context.scene
+
+        col = layout.column()
+        sub = col.column()
+        sub.active = light.use_ambient_occlusion or scene.render.use_simplify
+        sub.prop(light, "ao_factor", text="Factor")
+        col.prop(light, "distance", text="Distance")
+
+
 class CYCLES_WORLD_PT_mist(CyclesButtonsPanel, Panel):
    bl_label = "Mist Pass"
    bl_context = "world"
@@ -1461,6 +1650,10 @@ class CYCLES_WORLD_PT_settings_surface(CyclesButtonsPanel, Panel):
        subsub = sub.row(align=True)
        subsub.active = cworld.sampling_method == 'MANUAL'
        subsub.prop(cworld, "sample_map_resolution")
+        if use_branched_path(context):
+            subsub = sub.column(align=True)
+            subsub.active = use_sample_all_lights(context)
+            subsub.prop(cworld, "samples")
        sub.prop(cworld, "max_bounces")


@@ -1484,7 +1677,8 @@ class CYCLES_WORLD_PT_settings_volume(CyclesButtonsPanel, Panel):
        col = layout.column()

        sub = col.column()
-        col.prop(cworld, "volume_sampling", text="Sampling")
+        sub.active = use_cpu(context)
+        sub.prop(cworld, "volume_sampling", text="Sampling")
        col.prop(cworld, "volume_interpolation", text="Interpolation")
        col.prop(cworld, "homogeneous_volume", text="Homogeneous")
        sub = col.column()
@@ -1623,7 +1817,8 @@ class CYCLES_MATERIAL_PT_settings_volume(CyclesButtonsPanel, Panel):

        col = layout.column()
        sub = col.column()
-        col.prop(cmat, "volume_sampling", text="Sampling")
+        sub.active = use_cpu(context)
+        sub.prop(cmat, "volume_sampling", text="Sampling")
        col.prop(cmat, "volume_interpolation", text="Interpolation")
        col.prop(cmat, "homogeneous_volume", text="Homogeneous")
        sub = col.column()
@@ -1650,6 +1845,9 @@ class CYCLES_RENDER_PT_bake(CyclesButtonsPanel, Panel):
        cbk = scene.render.bake
        rd = scene.render

+        if use_optix(context):
+            layout.label(text="Baking is performed using CUDA instead of OptiX", icon='INFO')
+
        if rd.use_bake_multires:
            layout.operator("object.bake_image", icon='RENDER_STILL')
            layout.prop(rd, "use_bake_multires")
@@ -1707,6 +1905,7 @@ class CYCLES_RENDER_PT_bake_influence(CyclesButtonsPanel, Panel):
            col.prop(cbk, "use_pass_diffuse")
            col.prop(cbk, "use_pass_glossy")
            col.prop(cbk, "use_pass_transmission")
+            col.prop(cbk, "use_pass_ambient_occlusion")
            col.prop(cbk, "use_pass_emit")

        elif cscene.bake_type in {'DIFFUSE', 'GLOSSY', 'TRANSMISSION'}:
@@ -1790,12 +1989,19 @@ class CYCLES_RENDER_PT_bake_output(CyclesButtonsPanel, Panel):
                layout.prop(cbk, "use_clear", text="Clear Image")


-class CYCLES_RENDER_PT_debug(CyclesDebugButtonsPanel, Panel):
+class CYCLES_RENDER_PT_debug(CyclesButtonsPanel, Panel):
    bl_label = "Debug"
    bl_context = "render"
    bl_options = {'DEFAULT_CLOSED'}
    COMPAT_ENGINES = {'CYCLES'}

+    @classmethod
+    def poll(cls, context):
+        prefs = bpy.context.preferences
+        return (CyclesButtonsPanel.poll(context)
+                and prefs.experimental.use_cycles_debug
+                and prefs.view.show_developer_ui)
+
    def draw(self, context):
        layout = self.layout

@@ -1812,18 +2018,29 @@ class CYCLES_RENDER_PT_debug(CyclesDebugButtonsPanel, Panel):
        row.prop(cscene, "debug_use_cpu_avx", toggle=True)
        row.prop(cscene, "debug_use_cpu_avx2", toggle=True)
        col.prop(cscene, "debug_bvh_layout")
+        col.prop(cscene, "debug_use_cpu_split_kernel")

        col.separator()

        col = layout.column()
        col.label(text="CUDA Flags:")
        col.prop(cscene, "debug_use_cuda_adaptive_compile")
+        col.prop(cscene, "debug_use_cuda_split_kernel")

        col.separator()

        col = layout.column()
        col.label(text="OptiX Flags:")
-        col.prop(cscene, "debug_use_optix_debug")
+        col.prop(cscene, "debug_optix_cuda_streams")
+        col.prop(cscene, "debug_optix_curves_api")
+
+        col.separator()
+
+        col = layout.column()
+        col.label(text="OpenCL Flags:")
+        col.prop(cscene, "debug_opencl_device_type", text="Device")
+        col.prop(cscene, "debug_use_opencl_debug", text="Debug")
+        col.prop(cscene, "debug_opencl_mem_limit")

        col.separator()

@@ -1924,22 +2141,20 @@ class CYCLES_RENDER_PT_simplify_culling(CyclesButtonsPanel, Panel):
        sub.prop(cscene, "distance_cull_margin", text="")


-class CyclesShadingButtonsPanel(CyclesButtonsPanel):
+class CYCLES_VIEW3D_PT_shading_render_pass(Panel):
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'HEADER'
+    bl_label = "Render Pass"
    bl_parent_id = 'VIEW3D_PT_shading'
+    COMPAT_ENGINES = {'CYCLES'}

    @classmethod
    def poll(cls, context):
        return (
-            CyclesButtonsPanel.poll(context) and
+            context.engine in cls.COMPAT_ENGINES and
            context.space_data.shading.type == 'RENDERED'
        )

-
-class CYCLES_VIEW3D_PT_shading_render_pass(CyclesShadingButtonsPanel, Panel):
-    bl_label = "Render Pass"
-
    def draw(self, context):
        shading = context.space_data.shading

@@ -1947,26 +2162,6 @@ class CYCLES_VIEW3D_PT_shading_render_pass(CyclesShadingButtonsPanel, Panel):
        layout.prop(shading.cycles, "render_pass", text="")


-class CYCLES_VIEW3D_PT_shading_debug(CyclesDebugButtonsPanel,
-                                     CyclesShadingButtonsPanel,
-                                     Panel):
-    bl_label = "Debug"
-
-    @classmethod
-    def poll(cls, context):
-        return (
-            CyclesDebugButtonsPanel.poll(context) and
-            CyclesShadingButtonsPanel.poll(context)
-        )
-
-    def draw(self, context):
-        shading = context.space_data.shading
-
-        layout = self.layout
-        layout.active = context.scene.cycles.use_preview_adaptive_sampling
-        layout.prop(shading.cycles, "show_active_pixels")
-
-
 class CYCLES_VIEW3D_PT_shading_lighting(Panel):
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'HEADER'
@@ -2080,13 +2275,11 @@ def get_panels():

 classes = (
    CYCLES_PT_sampling_presets,
-    CYCLES_PT_viewport_sampling_presets,
    CYCLES_PT_integrator_presets,
    CYCLES_RENDER_PT_sampling,
-    CYCLES_RENDER_PT_sampling_viewport,
-    CYCLES_RENDER_PT_sampling_viewport_denoise,
-    CYCLES_RENDER_PT_sampling_render,
-    CYCLES_RENDER_PT_sampling_render_denoise,
+    CYCLES_RENDER_PT_sampling_sub_samples,
+    CYCLES_RENDER_PT_sampling_adaptive,
+    CYCLES_RENDER_PT_sampling_denoising,
    CYCLES_RENDER_PT_sampling_advanced,
    CYCLES_RENDER_PT_light_paths,
    CYCLES_RENDER_PT_light_paths_max_bounces,
@@ -2103,7 +2296,6 @@ classes = (
    CYCLES_VIEW3D_PT_simplify_greasepencil,
    CYCLES_VIEW3D_PT_shading_lighting,
    CYCLES_VIEW3D_PT_shading_render_pass,
-    CYCLES_VIEW3D_PT_shading_debug,
    CYCLES_RENDER_PT_motion_blur,
    CYCLES_RENDER_PT_motion_blur_curve,
    CYCLES_RENDER_PT_film,
@@ -2111,7 +2303,7 @@ classes = (
    CYCLES_RENDER_PT_film_transparency,
    CYCLES_RENDER_PT_performance,
    CYCLES_RENDER_PT_performance_threads,
-    CYCLES_RENDER_PT_performance_memory,
+    CYCLES_RENDER_PT_performance_tiles,
    CYCLES_RENDER_PT_performance_acceleration_structure,
    CYCLES_RENDER_PT_performance_final_render,
    CYCLES_RENDER_PT_performance_viewport,
@@ -2122,6 +2314,7 @@ classes = (
    CYCLES_RENDER_PT_passes_aov,
    CYCLES_RENDER_PT_filter,
    CYCLES_RENDER_PT_override,
+    CYCLES_RENDER_PT_denoising,
    CYCLES_PT_post_processing,
    CYCLES_CAMERA_PT_dof,
    CYCLES_CAMERA_PT_dof_aperture,
@@ -2140,6 +2333,7 @@ classes = (
    CYCLES_WORLD_PT_preview,
    CYCLES_WORLD_PT_surface,
    CYCLES_WORLD_PT_volume,
+    CYCLES_WORLD_PT_ambient_occlusion,
    CYCLES_WORLD_PT_mist,
    CYCLES_WORLD_PT_ray_visibility,
    CYCLES_WORLD_PT_settings,
--- a/intern/cycles/blender/addon/version_update.py
+++ b/intern/cycles/blender/addon/version_update.py
@@ -109,7 +109,7 @@ def do_versions(self):
        library_versions.setdefault(library.version, []).append(library)

    # Do versioning per library, since they might have different versions.
-    max_need_versioning = (3, 0, 25)
+    max_need_versioning = (2, 93, 7)
    for version, libraries in library_versions.items():
        if version > max_need_versioning:
            continue
@@ -166,6 +166,10 @@ def do_versions(self):
                if not cscene.is_property_set("filter_type"):
                    cscene.pixel_filter_type = 'GAUSSIAN'

+                # Tile Order
+                if not cscene.is_property_set("tile_order"):
+                    cscene.tile_order = 'CENTER'
+
            if version <= (2, 76, 10):
                cscene = scene.cycles
                if cscene.is_property_set("filter_type"):
@@ -182,6 +186,10 @@ def do_versions(self):
            if version <= (2, 79, 0):
                cscene = scene.cycles
                # Default changes
+                if not cscene.is_property_set("aa_samples"):
+                    cscene.aa_samples = 4
+                if not cscene.is_property_set("preview_aa_samples"):
+                    cscene.preview_aa_samples = 4
                if not cscene.is_property_set("blur_glossy"):
                    cscene.blur_glossy = 0.0
                if not cscene.is_property_set("sample_clamp_indirect"):
@@ -195,6 +203,7 @@ def do_versions(self):
                    view_layer.use_pass_cryptomatte_material = cview_layer.get("use_pass_crypto_material", False)
                    view_layer.use_pass_cryptomatte_asset = cview_layer.get("use_pass_crypto_asset", False)
                    view_layer.pass_cryptomatte_depth = cview_layer.get("pass_crypto_depth", 6)
+                    view_layer.use_pass_cryptomatte_accurate = cview_layer.get("pass_crypto_accurate", True)

            if version <= (2, 93, 7):
                if scene.render.engine == 'CYCLES':
@@ -220,35 +229,6 @@ def do_versions(self):
                    cscene.ao_bounces = 1
                    cscene.ao_bounces_render = 1

-            if version <= (3, 0, 25):
-                cscene = scene.cycles
-
-                # Default changes.
-                if not cscene.is_property_set("samples"):
-                    cscene.samples = 128
-                if not cscene.is_property_set("preview_samples"):
-                    cscene.preview_samples = 32
-                if not cscene.is_property_set("use_adaptive_sampling"):
-                    cscene.use_adaptive_sampling = False
-                    cscene.use_preview_adaptive_sampling = False
-                if not cscene.is_property_set("use_denoising"):
-                    cscene.use_denoising = False
-                if not cscene.is_property_set("use_preview_denoising"):
-                    cscene.use_preview_denoising = False
-                if not cscene.is_property_set("sampling_pattern"):
-                    cscene.sampling_pattern = 'PROGRESSIVE_MUTI_JITTER'
-
-                # Removal of square samples.
-                cscene = scene.cycles
-                use_square_samples = cscene.get("use_square_samples", False)
-
-                if use_square_samples:
-                    cscene.samples *= cscene.samples
-                    cscene.preview_samples *= cscene.preview_samples
-                    for layer in scene.view_layers:
-                        layer.samples *= layer.samples
-                    cscene["use_square_samples"] = False
-
        # Lamps
        for light in bpy.data.lights:
            if light.library not in libraries:
@@ -269,6 +249,10 @@ def do_versions(self):
            if version <= (2, 76, 9):
                cworld = world.cycles

+                # World MIS Samples
+                if not cworld.is_property_set("samples"):
+                    cworld.samples = 4
+
                # World MIS Resolution
                if not cworld.is_property_set("sample_map_resolution"):
                    cworld.sample_map_resolution = 256
--- a/intern/cycles/blender/blender_camera.cpp
+++ b/intern/cycles/blender/blender_camera.cpp
@@ -894,8 +894,12 @@ void BlenderSync::sync_view(BL::SpaceView3D &b_v3d,
  }
 }

-BufferParams BlenderSync::get_buffer_params(
-    BL::SpaceView3D &b_v3d, BL::RegionView3D &b_rv3d, Camera *cam, int width, int height)
+BufferParams BlenderSync::get_buffer_params(BL::SpaceView3D &b_v3d,
+                                            BL::RegionView3D &b_rv3d,
+                                            Camera *cam,
+                                            int width,
+                                            int height,
+                                            const bool use_denoiser)
 {
  BufferParams params;
  bool use_border = false;
@@ -927,6 +931,11 @@ BufferParams BlenderSync::get_buffer_params(
    params.height = height;
  }

+  PassType display_pass = update_viewport_display_passes(b_v3d, params.passes);
+
+  /* Can only denoise the combined image pass */
+  params.denoising_data_pass = display_pass == PASS_COMBINED && use_denoiser;
+
  return params;
 }

--- a/intern/cycles/blender/blender_curves.cpp
+++ b/intern/cycles/blender/blender_curves.cpp
@@ -283,13 +283,10 @@ static void ExportCurveSegments(Scene *scene, Hair *hair, ParticleCurveData *CDa
    return;

  Attribute *attr_intercept = NULL;
-  Attribute *attr_length = NULL;
  Attribute *attr_random = NULL;

  if (hair->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT))
    attr_intercept = hair->attributes.add(ATTR_STD_CURVE_INTERCEPT);
-  if (hair->need_attribute(scene, ATTR_STD_CURVE_LENGTH))
-    attr_length = hair->attributes.add(ATTR_STD_CURVE_LENGTH);
  if (hair->need_attribute(scene, ATTR_STD_CURVE_RANDOM))
    attr_random = hair->attributes.add(ATTR_STD_CURVE_RANDOM);

@@ -339,10 +336,6 @@ static void ExportCurveSegments(Scene *scene, Hair *hair, ParticleCurveData *CDa
        num_curve_keys++;
      }

-      if (attr_length != NULL) {
-        attr_length->add(CData->curve_length[curve]);
-      }
-
      if (attr_random != NULL) {
        attr_random->add(hash_uint2_to_float(num_curves, 0));
      }
@@ -664,15 +657,11 @@ static void export_hair_curves(Scene *scene, Hair *hair, BL::Hair b_hair)

  /* Add requested attributes. */
  Attribute *attr_intercept = NULL;
-  Attribute *attr_length = NULL;
  Attribute *attr_random = NULL;

  if (hair->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT)) {
    attr_intercept = hair->attributes.add(ATTR_STD_CURVE_INTERCEPT);
  }
-  if (hair->need_attribute(scene, ATTR_STD_CURVE_LENGTH)) {
-    attr_length = hair->attributes.add(ATTR_STD_CURVE_LENGTH);
-  }
  if (hair->need_attribute(scene, ATTR_STD_CURVE_RANDOM)) {
    attr_random = hair->attributes.add(ATTR_STD_CURVE_RANDOM);
  }
@@ -725,10 +714,6 @@ static void export_hair_curves(Scene *scene, Hair *hair, BL::Hair b_hair)
      }
    }

-    if (attr_length) {
-      attr_length->add(length);
-    }
-
    /* Random number per curve. */
    if (attr_random != NULL) {
      attr_random->add(hash_uint2_to_float(b_curve.index(), 0));
--- a/intern/cycles/blender/blender_device.cpp
+++ b/intern/cycles/blender/blender_device.cpp
@@ -25,8 +25,8 @@ CCL_NAMESPACE_BEGIN
 enum ComputeDevice {
  COMPUTE_DEVICE_CPU = 0,
  COMPUTE_DEVICE_CUDA = 1,
+  COMPUTE_DEVICE_OPENCL = 2,
  COMPUTE_DEVICE_OPTIX = 3,
-  COMPUTE_DEVICE_HIP = 4,

  COMPUTE_DEVICE_NUM
 };
@@ -68,6 +68,13 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
      device = Device::get_multi_device(devices, threads, background);
    }
  }
+  else if (get_enum(cscene, "device") == 2) {
+    /* Find network device. */
+    vector<DeviceInfo> devices = Device::available_devices(DEVICE_MASK_NETWORK);
+    if (!devices.empty()) {
+      device = devices.front();
+    }
+  }
  else if (get_enum(cscene, "device") == 1) {
    /* Test if we are using GPU devices. */
    ComputeDevice compute_device = (ComputeDevice)get_enum(
@@ -82,8 +89,8 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
      else if (compute_device == COMPUTE_DEVICE_OPTIX) {
        mask |= DEVICE_MASK_OPTIX;
      }
-      else if (compute_device == COMPUTE_DEVICE_HIP) {
-        mask |= DEVICE_MASK_HIP;
+      else if (compute_device == COMPUTE_DEVICE_OPENCL) {
+        mask |= DEVICE_MASK_OPENCL;
      }
      vector<DeviceInfo> devices = Device::available_devices(mask);

--- a/intern/cycles/blender/blender_display_driver.cpp
+++ b/intern/cycles/blender/blender_display_driver.cpp
@@ -1,754 +0,0 @@
-/*
- * Copyright 2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "blender/blender_display_driver.h"
-
-#include "device/device.h"
-#include "util/util_logging.h"
-#include "util/util_opengl.h"
-
-extern "C" {
-struct RenderEngine;
-
-bool RE_engine_has_render_context(struct RenderEngine *engine);
-void RE_engine_render_context_enable(struct RenderEngine *engine);
-void RE_engine_render_context_disable(struct RenderEngine *engine);
-
-bool DRW_opengl_context_release();
-void DRW_opengl_context_activate(bool drw_state);
-
-void *WM_opengl_context_create();
-void WM_opengl_context_activate(void *gl_context);
-void WM_opengl_context_dispose(void *gl_context);
-void WM_opengl_context_release(void *context);
-}
-
-CCL_NAMESPACE_BEGIN
-
-/* --------------------------------------------------------------------
- * BlenderDisplayShader.
- */
-
-unique_ptr<BlenderDisplayShader> BlenderDisplayShader::create(BL::RenderEngine &b_engine,
-                                                              BL::Scene &b_scene)
-{
-  if (b_engine.support_display_space_shader(b_scene)) {
-    return make_unique<BlenderDisplaySpaceShader>(b_engine, b_scene);
-  }
-
-  return make_unique<BlenderFallbackDisplayShader>();
-}
-
-int BlenderDisplayShader::get_position_attrib_location()
-{
-  if (position_attribute_location_ == -1) {
-    const uint shader_program = get_shader_program();
-    position_attribute_location_ = glGetAttribLocation(shader_program, position_attribute_name);
-  }
-  return position_attribute_location_;
-}
-
-int BlenderDisplayShader::get_tex_coord_attrib_location()
-{
-  if (tex_coord_attribute_location_ == -1) {
-    const uint shader_program = get_shader_program();
-    tex_coord_attribute_location_ = glGetAttribLocation(shader_program, tex_coord_attribute_name);
-  }
-  return tex_coord_attribute_location_;
-}
-
-/* --------------------------------------------------------------------
- * BlenderFallbackDisplayShader.
- */
-
-/* TODO move shaders to standalone .glsl file. */
-static const char *FALLBACK_VERTEX_SHADER =
-    "#version 330\n"
-    "uniform vec2 fullscreen;\n"
-    "in vec2 texCoord;\n"
-    "in vec2 pos;\n"
-    "out vec2 texCoord_interp;\n"
-    "\n"
-    "vec2 normalize_coordinates()\n"
-    "{\n"
-    "   return (vec2(2.0) * (pos / fullscreen)) - vec2(1.0);\n"
-    "}\n"
-    "\n"
-    "void main()\n"
-    "{\n"
-    "   gl_Position = vec4(normalize_coordinates(), 0.0, 1.0);\n"
-    "   texCoord_interp = texCoord;\n"
-    "}\n\0";
-
-static const char *FALLBACK_FRAGMENT_SHADER =
-    "#version 330\n"
-    "uniform sampler2D image_texture;\n"
-    "in vec2 texCoord_interp;\n"
-    "out vec4 fragColor;\n"
-    "\n"
-    "void main()\n"
-    "{\n"
-    "   fragColor = texture(image_texture, texCoord_interp);\n"
-    "}\n\0";
-
-static void shader_print_errors(const char *task, const char *log, const char *code)
-{
-  LOG(ERROR) << "Shader: " << task << " error:";
-  LOG(ERROR) << "===== shader string ====";
-
-  stringstream stream(code);
-  string partial;
-
-  int line = 1;
-  while (getline(stream, partial, '\n')) {
-    if (line < 10) {
-      LOG(ERROR) << " " << line << " " << partial;
-    }
-    else {
-      LOG(ERROR) << line << " " << partial;
-    }
-    line++;
-  }
-  LOG(ERROR) << log;
-}
-
-static int compile_fallback_shader(void)
-{
-  const struct Shader {
-    const char *source;
-    const GLenum type;
-  } shaders[2] = {{FALLBACK_VERTEX_SHADER, GL_VERTEX_SHADER},
-                  {FALLBACK_FRAGMENT_SHADER, GL_FRAGMENT_SHADER}};
-
-  const GLuint program = glCreateProgram();
-
-  for (int i = 0; i < 2; i++) {
-    const GLuint shader = glCreateShader(shaders[i].type);
-
-    string source_str = shaders[i].source;
-    const char *c_str = source_str.c_str();
-
-    glShaderSource(shader, 1, &c_str, NULL);
-    glCompileShader(shader);
-
-    GLint compile_status;
-    glGetShaderiv(shader, GL_COMPILE_STATUS, &compile_status);
-
-    if (!compile_status) {
-      GLchar log[5000];
-      GLsizei length = 0;
-      glGetShaderInfoLog(shader, sizeof(log), &length, log);
-      shader_print_errors("compile", log, c_str);
-      return 0;
-    }
-
-    glAttachShader(program, shader);
-  }
-
-  /* Link output. */
-  glBindFragDataLocation(program, 0, "fragColor");
-
-  /* Link and error check. */
-  glLinkProgram(program);
-
-  /* TODO(sergey): Find a way to nicely de-duplicate the error checking. */
-  GLint link_status;
-  glGetProgramiv(program, GL_LINK_STATUS, &link_status);
-  if (!link_status) {
-    GLchar log[5000];
-    GLsizei length = 0;
-    /* TODO(sergey): Is it really program passed to glGetShaderInfoLog? */
-    glGetShaderInfoLog(program, sizeof(log), &length, log);
-    shader_print_errors("linking", log, FALLBACK_VERTEX_SHADER);
-    shader_print_errors("linking", log, FALLBACK_FRAGMENT_SHADER);
-    return 0;
-  }
-
-  return program;
-}
-
-void BlenderFallbackDisplayShader::bind(int width, int height)
-{
-  create_shader_if_needed();
-
-  if (!shader_program_) {
-    return;
-  }
-
-  glUseProgram(shader_program_);
-  glUniform1i(image_texture_location_, 0);
-  glUniform2f(fullscreen_location_, width, height);
-}
-
-void BlenderFallbackDisplayShader::unbind()
-{
-}
-
-uint BlenderFallbackDisplayShader::get_shader_program()
-{
-  return shader_program_;
-}
-
-void BlenderFallbackDisplayShader::create_shader_if_needed()
-{
-  if (shader_program_ || shader_compile_attempted_) {
-    return;
-  }
-
-  shader_compile_attempted_ = true;
-
-  shader_program_ = compile_fallback_shader();
-  if (!shader_program_) {
-    return;
-  }
-
-  glUseProgram(shader_program_);
-
-  image_texture_location_ = glGetUniformLocation(shader_program_, "image_texture");
-  if (image_texture_location_ < 0) {
-    LOG(ERROR) << "Shader doesn't contain the 'image_texture' uniform.";
-    destroy_shader();
-    return;
-  }
-
-  fullscreen_location_ = glGetUniformLocation(shader_program_, "fullscreen");
-  if (fullscreen_location_ < 0) {
-    LOG(ERROR) << "Shader doesn't contain the 'fullscreen' uniform.";
-    destroy_shader();
-    return;
-  }
-}
-
-void BlenderFallbackDisplayShader::destroy_shader()
-{
-  glDeleteProgram(shader_program_);
-  shader_program_ = 0;
-}
-
-/* --------------------------------------------------------------------
- * BlenderDisplaySpaceShader.
- */
-
-BlenderDisplaySpaceShader::BlenderDisplaySpaceShader(BL::RenderEngine &b_engine,
-                                                     BL::Scene &b_scene)
-    : b_engine_(b_engine), b_scene_(b_scene)
-{
-  DCHECK(b_engine_.support_display_space_shader(b_scene_));
-}
-
-void BlenderDisplaySpaceShader::bind(int /*width*/, int /*height*/)
-{
-  b_engine_.bind_display_space_shader(b_scene_);
-}
-
-void BlenderDisplaySpaceShader::unbind()
-{
-  b_engine_.unbind_display_space_shader();
-}
-
-uint BlenderDisplaySpaceShader::get_shader_program()
-{
-  if (!shader_program_) {
-    glGetIntegerv(GL_CURRENT_PROGRAM, reinterpret_cast<int *>(&shader_program_));
-  }
-
-  if (!shader_program_) {
-    LOG(ERROR) << "Error retrieving shader program for display space shader.";
-  }
-
-  return shader_program_;
-}
-
-/* --------------------------------------------------------------------
- * BlenderDisplayDriver.
- */
-
-BlenderDisplayDriver::BlenderDisplayDriver(BL::RenderEngine &b_engine, BL::Scene &b_scene)
-    : b_engine_(b_engine), display_shader_(BlenderDisplayShader::create(b_engine, b_scene))
-{
-  /* Create context while on the main thread. */
-  gl_context_create();
-}
-
-BlenderDisplayDriver::~BlenderDisplayDriver()
-{
-  gl_resources_destroy();
-}
-
-/* --------------------------------------------------------------------
- * Update procedure.
- */
-
-bool BlenderDisplayDriver::update_begin(const Params &params,
-                                        int texture_width,
-                                        int texture_height)
-{
-  /* Note that it's the responsibility of BlenderDisplayDriver to ensure updating and drawing
-   * the texture does not happen at the same time. This is achieved indirectly.
-   *
-   * When enabling the OpenGL context, it uses an internal mutex lock DST.gl_context_lock.
-   * This same lock is also held when do_draw() is called, which together ensure mutual
-   * exclusion.
-   *
-   * This locking is not performed on the Cycles side, because that would cause lock inversion. */
-  if (!gl_context_enable()) {
-    return false;
-  }
-
-  if (gl_render_sync_) {
-    glWaitSync((GLsync)gl_render_sync_, 0, GL_TIMEOUT_IGNORED);
-  }
-
-  if (!gl_texture_resources_ensure()) {
-    gl_context_disable();
-    return false;
-  }
-
-  /* Update texture dimensions if needed. */
-  if (texture_.width != texture_width || texture_.height != texture_height) {
-    glActiveTexture(GL_TEXTURE0);
-    glBindTexture(GL_TEXTURE_2D, texture_.gl_id);
-    glTexImage2D(
-        GL_TEXTURE_2D, 0, GL_RGBA16F, texture_width, texture_height, 0, GL_RGBA, GL_HALF_FLOAT, 0);
-    texture_.width = texture_width;
-    texture_.height = texture_height;
-    glBindTexture(GL_TEXTURE_2D, 0);
-
-    /* Texture did change, and no pixel storage was provided. Tag for an explicit zeroing out to
-     * avoid undefined content. */
-    texture_.need_clear = true;
-  }
-
-  /* Update PBO dimensions if needed.
-   *
-   * NOTE: Allocate the PBO for the the size which will fit the final render resolution (as in,
-   * at a resolution divider 1. This was we don't need to recreate graphics interoperability
-   * objects which are costly and which are tied to the specific underlying buffer size.
-   * The downside of this approach is that when graphics interoperability is not used we are
-   * sending too much data to GPU when resolution divider is not 1. */
-  /* TODO(sergey): Investigate whether keeping the PBO exact size of the texture makes non-interop
-   * mode faster. */
-  const int buffer_width = params.full_size.x;
-  const int buffer_height = params.full_size.y;
-  if (texture_.buffer_width != buffer_width || texture_.buffer_height != buffer_height) {
-    const size_t size_in_bytes = sizeof(half4) * buffer_width * buffer_height;
-    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id);
-    glBufferData(GL_PIXEL_UNPACK_BUFFER, size_in_bytes, 0, GL_DYNAMIC_DRAW);
-    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
-
-    texture_.buffer_width = buffer_width;
-    texture_.buffer_height = buffer_height;
-  }
-
-  /* New content will be provided to the texture in one way or another, so mark this in a
-   * centralized place. */
-  texture_.need_update = true;
-
-  return true;
-}
-
-void BlenderDisplayDriver::update_end()
-{
-  gl_upload_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
-  glFlush();
-
-  gl_context_disable();
-}
-
-/* --------------------------------------------------------------------
- * Texture buffer mapping.
- */
-
-half4 *BlenderDisplayDriver::map_texture_buffer()
-{
-  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id);
-
-  half4 *mapped_rgba_pixels = reinterpret_cast<half4 *>(
-      glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY));
-  if (!mapped_rgba_pixels) {
-    LOG(ERROR) << "Error mapping BlenderDisplayDriver pixel buffer object.";
-  }
-
-  if (texture_.need_clear) {
-    const int64_t texture_width = texture_.width;
-    const int64_t texture_height = texture_.height;
-    memset(reinterpret_cast<void *>(mapped_rgba_pixels),
-           0,
-           texture_width * texture_height * sizeof(half4));
-    texture_.need_clear = false;
-  }
-
-  return mapped_rgba_pixels;
-}
-
-void BlenderDisplayDriver::unmap_texture_buffer()
-{
-  glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
-
-  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
-}
-
-/* --------------------------------------------------------------------
- * Graphics interoperability.
- */
-
-BlenderDisplayDriver::GraphicsInterop BlenderDisplayDriver::graphics_interop_get()
-{
-  GraphicsInterop interop_dst;
-
-  interop_dst.buffer_width = texture_.buffer_width;
-  interop_dst.buffer_height = texture_.buffer_height;
-  interop_dst.opengl_pbo_id = texture_.gl_pbo_id;
-
-  interop_dst.need_clear = texture_.need_clear;
-  texture_.need_clear = false;
-
-  return interop_dst;
-}
-
-void BlenderDisplayDriver::graphics_interop_activate()
-{
-  gl_context_enable();
-}
-
-void BlenderDisplayDriver::graphics_interop_deactivate()
-{
-  gl_context_disable();
-}
-
-/* --------------------------------------------------------------------
- * Drawing.
- */
-
-void BlenderDisplayDriver::clear()
-{
-  texture_.need_clear = true;
-}
-
-void BlenderDisplayDriver::set_zoom(float zoom_x, float zoom_y)
-{
-  zoom_ = make_float2(zoom_x, zoom_y);
-}
-
-void BlenderDisplayDriver::draw(const Params &params)
-{
-  /* See do_update_begin() for why no locking is required here. */
-  const bool transparent = true;  // TODO(sergey): Derive this from Film.
-
-  if (!gl_draw_resources_ensure()) {
-    return;
-  }
-
-  if (use_gl_context_) {
-    gl_context_mutex_.lock();
-  }
-
-  if (texture_.need_clear) {
-    /* Texture is requested to be cleared and was not yet cleared.
-     *
-     * Do early return which should be equivalent of drawing all-zero texture.
-     * Watch out for the lock though so that the clear happening during update is properly
-     * synchronized here. */
-    gl_context_mutex_.unlock();
-    return;
-  }
-
-  if (gl_upload_sync_) {
-    glWaitSync((GLsync)gl_upload_sync_, 0, GL_TIMEOUT_IGNORED);
-  }
-
-  if (transparent) {
-    glEnable(GL_BLEND);
-    glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
-  }
-
-  display_shader_->bind(params.full_size.x, params.full_size.y);
-
-  glActiveTexture(GL_TEXTURE0);
-  glBindTexture(GL_TEXTURE_2D, texture_.gl_id);
-
-  /* Trick to keep sharp rendering without jagged edges on all GPUs.
-   *
-   * The idea here is to enforce driver to use linear interpolation when the image is not zoomed
-   * in.
-   * For the render result with a resolution divider in effect we always use nearest interpolation.
-   *
-   * Use explicit MIN assignment to make sure the driver does not have an undefined behavior at
-   * the zoom level 1. The MAG filter is always NEAREST. */
-  const float zoomed_width = params.size.x * zoom_.x;
-  const float zoomed_height = params.size.y * zoom_.y;
-  if (texture_.width != params.size.x || texture_.height != params.size.y) {
-    /* Resolution divider is different from 1, force nearest interpolation. */
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-  }
-  else if (zoomed_width - params.size.x > 0.5f || zoomed_height - params.size.y > 0.5f) {
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-  }
-  else {
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-  }
-
-  glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_);
-
-  texture_update_if_needed();
-  vertex_buffer_update(params);
-
-  /* TODO(sergey): Does it make sense/possible to cache/reuse the VAO? */
-  GLuint vertex_array_object;
-  glGenVertexArrays(1, &vertex_array_object);
-  glBindVertexArray(vertex_array_object);
-
-  const int texcoord_attribute = display_shader_->get_tex_coord_attrib_location();
-  const int position_attribute = display_shader_->get_position_attrib_location();
-
-  glEnableVertexAttribArray(texcoord_attribute);
-  glEnableVertexAttribArray(position_attribute);
-
-  glVertexAttribPointer(
-      texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
-  glVertexAttribPointer(position_attribute,
-                        2,
-                        GL_FLOAT,
-                        GL_FALSE,
-                        4 * sizeof(float),
-                        (const GLvoid *)(sizeof(float) * 2));
-
-  glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
-
-  glBindBuffer(GL_ARRAY_BUFFER, 0);
-  glBindTexture(GL_TEXTURE_2D, 0);
-
-  glDeleteVertexArrays(1, &vertex_array_object);
-
-  display_shader_->unbind();
-
-  if (transparent) {
-    glDisable(GL_BLEND);
-  }
-
-  gl_render_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
-  glFlush();
-
-  if (use_gl_context_) {
-    gl_context_mutex_.unlock();
-  }
-}
-
-void BlenderDisplayDriver::gl_context_create()
-{
-  /* When rendering in viewport there is no render context available via engine.
-   * Check whether own context is to be created here.
-   *
-   * NOTE: If the `b_engine_`'s context is not available, we are expected to be on a main thread
-   * here. */
-  use_gl_context_ = !RE_engine_has_render_context(
-      reinterpret_cast<RenderEngine *>(b_engine_.ptr.data));
-
-  if (use_gl_context_) {
-    const bool drw_state = DRW_opengl_context_release();
-    gl_context_ = WM_opengl_context_create();
-    if (gl_context_) {
-      /* On Windows an old context is restored after creation, and subsequent release of context
-       * generates a Win32 error. Harmless for users, but annoying to have possible misleading
-       * error prints in the console. */
-#ifndef _WIN32
-      WM_opengl_context_release(gl_context_);
-#endif
-    }
-    else {
-      LOG(ERROR) << "Error creating OpenGL context.";
-    }
-
-    DRW_opengl_context_activate(drw_state);
-  }
-}
-
-bool BlenderDisplayDriver::gl_context_enable()
-{
-  if (use_gl_context_) {
-    if (!gl_context_) {
-      return false;
-    }
-    gl_context_mutex_.lock();
-    WM_opengl_context_activate(gl_context_);
-    return true;
-  }
-
-  RE_engine_render_context_enable(reinterpret_cast<RenderEngine *>(b_engine_.ptr.data));
-  return true;
-}
-
-void BlenderDisplayDriver::gl_context_disable()
-{
-  if (use_gl_context_) {
-    if (gl_context_) {
-      WM_opengl_context_release(gl_context_);
-      gl_context_mutex_.unlock();
-    }
-    return;
-  }
-
-  RE_engine_render_context_disable(reinterpret_cast<RenderEngine *>(b_engine_.ptr.data));
-}
-
-void BlenderDisplayDriver::gl_context_dispose()
-{
-  if (gl_context_) {
-    const bool drw_state = DRW_opengl_context_release();
-
-    WM_opengl_context_activate(gl_context_);
-    WM_opengl_context_dispose(gl_context_);
-
-    DRW_opengl_context_activate(drw_state);
-  }
-}
-
-bool BlenderDisplayDriver::gl_draw_resources_ensure()
-{
-  if (!texture_.gl_id) {
-    /* If there is no texture allocated, there is nothing to draw. Inform the draw call that it can
-     * can not continue. Note that this is not an unrecoverable error, so once the texture is known
-     * we will come back here and create all the GPU resources needed for draw. */
-    return false;
-  }
-
-  if (gl_draw_resource_creation_attempted_) {
-    return gl_draw_resources_created_;
-  }
-  gl_draw_resource_creation_attempted_ = true;
-
-  if (!vertex_buffer_) {
-    glGenBuffers(1, &vertex_buffer_);
-    if (!vertex_buffer_) {
-      LOG(ERROR) << "Error creating vertex buffer.";
-      return false;
-    }
-  }
-
-  gl_draw_resources_created_ = true;
-
-  return true;
-}
-
-void BlenderDisplayDriver::gl_resources_destroy()
-{
-  gl_context_enable();
-
-  if (vertex_buffer_ != 0) {
-    glDeleteBuffers(1, &vertex_buffer_);
-  }
-
-  if (texture_.gl_pbo_id) {
-    glDeleteBuffers(1, &texture_.gl_pbo_id);
-    texture_.gl_pbo_id = 0;
-  }
-
-  if (texture_.gl_id) {
-    glDeleteTextures(1, &texture_.gl_id);
-    texture_.gl_id = 0;
-  }
-
-  gl_context_disable();
-
-  gl_context_dispose();
-}
-
-bool BlenderDisplayDriver::gl_texture_resources_ensure()
-{
-  if (texture_.creation_attempted) {
-    return texture_.is_created;
-  }
-  texture_.creation_attempted = true;
-
-  DCHECK(!texture_.gl_id);
-  DCHECK(!texture_.gl_pbo_id);
-
-  /* Create texture. */
-  glGenTextures(1, &texture_.gl_id);
-  if (!texture_.gl_id) {
-    LOG(ERROR) << "Error creating texture.";
-    return false;
-  }
-
-  /* Configure the texture. */
-  glActiveTexture(GL_TEXTURE0);
-  glBindTexture(GL_TEXTURE_2D, texture_.gl_id);
-  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-  glBindTexture(GL_TEXTURE_2D, 0);
-
-  /* Create PBO for the texture. */
-  glGenBuffers(1, &texture_.gl_pbo_id);
-  if (!texture_.gl_pbo_id) {
-    LOG(ERROR) << "Error creating texture pixel buffer object.";
-    return false;
-  }
-
-  /* Creation finished with a success. */
-  texture_.is_created = true;
-
-  return true;
-}
-
-void BlenderDisplayDriver::texture_update_if_needed()
-{
-  if (!texture_.need_update) {
-    return;
-  }
-
-  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id);
-  glTexSubImage2D(
-      GL_TEXTURE_2D, 0, 0, 0, texture_.width, texture_.height, GL_RGBA, GL_HALF_FLOAT, 0);
-  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
-
-  texture_.need_update = false;
-}
-
-void BlenderDisplayDriver::vertex_buffer_update(const Params &params)
-{
-  /* Invalidate old contents - avoids stalling if the buffer is still waiting in queue to be
-   * rendered. */
-  glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);
-
-  float *vpointer = reinterpret_cast<float *>(glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY));
-  if (!vpointer) {
-    return;
-  }
-
-  vpointer[0] = 0.0f;
-  vpointer[1] = 0.0f;
-  vpointer[2] = params.full_offset.x;
-  vpointer[3] = params.full_offset.y;
-
-  vpointer[4] = 1.0f;
-  vpointer[5] = 0.0f;
-  vpointer[6] = (float)params.size.x + params.full_offset.x;
-  vpointer[7] = params.full_offset.y;
-
-  vpointer[8] = 1.0f;
-  vpointer[9] = 1.0f;
-  vpointer[10] = (float)params.size.x + params.full_offset.x;
-  vpointer[11] = (float)params.size.y + params.full_offset.y;
-
-  vpointer[12] = 0.0f;
-  vpointer[13] = 1.0f;
-  vpointer[14] = params.full_offset.x;
-  vpointer[15] = (float)params.size.y + params.full_offset.y;
-
-  glUnmapBuffer(GL_ARRAY_BUFFER);
-}
-
-CCL_NAMESPACE_END
--- a/intern/cycles/blender/blender_display_driver.h
+++ b/intern/cycles/blender/blender_display_driver.h
@@ -1,210 +0,0 @@
-/*
- * Copyright 2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <atomic>
-
-#include "MEM_guardedalloc.h"
-
-#include "RNA_blender_cpp.h"
-
-#include "render/display_driver.h"
-
-#include "util/util_thread.h"
-#include "util/util_unique_ptr.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* Base class of shader used for display driver rendering. */
-class BlenderDisplayShader {
- public:
-  static constexpr const char *position_attribute_name = "pos";
-  static constexpr const char *tex_coord_attribute_name = "texCoord";
-
-  /* Create shader implementation suitable for the given render engine and scene configuration. */
-  static unique_ptr<BlenderDisplayShader> create(BL::RenderEngine &b_engine, BL::Scene &b_scene);
-
-  BlenderDisplayShader() = default;
-  virtual ~BlenderDisplayShader() = default;
-
-  virtual void bind(int width, int height) = 0;
-  virtual void unbind() = 0;
-
-  /* Get attribute location for position and texture coordinate respectively.
-   * NOTE: The shader needs to be bound to have access to those. */
-  virtual int get_position_attrib_location();
-  virtual int get_tex_coord_attrib_location();
-
- protected:
-  /* Get program of this display shader.
-   * NOTE: The shader needs to be bound to have access to this. */
-  virtual uint get_shader_program() = 0;
-
-  /* Cached values of various OpenGL resources. */
-  int position_attribute_location_ = -1;
-  int tex_coord_attribute_location_ = -1;
-};
-
-/* Implementation of display rendering shader used in the case when render engine does not support
- * display space shader. */
-class BlenderFallbackDisplayShader : public BlenderDisplayShader {
- public:
-  virtual void bind(int width, int height) override;
-  virtual void unbind() override;
-
- protected:
-  virtual uint get_shader_program() override;
-
-  void create_shader_if_needed();
-  void destroy_shader();
-
-  uint shader_program_ = 0;
-  int image_texture_location_ = -1;
-  int fullscreen_location_ = -1;
-
-  /* Shader compilation attempted. Which means, that if the shader program is 0 then compilation or
-   * linking has failed. Do not attempt to re-compile the shader. */
-  bool shader_compile_attempted_ = false;
-};
-
-class BlenderDisplaySpaceShader : public BlenderDisplayShader {
- public:
-  BlenderDisplaySpaceShader(BL::RenderEngine &b_engine, BL::Scene &b_scene);
-
-  virtual void bind(int width, int height) override;
-  virtual void unbind() override;
-
- protected:
-  virtual uint get_shader_program() override;
-
-  BL::RenderEngine b_engine_;
-  BL::Scene &b_scene_;
-
-  /* Cached values of various OpenGL resources. */
-  uint shader_program_ = 0;
-};
-
-/* Display driver implementation which is specific for Blender viewport integration. */
-class BlenderDisplayDriver : public DisplayDriver {
- public:
-  BlenderDisplayDriver(BL::RenderEngine &b_engine, BL::Scene &b_scene);
-  ~BlenderDisplayDriver();
-
-  virtual void graphics_interop_activate() override;
-  virtual void graphics_interop_deactivate() override;
-
-  virtual void clear() override;
-
-  void set_zoom(float zoom_x, float zoom_y);
-
- protected:
-  virtual bool update_begin(const Params &params, int texture_width, int texture_height) override;
-  virtual void update_end() override;
-
-  virtual half4 *map_texture_buffer() override;
-  virtual void unmap_texture_buffer() override;
-
-  virtual GraphicsInterop graphics_interop_get() override;
-
-  virtual void draw(const Params &params) override;
-
-  /* Helper function which allocates new GPU context. */
-  void gl_context_create();
-  bool gl_context_enable();
-  void gl_context_disable();
-  void gl_context_dispose();
-
-  /* Make sure texture is allocated and its initial configuration is performed. */
-  bool gl_texture_resources_ensure();
-
-  /* Ensure all runtime GPU resources needed for drawing are allocated.
-   * Returns true if all resources needed for drawing are available. */
-  bool gl_draw_resources_ensure();
-
-  /* Destroy all GPU resources which are being used by this object. */
-  void gl_resources_destroy();
-
-  /* Update GPU texture dimensions and content if needed (new pixel data was provided).
-   *
-   * NOTE: The texture needs to be bound. */
-  void texture_update_if_needed();
-
-  /* Update vertex buffer with new coordinates of vertex positions and texture coordinates.
-   * This buffer is used to render texture in the viewport.
-   *
-   * NOTE: The buffer needs to be bound. */
-  void vertex_buffer_update(const Params &params);
-
-  BL::RenderEngine b_engine_;
-
-  /* OpenGL context which is used the render engine doesn't have its own. */
-  void *gl_context_ = nullptr;
-  /* The when Blender RenderEngine side context is not available and the DisplayDriver is to create
-   * its own context. */
-  bool use_gl_context_ = false;
-  /* Mutex used to guard the `gl_context_`. */
-  thread_mutex gl_context_mutex_;
-
-  /* Texture which contains pixels of the render result. */
-  struct {
-    /* Indicates whether texture creation was attempted and succeeded.
-     * Used to avoid multiple attempts of texture creation on GPU issues or GPU context
-     * misconfiguration. */
-    bool creation_attempted = false;
-    bool is_created = false;
-
-    /* OpenGL resource IDs of the texture itself and Pixel Buffer Object (PBO) used to write
-     * pixels to it.
-     *
-     * NOTE: Allocated on the engine's context. */
-    uint gl_id = 0;
-    uint gl_pbo_id = 0;
-
-    /* Is true when new data was written to the PBO, meaning, the texture might need to be resized
-     * and new data is to be uploaded to the GPU. */
-    bool need_update = false;
-
-    /* Content of the texture is to be filled with zeroes. */
-    std::atomic<bool> need_clear = true;
-
-    /* Dimensions of the texture in pixels. */
-    int width = 0;
-    int height = 0;
-
-    /* Dimensions of the underlying PBO. */
-    int buffer_width = 0;
-    int buffer_height = 0;
-  } texture_;
-
-  unique_ptr<BlenderDisplayShader> display_shader_;
-
-  /* Special track of whether GPU resources were attempted to be created, to avoid attempts of
-   * their re-creation on failure on every redraw. */
-  bool gl_draw_resource_creation_attempted_ = false;
-  bool gl_draw_resources_created_ = false;
-
-  /* Vertex buffer which hold vertices of a triangle fan which is textures with the texture
-   * holding the render result. */
-  uint vertex_buffer_ = 0;
-
-  void *gl_render_sync_ = nullptr;
-  void *gl_upload_sync_ = nullptr;
-
-  float2 zoom_ = make_float2(1.0f, 1.0f);
-};
-
-CCL_NAMESPACE_END
--- a/intern/cycles/blender/blender_geometry.cpp
+++ b/intern/cycles/blender/blender_geometry.cpp
@@ -80,11 +80,7 @@ Geometry *BlenderSync::sync_geometry(BL::Depsgraph &b_depsgraph,
 {
  /* Test if we can instance or if the object is modified. */
  Geometry::Type geom_type = determine_geom_type(b_ob_info, use_particle_hair);
-  BL::ID b_key_id = (b_ob_info.is_real_object_data() &&
-                     BKE_object_is_modified(b_ob_info.real_object)) ?
-                        b_ob_info.real_object :
-                        b_ob_info.object_data;
-  GeometryKey key(b_key_id.ptr.data, geom_type);
+  GeometryKey key(b_ob_info.object_data, geom_type);

  /* Find shader indices. */
  array<Node *> used_shaders = find_used_shaders(b_ob_info.iter_object);
@@ -114,7 +110,7 @@ Geometry *BlenderSync::sync_geometry(BL::Depsgraph &b_depsgraph,
  }
  else {
    /* Test if we need to update existing geometry. */
-    sync = geometry_map.update(geom, b_key_id);
+    sync = geometry_map.update(geom, b_ob_info.object_data);
  }

  if (!sync) {
--- a/intern/cycles/blender/blender_light.cpp
+++ b/intern/cycles/blender/blender_light.cpp
@@ -125,10 +125,17 @@ void BlenderSync::sync_light(BL::Object &b_parent,
  light->set_shader(static_cast<Shader *>(used_shaders[0]));

  /* shadow */
+  PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
  PointerRNA clight = RNA_pointer_get(&b_light.ptr, "cycles");
  light->set_cast_shadow(get_boolean(clight, "cast_shadow"));
  light->set_use_mis(get_boolean(clight, "use_multiple_importance_sampling"));

+  int samples = get_int(clight, "samples");
+  if (get_boolean(cscene, "use_square_samples"))
+    light->set_samples(samples * samples);
+  else
+    light->set_samples(samples);
+
  light->set_max_bounces(get_int(clight, "max_bounces"));

  if (b_ob_info.real_object != b_ob_info.iter_object) {
@@ -148,12 +155,10 @@ void BlenderSync::sync_light(BL::Object &b_parent,

  /* visibility */
  uint visibility = object_ray_visibility(b_ob_info.real_object);
-  light->set_use_camera((visibility & PATH_RAY_CAMERA) != 0);
  light->set_use_diffuse((visibility & PATH_RAY_DIFFUSE) != 0);
  light->set_use_glossy((visibility & PATH_RAY_GLOSSY) != 0);
  light->set_use_transmission((visibility & PATH_RAY_TRANSMIT) != 0);
  light->set_use_scatter((visibility & PATH_RAY_VOLUME_SCATTER) != 0);
-  light->set_is_shadow_catcher(b_ob_info.real_object.is_shadow_catcher());

  /* tag */
  light->tag_update(scene);
@@ -164,6 +169,7 @@ void BlenderSync::sync_background_light(BL::SpaceView3D &b_v3d, bool use_portal)
  BL::World b_world = b_scene.world();

  if (b_world) {
+    PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
    PointerRNA cworld = RNA_pointer_get(&b_world.ptr, "cycles");

    enum SamplingMethod { SAMPLING_NONE = 0, SAMPLING_AUTOMATIC, SAMPLING_MANUAL, SAMPLING_NUM };
@@ -191,6 +197,12 @@ void BlenderSync::sync_background_light(BL::SpaceView3D &b_v3d, bool use_portal)
        /* force enable light again when world is resynced */
        light->set_is_enabled(true);

+        int samples = get_int(cworld, "samples");
+        if (get_boolean(cscene, "use_square_samples"))
+          light->set_samples(samples * samples);
+        else
+          light->set_samples(samples);
+
        light->tag_update(scene);
        light_map.set_recalc(b_world);
      }
@@ -199,7 +211,7 @@ void BlenderSync::sync_background_light(BL::SpaceView3D &b_v3d, bool use_portal)

  world_map = b_world.ptr.data;
  world_recalc = false;
-  viewport_parameters = BlenderViewportParameters(b_v3d, use_developer_ui);
+  viewport_parameters = BlenderViewportParameters(b_v3d);
 }

 CCL_NAMESPACE_END
--- a/intern/cycles/blender/blender_object.cpp
+++ b/intern/cycles/blender/blender_object.cpp
@@ -568,7 +568,7 @@ void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph,
  /* object loop */
  bool cancel = false;
  bool use_portal = false;
-  const bool show_lights = BlenderViewportParameters(b_v3d, use_developer_ui).use_scene_lights;
+  const bool show_lights = BlenderViewportParameters(b_v3d).use_scene_lights;

  BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval();
  BL::Depsgraph::object_instances_iterator b_instance_iter;
--- a/intern/cycles/blender/blender_output_driver.cpp
+++ b/intern/cycles/blender/blender_output_driver.cpp
@@ -1,127 +0,0 @@
-/*
- * Copyright 2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "blender/blender_output_driver.h"
-
-CCL_NAMESPACE_BEGIN
-
-BlenderOutputDriver::BlenderOutputDriver(BL::RenderEngine &b_engine) : b_engine_(b_engine)
-{
-}
-
-BlenderOutputDriver::~BlenderOutputDriver()
-{
-}
-
-bool BlenderOutputDriver::read_render_tile(const Tile &tile)
-{
-  /* Get render result. */
-  BL::RenderResult b_rr = b_engine_.begin_result(tile.offset.x,
-                                                 tile.offset.y,
-                                                 tile.size.x,
-                                                 tile.size.y,
-                                                 tile.layer.c_str(),
-                                                 tile.view.c_str());
-
-  /* Can happen if the intersected rectangle gives 0 width or height. */
-  if (b_rr.ptr.data == NULL) {
-    return false;
-  }
-
-  BL::RenderResult::layers_iterator b_single_rlay;
-  b_rr.layers.begin(b_single_rlay);
-
-  /* layer will be missing if it was disabled in the UI */
-  if (b_single_rlay == b_rr.layers.end()) {
-    return false;
-  }
-
-  BL::RenderLayer b_rlay = *b_single_rlay;
-
-  vector<float> pixels(tile.size.x * tile.size.y * 4);
-
-  /* Copy each pass.
-   * TODO:copy only the required ones for better performance? */
-  for (BL::RenderPass &b_pass : b_rlay.passes) {
-    tile.set_pass_pixels(b_pass.name(), b_pass.channels(), (float *)b_pass.rect());
-  }
-
-  b_engine_.end_result(b_rr, false, false, false);
-
-  return true;
-}
-
-bool BlenderOutputDriver::update_render_tile(const Tile &tile)
-{
-  /* Use final write for preview renders, otherwise render result wouldn't be be updated
-   * quickly on Blender side. For all other cases we use the display driver. */
-  if (b_engine_.is_preview()) {
-    write_render_tile(tile);
-    return true;
-  }
-  else {
-    /* Don't highlight full-frame tile. */
-    if (!(tile.size == tile.full_size)) {
-      b_engine_.tile_highlight_clear_all();
-      b_engine_.tile_highlight_set(tile.offset.x, tile.offset.y, tile.size.x, tile.size.y, true);
-    }
-
-    return false;
-  }
-}
-
-void BlenderOutputDriver::write_render_tile(const Tile &tile)
-{
-  b_engine_.tile_highlight_clear_all();
-
-  /* Get render result. */
-  BL::RenderResult b_rr = b_engine_.begin_result(tile.offset.x,
-                                                 tile.offset.y,
-                                                 tile.size.x,
-                                                 tile.size.y,
-                                                 tile.layer.c_str(),
-                                                 tile.view.c_str());
-
-  /* Can happen if the intersected rectangle gives 0 width or height. */
-  if (b_rr.ptr.data == NULL) {
-    return;
-  }
-
-  BL::RenderResult::layers_iterator b_single_rlay;
-  b_rr.layers.begin(b_single_rlay);
-
-  /* Layer will be missing if it was disabled in the UI. */
-  if (b_single_rlay == b_rr.layers.end()) {
-    return;
-  }
-
-  BL::RenderLayer b_rlay = *b_single_rlay;
-
-  vector<float> pixels(tile.size.x * tile.size.y * 4);
-
-  /* Copy each pass. */
-  for (BL::RenderPass &b_pass : b_rlay.passes) {
-    if (!tile.get_pass_pixels(b_pass.name(), b_pass.channels(), &pixels[0])) {
-      memset(&pixels[0], 0, pixels.size() * sizeof(float));
-    }
-
-    b_pass.rect(&pixels[0]);
-  }
-
-  b_engine_.end_result(b_rr, true, false, true);
-}
-
-CCL_NAMESPACE_END
--- a/intern/cycles/blender/blender_output_driver.h
+++ b/intern/cycles/blender/blender_output_driver.h
@@ -1,40 +0,0 @@
-/*
- * Copyright 2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "MEM_guardedalloc.h"
-
-#include "RNA_blender_cpp.h"
-
-#include "render/output_driver.h"
-
-CCL_NAMESPACE_BEGIN
-
-class BlenderOutputDriver : public OutputDriver {
- public:
-  BlenderOutputDriver(BL::RenderEngine &b_engine);
-  ~BlenderOutputDriver();
-
-  virtual void write_render_tile(const Tile &tile) override;
-  virtual bool update_render_tile(const Tile &tile) override;
-  virtual bool read_render_tile(const Tile &tile) override;
-
- protected:
-  BL::RenderEngine b_engine_;
-};
-
-CCL_NAMESPACE_END
--- a/intern/cycles/blender/blender_python.cpp
+++ b/intern/cycles/blender/blender_python.cpp
@@ -45,6 +45,10 @@
 #  include <OSL/oslquery.h>
 #endif

+#ifdef WITH_OPENCL
+#  include "device/device_intern.h"
+#endif
+
 CCL_NAMESPACE_BEGIN

 namespace {
@@ -68,10 +72,12 @@ PyObject *pyunicode_from_string(const char *str)
 /* Synchronize debug flags from a given Blender scene.
 * Return truth when device list needs invalidation.
 */
-static void debug_flags_sync_from_scene(BL::Scene b_scene)
+bool debug_flags_sync_from_scene(BL::Scene b_scene)
 {
  DebugFlagsRef flags = DebugFlags();
  PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
+  /* Backup some settings for comparison. */
+  DebugFlags::OpenCL::DeviceType opencl_device_type = flags.opencl.device_type;
  /* Synchronize shared flags. */
  flags.viewport_static_bvh = get_enum(cscene, "debug_bvh_type");
  /* Synchronize CPU flags. */
@@ -81,19 +87,50 @@ static void debug_flags_sync_from_scene(BL::Scene b_scene)
  flags.cpu.sse3 = get_boolean(cscene, "debug_use_cpu_sse3");
  flags.cpu.sse2 = get_boolean(cscene, "debug_use_cpu_sse2");
  flags.cpu.bvh_layout = (BVHLayout)get_enum(cscene, "debug_bvh_layout");
+  flags.cpu.split_kernel = get_boolean(cscene, "debug_use_cpu_split_kernel");
  /* Synchronize CUDA flags. */
  flags.cuda.adaptive_compile = get_boolean(cscene, "debug_use_cuda_adaptive_compile");
+  flags.cuda.split_kernel = get_boolean(cscene, "debug_use_cuda_split_kernel");
  /* Synchronize OptiX flags. */
-  flags.optix.use_debug = get_boolean(cscene, "debug_use_optix_debug");
+  flags.optix.cuda_streams = get_int(cscene, "debug_optix_cuda_streams");
+  flags.optix.curves_api = get_boolean(cscene, "debug_optix_curves_api");
+  /* Synchronize OpenCL device type. */
+  switch (get_enum(cscene, "debug_opencl_device_type")) {
+    case 0:
+      flags.opencl.device_type = DebugFlags::OpenCL::DEVICE_NONE;
+      break;
+    case 1:
+      flags.opencl.device_type = DebugFlags::OpenCL::DEVICE_ALL;
+      break;
+    case 2:
+      flags.opencl.device_type = DebugFlags::OpenCL::DEVICE_DEFAULT;
+      break;
+    case 3:
+      flags.opencl.device_type = DebugFlags::OpenCL::DEVICE_CPU;
+      break;
+    case 4:
+      flags.opencl.device_type = DebugFlags::OpenCL::DEVICE_GPU;
+      break;
+    case 5:
+      flags.opencl.device_type = DebugFlags::OpenCL::DEVICE_ACCELERATOR;
+      break;
+  }
+  /* Synchronize other OpenCL flags. */
+  flags.opencl.debug = get_boolean(cscene, "debug_use_opencl_debug");
+  flags.opencl.mem_limit = ((size_t)get_int(cscene, "debug_opencl_mem_limit")) * 1024 * 1024;
+  return flags.opencl.device_type != opencl_device_type;
 }

 /* Reset debug flags to default values.
 * Return truth when device list needs invalidation.
 */
-static void debug_flags_reset()
+bool debug_flags_reset()
 {
  DebugFlagsRef flags = DebugFlags();
+  /* Backup some settings for comparison. */
+  DebugFlags::OpenCL::DeviceType opencl_device_type = flags.opencl.device_type;
  flags.reset();
+  return flags.opencl.device_type != opencl_device_type;
 }

 } /* namespace */
@@ -138,20 +175,18 @@ static const char *PyC_UnicodeAsByte(PyObject *py_str, PyObject **coerce)

 static PyObject *init_func(PyObject * /*self*/, PyObject *args)
 {
-  PyObject *path, *user_path, *temp_path;
+  PyObject *path, *user_path;
  int headless;

-  if (!PyArg_ParseTuple(args, "OOOi", &path, &user_path, &temp_path, &headless)) {
-    return nullptr;
+  if (!PyArg_ParseTuple(args, "OOi", &path, &user_path, &headless)) {
+    return NULL;
  }

-  PyObject *path_coerce = nullptr, *user_path_coerce = nullptr, *temp_path_coerce = nullptr;
+  PyObject *path_coerce = NULL, *user_path_coerce = NULL;
  path_init(PyC_UnicodeAsByte(path, &path_coerce),
-            PyC_UnicodeAsByte(user_path, &user_path_coerce),
-            PyC_UnicodeAsByte(temp_path, &temp_path_coerce));
+            PyC_UnicodeAsByte(user_path, &user_path_coerce));
  Py_XDECREF(path_coerce);
  Py_XDECREF(user_path_coerce);
-  Py_XDECREF(temp_path_coerce);

  BlenderSession::headless = headless;

@@ -264,50 +299,6 @@ static PyObject *render_func(PyObject * /*self*/, PyObject *args)
  Py_RETURN_NONE;
 }

-static PyObject *render_frame_finish_func(PyObject * /*self*/, PyObject *args)
-{
-  PyObject *pysession;
-
-  if (!PyArg_ParseTuple(args, "O", &pysession)) {
-    return nullptr;
-  }
-
-  BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession);
-
-  /* Allow Blender to execute other Python scripts. */
-  python_thread_state_save(&session->python_thread_state);
-
-  session->render_frame_finish();
-
-  python_thread_state_restore(&session->python_thread_state);
-
-  Py_RETURN_NONE;
-}
-
-static PyObject *draw_func(PyObject * /*self*/, PyObject *args)
-{
-  PyObject *py_session, *py_graph, *py_screen, *py_space_image;
-
-  if (!PyArg_ParseTuple(args, "OOOO", &py_session, &py_graph, &py_screen, &py_space_image)) {
-    return nullptr;
-  }
-
-  BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(py_session);
-
-  ID *b_screen = (ID *)PyLong_AsVoidPtr(py_screen);
-
-  PointerRNA b_space_image_ptr;
-  RNA_pointer_create(b_screen,
-                     &RNA_SpaceImageEditor,
-                     pylong_as_voidptr_typesafe(py_space_image),
-                     &b_space_image_ptr);
-  BL::SpaceImageEditor b_space_image(b_space_image_ptr);
-
-  session->draw(b_space_image);
-
-  Py_RETURN_NONE;
-}
-
 /* pixel_array and result passed as pointers */
 static PyObject *bake_func(PyObject * /*self*/, PyObject *args)
 {
@@ -345,7 +336,7 @@ static PyObject *bake_func(PyObject * /*self*/, PyObject *args)
  Py_RETURN_NONE;
 }

-static PyObject *view_draw_func(PyObject * /*self*/, PyObject *args)
+static PyObject *draw_func(PyObject * /*self*/, PyObject *args)
 {
  PyObject *pysession, *pygraph, *pyv3d, *pyrv3d;

@@ -359,7 +350,7 @@ static PyObject *view_draw_func(PyObject * /*self*/, PyObject *args)
    int viewport[4];
    glGetIntegerv(GL_VIEWPORT, viewport);

-    session->view_draw(viewport[2], viewport[3]);
+    session->draw(viewport[2], viewport[3]);
  }

  Py_RETURN_NONE;
@@ -706,6 +697,40 @@ static PyObject *system_info_func(PyObject * /*self*/, PyObject * /*value*/)
  return pyunicode_from_string(system_info.c_str());
 }

+#ifdef WITH_OPENCL
+static PyObject *opencl_disable_func(PyObject * /*self*/, PyObject * /*value*/)
+{
+  VLOG(2) << "Disabling OpenCL platform.";
+  DebugFlags().opencl.device_type = DebugFlags::OpenCL::DEVICE_NONE;
+  Py_RETURN_NONE;
+}
+
+/* Stringify every argument, pass the list to device_opencl_compile_kernel(), return a bool. */
+static PyObject *opencl_compile_func(PyObject * /*self*/, PyObject *args)
+{
+  PyObject *sequence = PySequence_Fast(args, "Arguments must be a sequence");
+  if (sequence == NULL) {
+    return NULL;
+  }
+
+  vector<string> parameters;
+  for (Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(sequence); i++) {
+    PyObject *item_as_string = PyObject_Str(PySequence_Fast_GET_ITEM(sequence, i));
+    const char *parameter_string = item_as_string ? PyUnicode_AsUTF8(item_as_string) : NULL;
+    if (parameter_string == NULL) {
+      /* str() or the UTF-8 conversion failed: propagate the pending exception. */
+      Py_XDECREF(item_as_string);
+      Py_DECREF(sequence);
+      return NULL;
+    }
+    parameters.push_back(parameter_string);
+    Py_DECREF(item_as_string);
+  }
+  Py_DECREF(sequence);
+  return PyBool_FromLong(device_opencl_compile_kernel(parameters));
+}
+#endif
+
 static bool image_parse_filepaths(PyObject *pyfilepaths, vector<string> &filepaths)
 {
  if (PyUnicode_Check(pyfilepaths)) {
@@ -737,10 +762,6 @@ static bool image_parse_filepaths(PyObject *pyfilepaths, vector<string> &filepat

 static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *keywords)
 {
-#if 1
-  (void)args;
-  (void)keywords;
-#else
  static const char *keyword_list[] = {
      "preferences", "scene", "view_layer", "input", "output", "tile_size", "samples", NULL};
  PyObject *pypreferences, *pyscene, *pyviewlayer;
@@ -814,7 +835,7 @@ static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *key
  }

  /* Create denoiser. */
-  DenoiserPipeline denoiser(device);
+  Denoiser denoiser(device);
  denoiser.params = params;
  denoiser.input = input;
  denoiser.output = output;
@@ -831,7 +852,6 @@ static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *key
    PyErr_SetString(PyExc_ValueError, denoiser.error.c_str());
    return NULL;
  }
-#endif

  Py_RETURN_NONE;
 }
@@ -883,7 +903,10 @@ static PyObject *debug_flags_update_func(PyObject * /*self*/, PyObject *args)
  RNA_id_pointer_create((ID *)PyLong_AsVoidPtr(pyscene), &sceneptr);
  BL::Scene b_scene(sceneptr);

-  debug_flags_sync_from_scene(b_scene);
+  if (debug_flags_sync_from_scene(b_scene)) {
+    VLOG(2) << "Tagging device list for update.";
+    Device::tag_update();
+  }

  VLOG(2) << "Debug flags set to:\n" << DebugFlags();

@@ -894,7 +917,10 @@ static PyObject *debug_flags_update_func(PyObject * /*self*/, PyObject *args)

 static PyObject *debug_flags_reset_func(PyObject * /*self*/, PyObject * /*args*/)
 {
-  debug_flags_reset();
+  if (debug_flags_reset()) {
+    VLOG(2) << "Tagging device list for update.";
+    Device::tag_update();
+  }
  if (debug_flags_set) {
    VLOG(2) << "Debug flags reset to:\n" << DebugFlags();
    debug_flags_set = false;
@@ -902,6 +928,84 @@ static PyObject *debug_flags_reset_func(PyObject * /*self*/, PyObject * /*args*/
  Py_RETURN_NONE;
 }

+/* Select a single chunk of a resumable render: args are (num_chunks, current_chunk), 1-based.
+ * Returns None, or NULL with the parser's exception set when the arguments are malformed. */
+static PyObject *set_resumable_chunk_func(PyObject * /*self*/, PyObject *args)
+{
+  int num_resumable_chunks, current_resumable_chunk;
+  if (!PyArg_ParseTuple(args, "ii", &num_resumable_chunks, &current_resumable_chunk)) {
+    return NULL;
+  }
+  /* Out-of-range values are fatal: report on stderr and abort the process. */
+  if (num_resumable_chunks <= 0) {
+    fprintf(stderr, "Cycles: Bad value for number of resumable chunks.\n");
+    abort();
+  }
+  if (current_resumable_chunk < 1 || current_resumable_chunk > num_resumable_chunks) {
+    fprintf(stderr, "Cycles: Bad value for current resumable chunk number.\n");
+    abort();
+  }
+
+  VLOG(1) << "Initialized resumable render: "
+          << "num_resumable_chunks=" << num_resumable_chunks << ", "
+          << "current_resumable_chunk=" << current_resumable_chunk;
+  BlenderSession::num_resumable_chunks = num_resumable_chunks;
+  BlenderSession::current_resumable_chunk = current_resumable_chunk;
+
+  printf("Cycles: Will render chunk %d of %d\n", current_resumable_chunk, num_resumable_chunks);
+
+  Py_RETURN_NONE;
+}
+
+/* Select a range of chunks of a resumable render.
+ * Args are (num_chunks, start_chunk, end_chunk); chunks are 1-based, start_chunk <= end_chunk.
+ * Returns None, or NULL with the parser's exception set when the arguments are malformed. */
+static PyObject *set_resumable_chunk_range_func(PyObject * /*self*/, PyObject *args)
+{
+  int num_chunks, start_chunk, end_chunk;
+  if (!PyArg_ParseTuple(args, "iii", &num_chunks, &start_chunk, &end_chunk)) {
+    return NULL;
+  }
+  /* Out-of-range values are fatal: report on stderr and abort the process. */
+  if (num_chunks <= 0) {
+    fprintf(stderr, "Cycles: Bad value for number of resumable chunks.\n");
+    abort();
+  }
+  if (start_chunk < 1 || start_chunk > num_chunks) {
+    fprintf(stderr, "Cycles: Bad value for start chunk number.\n");
+    abort();
+  }
+  if (end_chunk < 1 || end_chunk > num_chunks) {
+    fprintf(stderr, "Cycles: Bad value for end chunk number.\n");
+    abort();
+  }
+  if (start_chunk > end_chunk) {
+    fprintf(stderr, "Cycles: End chunk should be higher than start one.\n");
+    abort();
+  }
+
+  VLOG(1) << "Initialized resumable render: "
+          << "num_resumable_chunks=" << num_chunks << ", "
+          << "start_resumable_chunk=" << start_chunk << ", "
+          << "end_resumable_chunk=" << end_chunk;
+  BlenderSession::num_resumable_chunks = num_chunks;
+  BlenderSession::start_resumable_chunk = start_chunk;
+  BlenderSession::end_resumable_chunk = end_chunk;
+
+  printf("Cycles: Will render chunks %d to %d of %d\n", start_chunk, end_chunk, num_chunks);
+
+  Py_RETURN_NONE;
+}
+
+static PyObject *clear_resumable_chunk_func(PyObject * /*self*/, PyObject * /*value*/)
+{
+  VLOG(1) << "Clear resumable render";
+  BlenderSession::num_resumable_chunks = 0;
+  BlenderSession::current_resumable_chunk = 0;
+
+  Py_RETURN_NONE;
+}
+
 static PyObject *enable_print_stats_func(PyObject * /*self*/, PyObject * /*args*/)
 {
  BlenderSession::print_render_stats = true;
@@ -911,16 +1015,16 @@ static PyObject *enable_print_stats_func(PyObject * /*self*/, PyObject * /*args*
 static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/)
 {
  vector<DeviceType> device_types = Device::available_types();
-  bool has_cuda = false, has_optix = false, has_hip = false;
+  bool has_cuda = false, has_optix = false, has_opencl = false;
  foreach (DeviceType device_type, device_types) {
    has_cuda |= (device_type == DEVICE_CUDA);
    has_optix |= (device_type == DEVICE_OPTIX);
-    has_hip |= (device_type == DEVICE_HIP);
+    has_opencl |= (device_type == DEVICE_OPENCL);
  }
  PyObject *list = PyTuple_New(3);
  PyTuple_SET_ITEM(list, 0, PyBool_FromLong(has_cuda));
  PyTuple_SET_ITEM(list, 1, PyBool_FromLong(has_optix));
-  PyTuple_SET_ITEM(list, 2, PyBool_FromLong(has_hip));
+  PyTuple_SET_ITEM(list, 2, PyBool_FromLong(has_opencl));
  return list;
 }

@@ -940,15 +1044,15 @@ static PyObject *set_device_override_func(PyObject * /*self*/, PyObject *arg)
  if (override == "CPU") {
    BlenderSession::device_override = DEVICE_MASK_CPU;
  }
+  else if (override == "OPENCL") {
+    BlenderSession::device_override = DEVICE_MASK_OPENCL;
+  }
  else if (override == "CUDA") {
    BlenderSession::device_override = DEVICE_MASK_CUDA;
  }
  else if (override == "OPTIX") {
    BlenderSession::device_override = DEVICE_MASK_OPTIX;
  }
-  else if (override == "HIP") {
-    BlenderSession::device_override = DEVICE_MASK_HIP;
-  }
  else {
    printf("\nError: %s is not a valid Cycles device.\n", override.c_str());
    Py_RETURN_FALSE;
@@ -968,10 +1072,8 @@ static PyMethodDef methods[] = {
    {"create", create_func, METH_VARARGS, ""},
    {"free", free_func, METH_O, ""},
    {"render", render_func, METH_VARARGS, ""},
-    {"render_frame_finish", render_frame_finish_func, METH_VARARGS, ""},
-    {"draw", draw_func, METH_VARARGS, ""},
    {"bake", bake_func, METH_VARARGS, ""},
-    {"view_draw", view_draw_func, METH_VARARGS, ""},
+    {"draw", draw_func, METH_VARARGS, ""},
    {"sync", sync_func, METH_VARARGS, ""},
    {"reset", reset_func, METH_VARARGS, ""},
 #ifdef WITH_OSL
@@ -980,6 +1082,10 @@ static PyMethodDef methods[] = {
 #endif
    {"available_devices", available_devices_func, METH_VARARGS, ""},
    {"system_info", system_info_func, METH_NOARGS, ""},
+#ifdef WITH_OPENCL
+    {"opencl_disable", opencl_disable_func, METH_NOARGS, ""},
+    {"opencl_compile", opencl_compile_func, METH_VARARGS, ""},
+#endif

    /* Standalone denoising */
    {"denoise", (PyCFunction)denoise_func, METH_VARARGS | METH_KEYWORDS, ""},
@@ -992,6 +1098,11 @@ static PyMethodDef methods[] = {
    /* Statistics. */
    {"enable_print_stats", enable_print_stats_func, METH_NOARGS, ""},

+    /* Resumable render */
+    {"set_resumable_chunk", set_resumable_chunk_func, METH_VARARGS, ""},
+    {"set_resumable_chunk_range", set_resumable_chunk_range_func, METH_VARARGS, ""},
+    {"clear_resumable_chunk", clear_resumable_chunk_func, METH_NOARGS, ""},
+
    /* Compute Device selection */
    {"get_device_types", get_device_types_func, METH_VARARGS, ""},
    {"set_device_override", set_device_override_func, METH_O, ""},
@@ -1042,6 +1153,14 @@ void *CCL_python_module_init()
  PyModule_AddStringConstant(mod, "osl_version_string", "unknown");
 #endif

+#ifdef WITH_NETWORK
+  PyModule_AddObject(mod, "with_network", Py_True);
+  Py_INCREF(Py_True);
+#else  /* WITH_NETWORK */
+  PyModule_AddObject(mod, "with_network", Py_False);
+  Py_INCREF(Py_False);
+#endif /* WITH_NETWORK */
+
 #ifdef WITH_EMBREE
  PyModule_AddObject(mod, "with_embree", Py_True);
  Py_INCREF(Py_True);
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -38,12 +38,9 @@
 #include "util/util_hash.h"
 #include "util/util_logging.h"
 #include "util/util_murmurhash.h"
-#include "util/util_path.h"
 #include "util/util_progress.h"
 #include "util/util_time.h"

-#include "blender/blender_display_driver.h"
-#include "blender/blender_output_driver.h"
 #include "blender/blender_session.h"
 #include "blender/blender_sync.h"
 #include "blender/blender_util.h"
@@ -52,6 +49,10 @@ CCL_NAMESPACE_BEGIN

 DeviceTypeMask BlenderSession::device_override = DEVICE_MASK_ALL;
 bool BlenderSession::headless = false;
+int BlenderSession::num_resumable_chunks = 0;
+int BlenderSession::current_resumable_chunk = 0;
+int BlenderSession::start_resumable_chunk = 0;
+int BlenderSession::end_resumable_chunk = 0;
 bool BlenderSession::print_render_stats = false;

 BlenderSession::BlenderSession(BL::RenderEngine &b_engine,
@@ -72,8 +73,7 @@ BlenderSession::BlenderSession(BL::RenderEngine &b_engine,
      width(0),
      height(0),
      preview_osl(preview_osl),
-      python_thread_state(NULL),
-      use_developer_ui(false)
+      python_thread_state(NULL)
 {
  /* offline render */
  background = true;
@@ -103,9 +103,7 @@ BlenderSession::BlenderSession(BL::RenderEngine &b_engine,
      width(width),
      height(height),
      preview_osl(false),
-      python_thread_state(NULL),
-      use_developer_ui(b_userpref.experimental().use_cycles_debug() &&
-                       b_userpref.view().show_developer_ui())
+      python_thread_state(NULL)
 {
  /* 3d view render */
  background = false;
@@ -121,10 +119,10 @@ BlenderSession::~BlenderSession()

 void BlenderSession::create_session()
 {
-  const SessionParams session_params = BlenderSync::get_session_params(
+  SessionParams session_params = BlenderSync::get_session_params(
      b_engine, b_userpref, b_scene, background);
-  const SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
-  const bool session_pause = BlenderSync::get_session_pause(b_scene, background);
+  SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
+  bool session_pause = BlenderSync::get_session_pause(b_scene, background);

  /* reset status/progress */
  last_status = "";
@@ -133,18 +131,20 @@ void BlenderSession::create_session()
  start_resize_time = 0.0;

  /* create session */
-  session = new Session(session_params, scene_params);
+  session = new Session(session_params);
+  session->scene = scene;
  session->progress.set_update_callback(function_bind(&BlenderSession::tag_redraw, this));
  session->progress.set_cancel_callback(function_bind(&BlenderSession::test_cancel, this));
  session->set_pause(session_pause);

  /* create scene */
-  scene = session->scene;
+  scene = new Scene(scene_params, session->device);
  scene->name = b_scene.name();

+  session->scene = scene;
+
  /* create sync */
-  sync = new BlenderSync(
-      b_engine, b_data, b_scene, scene, !background, use_developer_ui, session->progress);
+  sync = new BlenderSync(b_engine, b_data, b_scene, scene, !background, session->progress);
  BL::Object b_camera_override(b_engine.camera_override());
  if (b_v3d) {
    sync->sync_view(b_v3d, b_rv3d, width, height);
@@ -154,27 +154,13 @@ void BlenderSession::create_session()
  }

  /* set buffer parameters */
-  const BufferParams buffer_params = BlenderSync::get_buffer_params(
-      b_v3d, b_rv3d, scene->camera, width, height);
-  session->reset(session_params, buffer_params);
+  BufferParams buffer_params = BlenderSync::get_buffer_params(
+      b_v3d, b_rv3d, scene->camera, width, height, session_params.denoising.use);
+  session->reset(buffer_params, session_params.samples);

-  /* Create GPU display.
-   * TODO(sergey): Investigate whether DisplayDriver can be used for the preview as well. */
-  if (!b_engine.is_preview() && !headless) {
-    unique_ptr<BlenderDisplayDriver> display_driver = make_unique<BlenderDisplayDriver>(b_engine,
-                                                                                        b_scene);
-    display_driver_ = display_driver.get();
-    session->set_display_driver(move(display_driver));
-  }
+  b_engine.use_highlight_tiles(session_params.progressive_refine == false);

-  /* Viewport and preview (as in, material preview) does not do tiled rendering, so can inform
-   * engine that no tracking of the tiles state is needed.
-   * The offline rendering will make a decision when tile is being written. The penalty of asking
-   * the engine to keep track of tiles state is minimal, so there is nothing to worry about here
-   * about possible single-tiled final render. */
-  if (!b_engine.is_preview() && !b_v3d) {
-    b_engine.use_highlight_tiles(true);
-  }
+  update_resumable_tile_manager(session_params.samples);
 }

 void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsgraph)
@@ -216,9 +202,9 @@ void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsg
    return;
  }

-  const SessionParams session_params = BlenderSync::get_session_params(
+  SessionParams session_params = BlenderSync::get_session_params(
      b_engine, b_userpref, b_scene, background);
-  const SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
+  SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);

  if (scene->params.modified(scene_params) || session->params.modified(session_params) ||
      !this->b_render.use_persistent_data()) {
@@ -234,6 +220,8 @@ void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsg

  session->progress.reset();

+  session->tile_manager.set_tile_order(session_params.tile_order);
+
  /* peak memory usage should show current render peak, not peak for all renders
   * made by this render session
   */
@@ -242,8 +230,7 @@ void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsg
  if (is_new_session) {
    /* Sync object should be re-created for new scene. */
    delete sync;
-    sync = new BlenderSync(
-        b_engine, b_data, b_scene, scene, !background, use_developer_ui, session->progress);
+    sync = new BlenderSync(b_engine, b_data, b_scene, scene, !background, session->progress);
  }
  else {
    /* Sync recalculations to do just the required updates. */
@@ -255,35 +242,171 @@ void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsg

  BL::SpaceView3D b_null_space_view3d(PointerRNA_NULL);
  BL::RegionView3D b_null_region_view3d(PointerRNA_NULL);
-  const BufferParams buffer_params = BlenderSync::get_buffer_params(
-      b_null_space_view3d, b_null_region_view3d, scene->camera, width, height);
-  session->reset(session_params, buffer_params);
+  BufferParams buffer_params = BlenderSync::get_buffer_params(b_null_space_view3d,
+                                                              b_null_region_view3d,
+                                                              scene->camera,
+                                                              width,
+                                                              height,
+                                                              session_params.denoising.use);
+  session->reset(buffer_params, session_params.samples);
+
+  b_engine.use_highlight_tiles(session_params.progressive_refine == false);

  /* reset time */
  start_resize_time = 0.0;
-
-  {
-    thread_scoped_lock lock(draw_state_.mutex);
-    draw_state_.last_pass_index = -1;
-  }
 }

 void BlenderSession::free_session()
 {
-  if (session) {
-    session->cancel(true);
-  }
+  session->cancel();

  delete sync;
-  sync = nullptr;
-
  delete session;
-  session = nullptr;
 }

-void BlenderSession::full_buffer_written(string_view filename)
+static ShaderEvalType get_shader_type(const string &pass_type)
 {
-  full_buffer_files_.emplace_back(filename);
+  const char *shader_type = pass_type.c_str();
+
+  /* data passes */
+  if (strcmp(shader_type, "NORMAL") == 0)
+    return SHADER_EVAL_NORMAL;
+  else if (strcmp(shader_type, "UV") == 0)
+    return SHADER_EVAL_UV;
+  else if (strcmp(shader_type, "ROUGHNESS") == 0)
+    return SHADER_EVAL_ROUGHNESS;
+  else if (strcmp(shader_type, "DIFFUSE_COLOR") == 0)
+    return SHADER_EVAL_DIFFUSE_COLOR;
+  else if (strcmp(shader_type, "GLOSSY_COLOR") == 0)
+    return SHADER_EVAL_GLOSSY_COLOR;
+  else if (strcmp(shader_type, "TRANSMISSION_COLOR") == 0)
+    return SHADER_EVAL_TRANSMISSION_COLOR;
+  else if (strcmp(shader_type, "EMIT") == 0)
+    return SHADER_EVAL_EMISSION;
+
+  /* light passes */
+  else if (strcmp(shader_type, "AO") == 0)
+    return SHADER_EVAL_AO;
+  else if (strcmp(shader_type, "COMBINED") == 0)
+    return SHADER_EVAL_COMBINED;
+  else if (strcmp(shader_type, "SHADOW") == 0)
+    return SHADER_EVAL_SHADOW;
+  else if (strcmp(shader_type, "DIFFUSE") == 0)
+    return SHADER_EVAL_DIFFUSE;
+  else if (strcmp(shader_type, "GLOSSY") == 0)
+    return SHADER_EVAL_GLOSSY;
+  else if (strcmp(shader_type, "TRANSMISSION") == 0)
+    return SHADER_EVAL_TRANSMISSION;
+
+  /* extra */
+  else if (strcmp(shader_type, "ENVIRONMENT") == 0)
+    return SHADER_EVAL_ENVIRONMENT;
+
+  else
+    return SHADER_EVAL_BAKE;
+}
+
+static BL::RenderResult begin_render_result(BL::RenderEngine &b_engine,
+                                            int x,
+                                            int y,
+                                            int w,
+                                            int h,
+                                            const char *layername,
+                                            const char *viewname)
+{
+  return b_engine.begin_result(x, y, w, h, layername, viewname);
+}
+
+static void end_render_result(BL::RenderEngine &b_engine,
+                              BL::RenderResult &b_rr,
+                              bool cancel,
+                              bool highlight,
+                              bool do_merge_results)
+{
+  b_engine.end_result(b_rr, (int)cancel, (int)highlight, (int)do_merge_results);
+}
+
+void BlenderSession::do_write_update_render_tile(RenderTile &rtile,
+                                                 bool do_update_only,
+                                                 bool do_read_only,
+                                                 bool highlight)
+{
+  int x = rtile.x - session->tile_manager.params.full_x;
+  int y = rtile.y - session->tile_manager.params.full_y;
+  int w = rtile.w;
+  int h = rtile.h;
+
+  /* Begin a render result for the tile rect, offset by the tile manager's full_x/full_y. */
+  BL::RenderResult b_rr = begin_render_result(
+      b_engine, x, y, w, h, b_rlay_name.c_str(), b_rview_name.c_str());
+
+  /* No result data: can happen if the intersected rectangle gives 0 width or height. */
+  if (b_rr.ptr.data == NULL) {
+    return;
+  }
+
+  BL::RenderResult::layers_iterator b_single_rlay;
+  b_rr.layers.begin(b_single_rlay);
+
+  /* Layer is missing if it was disabled in the UI. NOTE(review): b_rr is not ended here. */
+  if (b_single_rlay == b_rr.layers.end())
+    return;
+
+  BL::RenderLayer b_rlay = *b_single_rlay;
+
+  if (do_read_only) {
+    /* Read path: copy each pass of the Blender layer into the tile's buffers. */
+    for (BL::RenderPass &b_pass : b_rlay.passes) {
+      /* Find the pass type matching this Blender pass. */
+      PassType pass_type = BlenderSync::get_pass_type(b_pass);
+      int components = b_pass.channels();
+
+      rtile.buffers->set_pass_rect(
+          pass_type, components, (float *)b_pass.rect(), rtile.num_samples);
+    }
+
+    end_render_result(b_engine, b_rr, false, false, false);
+  }
+  else if (do_update_only) {
+    /* The sample is zero at the initial tile update, which only serves to tag
+     * the tile as IN PROGRESS on the Blender side for proper highlighting, so
+     * no buffers should be sent to Blender yet. For denoise tasks we also
+     * keep showing the noisy buffers until denoising is done. */
+    bool merge = (rtile.sample != 0) && (rtile.task != RenderTile::DENOISE);
+
+    if (merge) {
+      update_render_result(b_rlay, rtile);
+    }
+
+    end_render_result(b_engine, b_rr, true, highlight, merge);
+  }
+  else {
+    /* Write path: write the final render result and end it with merging enabled. */
+    write_render_result(b_rlay, rtile);
+    end_render_result(b_engine, b_rr, false, false, true);
+  }
+}
+
+void BlenderSession::read_render_tile(RenderTile &rtile)
+{
+  do_write_update_render_tile(rtile, false, true, false);
+}
+
+void BlenderSession::write_render_tile(RenderTile &rtile)
+{
+  do_write_update_render_tile(rtile, false, false, false);
+}
+
+void BlenderSession::update_render_tile(RenderTile &rtile, bool highlight)
+{
+  /* Preview renders use the final-write path instead of the update path, since otherwise
+   * the render result would not be updated on the Blender side.
+   * This needs further investigation, but is fine for now.
+   */
+  if (!b_engine.is_preview())
+    do_write_update_render_tile(rtile, true, false, highlight);
+  else
+    do_write_update_render_tile(rtile, false, false, false);
 }

 static void add_cryptomatte_layer(BL::RenderResult &b_rr, string name, string manifest)
@@ -307,15 +430,12 @@ void BlenderSession::stamp_view_layer_metadata(Scene *scene, const string &view_
                            to_string(session->params.samples).c_str());

  /* Store ranged samples information. */
-  /* TODO(sergey): Need to bring this information back. */
-#if 0
  if (session->tile_manager.range_num_samples != -1) {
    b_rr.stamp_data_add_field((prefix + "range_start_sample").c_str(),
                              to_string(session->tile_manager.range_start_sample).c_str());
    b_rr.stamp_data_add_field((prefix + "range_num_samples").c_str(),
                              to_string(session->tile_manager.range_num_samples).c_str());
  }
-#endif

  /* Write cryptomatte metadata. */
  if (scene->film->get_cryptomatte_passes() & CRYPT_OBJECT) {
@@ -354,35 +474,39 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
    return;
  }

-  /* Create driver to write out render results. */
-  session->set_output_driver(make_unique<BlenderOutputDriver>(b_engine));
-
-  session->full_buffer_written_cb = [&](string_view filename) { full_buffer_written(filename); };
+  /* set callback to write out render results */
+  session->write_render_tile_cb = function_bind(&BlenderSession::write_render_tile, this, _1);
+  session->update_render_tile_cb = function_bind(
+      &BlenderSession::update_render_tile, this, _1, _2);

  BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval();

  /* get buffer parameters */
-  const SessionParams session_params = BlenderSync::get_session_params(
-      b_engine, b_userpref, b_scene, background);
+  SessionParams session_params = BlenderSync::get_session_params(
+      b_engine, b_userpref, b_scene, background, b_view_layer);
  BufferParams buffer_params = BlenderSync::get_buffer_params(
-      b_v3d, b_rv3d, scene->camera, width, height);
+      b_v3d, b_rv3d, scene->camera, width, height, session_params.denoising.use);

  /* temporary render result to find needed passes and views */
-  BL::RenderResult b_rr = b_engine.begin_result(0, 0, 1, 1, b_view_layer.name().c_str(), NULL);
+  BL::RenderResult b_rr = begin_render_result(
+      b_engine, 0, 0, 1, 1, b_view_layer.name().c_str(), NULL);
  BL::RenderResult::layers_iterator b_single_rlay;
  b_rr.layers.begin(b_single_rlay);
  BL::RenderLayer b_rlay = *b_single_rlay;
+  b_rlay_name = b_view_layer.name();

-  {
-    thread_scoped_lock lock(draw_state_.mutex);
-    b_rlay_name = b_view_layer.name();
-
-    /* Signal that the display pass is to be updated. */
-    draw_state_.last_pass_index = -1;
-  }
+  /* Update denoising parameters. */
+  session->set_denoising(session_params.denoising);

  /* Compute render passes and film settings. */
-  sync->sync_render_passes(b_rlay, b_view_layer);
+  vector<Pass> passes = sync->sync_render_passes(
+      b_scene, b_rlay, b_view_layer, session_params.adaptive_sampling, session_params.denoising);
+
+  /* Set buffer params, using film settings from sync_render_passes. */
+  buffer_params.passes = passes;
+  buffer_params.denoising_data_pass = scene->film->get_denoising_data_pass();
+  buffer_params.denoising_clean_pass = scene->film->get_denoising_clean_pass();
+  buffer_params.denoising_prefiltered_pass = scene->film->get_denoising_prefiltered_pass();

  BL::RenderResult::views_iterator b_view_iter;

@@ -396,9 +520,6 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
       ++b_view_iter, ++view_index) {
    b_rview_name = b_view_iter->name();

-    buffer_params.layer = b_view_layer.name();
-    buffer_params.view = b_rview_name;
-
    /* set the current view */
    b_engine.active_view_set(b_rview_name.c_str());

@@ -428,16 +549,20 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
    }

    /* Update number of samples per layer. */
-    const int samples = sync->get_layer_samples();
-    const bool bound_samples = sync->get_layer_bound_samples();
+    int samples = sync->get_layer_samples();
+    bool bound_samples = sync->get_layer_bound_samples();
+    int effective_layer_samples;

-    SessionParams effective_session_params = session_params;
-    if (samples != 0 && (!bound_samples || (samples < session_params.samples))) {
-      effective_session_params.samples = samples;
-    }
+    if (samples != 0 && (!bound_samples || (samples < session_params.samples)))
+      effective_layer_samples = samples;
+    else
+      effective_layer_samples = session_params.samples;
+
+    /* Update tile manager if we're doing resumable render. */
+    update_resumable_tile_manager(effective_layer_samples);

    /* Update session itself. */
-    session->reset(effective_session_params, buffer_params);
+    session->reset(buffer_params, effective_layer_samples);

    /* render */
    if (!b_engine.is_preview() && background && print_render_stats) {
@@ -461,159 +586,65 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
  stamp_view_layer_metadata(scene, b_rlay_name);

  /* free result without merging */
-  b_engine.end_result(b_rr, true, false, false);
-
-  /* When tiled rendering is used there will be no "write" done for the tile. Forcefully clear
-   * highlighted tiles now, so that the highlight will be removed while processing full frame from
-   * file. */
-  b_engine.tile_highlight_clear_all();
+  end_render_result(b_engine, b_rr, true, true, false);

  double total_time, render_time;
  session->progress.get_time(total_time, render_time);
  VLOG(1) << "Total render time: " << total_time;
  VLOG(1) << "Render time (without synchronization): " << render_time;
+
+  /* clear callback */
+  session->write_render_tile_cb = function_null;
+  session->update_render_tile_cb = function_null;
 }

-void BlenderSession::render_frame_finish()
+static int bake_pass_filter_get(const int pass_filter)
 {
-  /* Processing of all layers and views is done. Clear the strings so that we can communicate
-   * progress about reading files and denoising them. */
-  b_rlay_name = "";
-  b_rview_name = "";
+  int flag = BAKE_FILTER_NONE;

-  if (!b_render.use_persistent_data()) {
-    /* Free the sync object so that it can properly dereference nodes from the scene graph before
-     * the graph is freed. */
-    delete sync;
-    sync = nullptr;
+  if ((pass_filter & BL::BakeSettings::pass_filter_DIRECT) != 0)
+    flag |= BAKE_FILTER_DIRECT;
+  if ((pass_filter & BL::BakeSettings::pass_filter_INDIRECT) != 0)
+    flag |= BAKE_FILTER_INDIRECT;
+  if ((pass_filter & BL::BakeSettings::pass_filter_COLOR) != 0)
+    flag |= BAKE_FILTER_COLOR;

-    session->device_free();
-  }
+  if ((pass_filter & BL::BakeSettings::pass_filter_DIFFUSE) != 0)
+    flag |= BAKE_FILTER_DIFFUSE;
+  if ((pass_filter & BL::BakeSettings::pass_filter_GLOSSY) != 0)
+    flag |= BAKE_FILTER_GLOSSY;
+  if ((pass_filter & BL::BakeSettings::pass_filter_TRANSMISSION) != 0)
+    flag |= BAKE_FILTER_TRANSMISSION;

-  for (string_view filename : full_buffer_files_) {
-    session->process_full_buffer_from_disk(filename);
-    if (check_and_report_session_error()) {
-      break;
-    }
-  }
+  if ((pass_filter & BL::BakeSettings::pass_filter_EMIT) != 0)
+    flag |= BAKE_FILTER_EMISSION;
+  if ((pass_filter & BL::BakeSettings::pass_filter_AO) != 0)
+    flag |= BAKE_FILTER_AO;

-  for (string_view filename : full_buffer_files_) {
-    path_remove(filename);
-  }
-
-  /* Clear driver. */
-  session->set_output_driver(nullptr);
-  session->full_buffer_written_cb = function_null;
-
-  /* All the files are handled.
-   * Clear the list so that this session can be re-used by Persistent Data. */
-  full_buffer_files_.clear();
-}
-
-static PassType bake_type_to_pass(const string &bake_type_str, const int bake_filter)
-{
-  const char *bake_type = bake_type_str.c_str();
-
-  /* data passes */
-  if (strcmp(bake_type, "POSITION") == 0) {
-    return PASS_POSITION;
-  }
-  else if (strcmp(bake_type, "NORMAL") == 0) {
-    return PASS_NORMAL;
-  }
-  else if (strcmp(bake_type, "UV") == 0) {
-    return PASS_UV;
-  }
-  else if (strcmp(bake_type, "ROUGHNESS") == 0) {
-    return PASS_ROUGHNESS;
-  }
-  else if (strcmp(bake_type, "EMIT") == 0) {
-    return PASS_EMISSION;
-  }
-  /* light passes */
-  else if (strcmp(bake_type, "AO") == 0) {
-    return PASS_AO;
-  }
-  else if (strcmp(bake_type, "COMBINED") == 0) {
-    return PASS_COMBINED;
-  }
-  else if (strcmp(bake_type, "SHADOW") == 0) {
-    return PASS_SHADOW;
-  }
-  else if (strcmp(bake_type, "DIFFUSE") == 0) {
-    if ((bake_filter & BL::BakeSettings::pass_filter_DIRECT) &&
-        bake_filter & BL::BakeSettings::pass_filter_INDIRECT) {
-      return PASS_DIFFUSE;
-    }
-    else if (bake_filter & BL::BakeSettings::pass_filter_DIRECT) {
-      return PASS_DIFFUSE_DIRECT;
-    }
-    else if (bake_filter & BL::BakeSettings::pass_filter_INDIRECT) {
-      return PASS_DIFFUSE_INDIRECT;
-    }
-    else {
-      return PASS_DIFFUSE_COLOR;
-    }
-  }
-  else if (strcmp(bake_type, "GLOSSY") == 0) {
-    if ((bake_filter & BL::BakeSettings::pass_filter_DIRECT) &&
-        bake_filter & BL::BakeSettings::pass_filter_INDIRECT) {
-      return PASS_GLOSSY;
-    }
-    else if (bake_filter & BL::BakeSettings::pass_filter_DIRECT) {
-      return PASS_GLOSSY_DIRECT;
-    }
-    else if (bake_filter & BL::BakeSettings::pass_filter_INDIRECT) {
-      return PASS_GLOSSY_INDIRECT;
-    }
-    else {
-      return PASS_GLOSSY_COLOR;
-    }
-  }
-  else if (strcmp(bake_type, "TRANSMISSION") == 0) {
-    if ((bake_filter & BL::BakeSettings::pass_filter_DIRECT) &&
-        bake_filter & BL::BakeSettings::pass_filter_INDIRECT) {
-      return PASS_TRANSMISSION;
-    }
-    else if (bake_filter & BL::BakeSettings::pass_filter_DIRECT) {
-      return PASS_TRANSMISSION_DIRECT;
-    }
-    else if (bake_filter & BL::BakeSettings::pass_filter_INDIRECT) {
-      return PASS_TRANSMISSION_INDIRECT;
-    }
-    else {
-      return PASS_TRANSMISSION_COLOR;
-    }
-  }
-  /* extra */
-  else if (strcmp(bake_type, "ENVIRONMENT") == 0) {
-    return PASS_BACKGROUND;
-  }
-
-  return PASS_COMBINED;
+  return flag;
 }

 void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
                          BL::Object &b_object,
-                          const string &bake_type,
-                          const int bake_filter,
+                          const string &pass_type,
+                          const int pass_filter,
                          const int bake_width,
                          const int bake_height)
 {
  b_depsgraph = b_depsgraph_;

+  ShaderEvalType shader_type = get_shader_type(pass_type);
+  int bake_pass_filter = bake_pass_filter_get(pass_filter);
+
  /* Initialize bake manager, before we load the baking kernels. */
-  scene->bake_manager->set(scene, b_object.name());
+  scene->bake_manager->set(scene, b_object.name(), shader_type, bake_pass_filter);

-  /* Add render pass that we want to bake, and name it Combined so that it is
-   * used as that on the Blender side. */
-  Pass *pass = scene->create_node<Pass>();
-  pass->set_name(ustring("Combined"));
-  pass->set_type(bake_type_to_pass(bake_type, bake_filter));
-  pass->set_include_albedo((bake_filter & BL::BakeSettings::pass_filter_COLOR));
+  /* Passes are identified by name, so in order to return the combined pass we need to set the
+   * name. */
+  Pass::add(PASS_COMBINED, scene->passes, "Combined");

-  session->set_display_driver(nullptr);
-  session->set_output_driver(make_unique<BlenderOutputDriver>(b_engine));
+  session->read_bake_tile_cb = function_bind(&BlenderSession::read_render_tile, this, _1);
+  session->write_render_tile_cb = function_bind(&BlenderSession::write_render_tile, this, _1);

  if (!session->progress.get_cancel()) {
    /* Sync scene. */
@@ -636,15 +667,18 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,

  if (object_found && !session->progress.get_cancel()) {
    /* Get session and buffer parameters. */
-    const SessionParams session_params = BlenderSync::get_session_params(
+    SessionParams session_params = BlenderSync::get_session_params(
        b_engine, b_userpref, b_scene, background);
+    session_params.progressive_refine = false;

    BufferParams buffer_params;
    buffer_params.width = bake_width;
    buffer_params.height = bake_height;
+    buffer_params.passes = scene->passes;

    /* Update session. */
-    session->reset(session_params, buffer_params);
+    session->tile_manager.set_samples(session_params.samples);
+    session->reset(buffer_params, session_params.samples);

    session->progress.set_update_callback(
        function_bind(&BlenderSession::update_bake_progress, this));
@@ -656,7 +690,71 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
    session->wait();
  }

-  session->set_output_driver(nullptr);
+  session->read_bake_tile_cb = function_null;
+  session->write_render_tile_cb = function_null;
+}
+
+/* Copy a rendered tile from its render buffers into the passes of b_rlay. With do_update_only
+ * only the "Combined" pass of the current view is refreshed, otherwise every pass is written. */
+void BlenderSession::do_write_update_render_result(BL::RenderLayer &b_rlay,
+                                                   RenderTile &rtile,
+                                                   bool do_update_only)
+{
+  RenderBuffers *buffers = rtile.buffers;
+  /* Copy data from device; without it there is nothing to write. */
+  if (!buffers->copy_from_device())
+    return;
+
+  float exposure = scene->film->get_exposure();
+  vector<float> pixels(rtile.w * rtile.h * 4);
+
+  /* Adjust absolute sample number to the range. */
+  int sample = rtile.sample;
+  const int range_start_sample = session->tile_manager.range_start_sample;
+  if (range_start_sample != -1) {
+    sample -= range_start_sample;
+  }
+
+  if (!do_update_only) {
+    /* Copy each pass. */
+    for (BL::RenderPass &b_pass : b_rlay.passes) {
+      int components = b_pass.channels();
+      /* Copy pixels from regular render passes. */
+      bool read = buffers->get_pass_rect(b_pass.name(), exposure, sample, components, &pixels[0]);
+
+      /* Not a regular pass: try to read it as a denoising pass instead. */
+      if (!read) {
+        int denoising_offset = BlenderSync::get_denoising_pass(b_pass);
+        if (denoising_offset >= 0) {
+          read = buffers->get_denoising_pass_rect(
+              denoising_offset, exposure, sample, components, &pixels[0]);
+        }
+      }
+
+      /* Nothing could be read for this pass: write zeros. */
+      if (!read) {
+        memset(&pixels[0], 0, pixels.size() * sizeof(float));
+      }
+
+      b_pass.rect(&pixels[0]);
+    }
+  }
+  else {
+    /* Copy combined pass, unless the layer has no such pass for the current view. */
+    BL::RenderPass b_combined_pass(b_rlay.passes.find_by_name("Combined", b_rview_name.c_str()));
+    if (b_combined_pass && buffers->get_pass_rect("Combined", exposure, sample, 4, &pixels[0]))
+      b_combined_pass.rect(&pixels[0]);
+  }
+}
+
+void BlenderSession::write_render_result(BL::RenderLayer &b_rlay, RenderTile &rtile)
+{
+  do_write_update_render_result(b_rlay, rtile, false); /* false: write every pass. */
+}
+
+void BlenderSession::update_render_result(BL::RenderLayer &b_rlay, RenderTile &rtile)
+{
+  do_write_update_render_result(b_rlay, rtile, true); /* true: refresh "Combined" only. */
+}

 void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
@@ -666,19 +764,19 @@ void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
    return;

  /* on session/scene parameter changes, we recreate session entirely */
-  const SessionParams session_params = BlenderSync::get_session_params(
+  SessionParams session_params = BlenderSync::get_session_params(
      b_engine, b_userpref, b_scene, background);
-  const SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
-  const bool session_pause = BlenderSync::get_session_pause(b_scene, background);
+  SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
+  bool session_pause = BlenderSync::get_session_pause(b_scene, background);

  if (session->params.modified(session_params) || scene->params.modified(scene_params)) {
    free_session();
    create_session();
  }

-  /* increase samples and render time, but never decrease */
+  /* increase samples, but never decrease */
  session->set_samples(session_params.samples);
-  session->set_time_limit(session_params.time_limit);
+  session->set_denoising_start_sample(session_params.denoising.start_sample);
  session->set_pause(session_pause);

  /* copy recalc flags, outside of mutex so we can decide to do the real
@@ -710,12 +808,21 @@ void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
    sync->sync_camera(b_render, b_camera_override, width, height, "");

  /* get buffer parameters */
-  const BufferParams buffer_params = BlenderSync::get_buffer_params(
-      b_v3d, b_rv3d, scene->camera, width, height);
+  BufferParams buffer_params = BlenderSync::get_buffer_params(
+      b_v3d, b_rv3d, scene->camera, width, height, session_params.denoising.use);
+
+  if (!buffer_params.denoising_data_pass) {
+    session_params.denoising.use = false;
+  }
+
+  session->set_denoising(session_params.denoising);
+
+  /* Update film if denoising data was enabled or disabled. */
+  scene->film->set_denoising_data_pass(buffer_params.denoising_data_pass);

  /* reset if needed */
  if (scene->need_reset()) {
-    session->reset(session_params, buffer_params);
+    session->reset(buffer_params, session_params.samples);

    /* After session reset, so device is not accessing image data anymore. */
    builtin_images_load();
@@ -732,44 +839,7 @@ void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
  session->start();
 }

-void BlenderSession::draw(BL::SpaceImageEditor &space_image)
-{
-  if (!session || !session->scene) {
-    /* Offline render drawing does not force the render engine update, which means it's possible
-     * that the Session is not created yet. */
-    return;
-  }
-
-  thread_scoped_lock lock(draw_state_.mutex);
-
-  const int pass_index = space_image.image_user().multilayer_pass();
-  if (pass_index != draw_state_.last_pass_index) {
-    BL::RenderPass b_display_pass(b_engine.pass_by_index_get(b_rlay_name.c_str(), pass_index));
-    if (!b_display_pass) {
-      return;
-    }
-
-    Scene *scene = session->scene;
-
-    thread_scoped_lock lock(scene->mutex);
-
-    const Pass *pass = Pass::find(scene->passes, b_display_pass.name());
-    if (!pass) {
-      return;
-    }
-
-    scene->film->set_display_pass(pass->get_type());
-
-    draw_state_.last_pass_index = pass_index;
-  }
-
-  BL::Array<float, 2> zoom = space_image.zoom();
-  display_driver_->set_zoom(zoom[0], zoom[1]);
-
-  session->draw();
-}
-
-void BlenderSession::view_draw(int w, int h)
+bool BlenderSession::draw(int w, int h)
 {
  /* pause in redraw in case update is not being called due to final render */
  session->set_pause(BlenderSync::get_session_pause(b_scene, background));
@@ -815,14 +885,14 @@ void BlenderSession::view_draw(int w, int h)

    /* reset if requested */
    if (reset) {
-      const SessionParams session_params = BlenderSync::get_session_params(
+      SessionParams session_params = BlenderSync::get_session_params(
          b_engine, b_userpref, b_scene, background);
-      const BufferParams buffer_params = BlenderSync::get_buffer_params(
-          b_v3d, b_rv3d, scene->camera, width, height);
-      const bool session_pause = BlenderSync::get_session_pause(b_scene, background);
+      BufferParams buffer_params = BlenderSync::get_buffer_params(
+          b_v3d, b_rv3d, scene->camera, width, height, session_params.denoising.use);
+      bool session_pause = BlenderSync::get_session_pause(b_scene, background);

      if (session_pause == false) {
-        session->reset(session_params, buffer_params);
+        session->reset(buffer_params, session_params.samples);
        start_resize_time = 0.0;
      }
    }
@@ -835,7 +905,18 @@ void BlenderSession::view_draw(int w, int h)
  update_status_progress();

  /* draw */
-  session->draw();
+  BufferParams buffer_params = BlenderSync::get_buffer_params(
+      b_v3d, b_rv3d, scene->camera, width, height, session->params.denoising.use);
+  DeviceDrawParams draw_params;
+
+  if (session->params.display_buffer_linear) {
+    draw_params.bind_display_space_shader_cb = function_bind(
+        &BL::RenderEngine::bind_display_space_shader, &b_engine, b_scene);
+    draw_params.unbind_display_space_shader_cb = function_bind(
+        &BL::RenderEngine::unbind_display_space_shader, &b_engine);
+  }
+
+  return !session->draw(buffer_params, draw_params);
 }

 void BlenderSession::get_status(string &status, string &substatus)
@@ -843,6 +924,11 @@ void BlenderSession::get_status(string &status, string &substatus)
  session->progress.get_status(status, substatus);
 }

+void BlenderSession::get_kernel_status(string &kernel_status)
+{
+  session->progress.get_kernel_status(kernel_status); /* Output is filled by session->progress. */
+}
+
 void BlenderSession::get_progress(float &progress, double &total_time, double &render_time)
 {
  session->progress.get_time(total_time, render_time);
@@ -861,7 +947,7 @@ void BlenderSession::update_bake_progress()

 void BlenderSession::update_status_progress()
 {
-  string timestatus, status, substatus;
+  string timestatus, status, substatus, kernel_status;
  string scene_status = "";
  float progress;
  double total_time, remaining_time = 0, render_time;
@@ -869,11 +955,11 @@ void BlenderSession::update_status_progress()
  float mem_peak = (float)session->stats.mem_peak / 1024.0f / 1024.0f;

  get_status(status, substatus);
+  get_kernel_status(kernel_status);
  get_progress(progress, total_time, render_time);

-  if (progress > 0) {
-    remaining_time = session->get_estimated_remaining_time();
-  }
+  if (progress > 0)
+    remaining_time = (1.0 - (double)progress) * (render_time / (double)progress);

  if (background) {
    if (scene)
@@ -894,12 +980,14 @@ void BlenderSession::update_status_progress()
      status = " | " + status;
    if (substatus.size() > 0)
      status += " | " + substatus;
+    if (kernel_status.size() > 0)
+      status += " | " + kernel_status;
  }

  double current_time = time_dt();
-  /* When rendering in a window, redraw the status at least once per second to keep the elapsed
-   * and remaining time up-to-date. For headless rendering, only report when something
-   * significant changes to keep the console output readable. */
+  /* When rendering in a window, redraw the status at least once per second to keep the elapsed and
+   * remaining time up-to-date. For headless rendering, only report when something significant
+   * changes to keep the console output readable. */
  if (status != last_status || (!headless && (current_time - last_status_time) > 1.0)) {
    b_engine.update_stats("", (timestatus + scene_status + status).c_str());
    b_engine.update_memory_stats(mem_used, mem_peak);
@@ -911,27 +999,20 @@ void BlenderSession::update_status_progress()
    last_progress = progress;
  }

-  check_and_report_session_error();
-}
-
-bool BlenderSession::check_and_report_session_error()
-{
-  if (!session->progress.get_error()) {
-    return false;
+  if (session->progress.get_error()) {
+    string error = session->progress.get_error_message();
+    if (error != last_error) {
+      /* TODO(sergey): Currently the C++ RNA API doesn't let us
+       * use a mnemonic name for the report type. Would be nice
+       * to have this figured out.
+       *
+       * Until then, 1 << 5 means RPT_ERROR.
+       */
+      b_engine.report(1 << 5, error.c_str());
+      b_engine.error_set(error.c_str());
+      last_error = error;
+    }
  }
-
-  const string error = session->progress.get_error_message();
-  if (error != last_error) {
-    /* TODO(sergey): Currently C++ RNA API doesn't let us to use mnemonic name for the variable.
-     * Would be nice to have this figured out.
-     *
-     * For until then, 1 << 5 means RPT_ERROR. */
-    b_engine.report(1 << 5, error.c_str());
-    b_engine.error_set(error.c_str());
-    last_error = error;
-  }
-
-  return true;
 }

 void BlenderSession::tag_update()
@@ -967,6 +1048,56 @@ void BlenderSession::test_cancel()
      session->progress.set_cancel("Cancelled");
 }

+/* Limit rendering to the sample range of the resumable chunk(s) selected by the static
+ * BlenderSession chunk settings. Does nothing when num_resumable_chunks is 0. */
+void BlenderSession::update_resumable_tile_manager(int num_samples)
+{
+  const int num_resumable_chunks = BlenderSession::num_resumable_chunks,
+            current_resumable_chunk = BlenderSession::current_resumable_chunk;
+  if (num_resumable_chunks == 0) {
+    return;
+  }
+
+  if (num_resumable_chunks > num_samples) {
+    fprintf(stderr,
+            "Cycles warning: more sample chunks (%d) than samples (%d), "
+            "this will cause some samples to be included in multiple chunks.\n",
+            num_resumable_chunks,
+            num_samples);
+  }
+
+  const float num_samples_per_chunk = (float)num_samples / num_resumable_chunks;
+  float range_start_sample, range_num_samples;
+  if (current_resumable_chunk != 0) {
+    /* Single chunk rendering. */
+    range_start_sample = num_samples_per_chunk * (current_resumable_chunk - 1);
+    range_num_samples = num_samples_per_chunk;
+  }
+  else {
+    /* Ranged-chunks. */
+    const int num_chunks = end_resumable_chunk - start_resumable_chunk + 1;
+    range_start_sample = num_samples_per_chunk * (start_resumable_chunk - 1);
+    range_num_samples = num_chunks * num_samples_per_chunk;
+  }
+
+  /* Round after doing the multiplications with num_chunks and num_samples_per_chunk
+   * to allow for many small chunks. */
+  int rounded_range_start_sample = (int)floorf(range_start_sample + 0.5f);
+  int rounded_range_num_samples = max((int)floorf(range_num_samples + 0.5f), 1);
+
+  /* Make sure we don't overshoot: the range must end at num_samples at the latest. */
+  if (rounded_range_start_sample + rounded_range_num_samples > num_samples) {
+    rounded_range_num_samples = max(num_samples - rounded_range_start_sample, 0);
+  }
+
+  VLOG(1) << "Samples range start is " << range_start_sample << ", "
+          << "number of samples to render is " << range_num_samples;
+
+  scene->integrator->set_start_sample(rounded_range_start_sample);
+  session->tile_manager.range_start_sample = rounded_range_start_sample;
+  session->tile_manager.range_num_samples = rounded_range_num_samples;
+}
+
 void BlenderSession::free_blender_memory_if_possible()
 {
  if (!background) {
--- a/intern/cycles/blender/blender_session.h
+++ b/intern/cycles/blender/blender_session.h
@@ -29,11 +29,12 @@

 CCL_NAMESPACE_BEGIN

-class BlenderDisplayDriver;
 class BlenderSync;
 class ImageMetaData;
 class Scene;
 class Session;
+class RenderBuffers;
+class RenderTile;

 class BlenderSession {
 public:
@@ -61,8 +62,6 @@ class BlenderSession {
  /* offline render */
  void render(BL::Depsgraph &b_depsgraph);

-  void render_frame_finish();
-
  void bake(BL::Depsgraph &b_depsgrah,
            BL::Object &b_object,
            const string &pass_type,
@@ -70,16 +69,24 @@ class BlenderSession {
            const int bake_width,
            const int bake_height);

-  void full_buffer_written(string_view filename);
+  void write_render_result(BL::RenderLayer &b_rlay, RenderTile &rtile);
+  void write_render_tile(RenderTile &rtile);
+  void read_render_tile(RenderTile &rtile);
+
+  /* Update functions are used to update the display buffer only after a sample was rendered;
+   * they are only needed for better visual feedback. */
+  void update_render_result(BL::RenderLayer &b_rlay, RenderTile &rtile);
+  void update_render_tile(RenderTile &rtile, bool highlight);
+
  /* interactive updates */
  void synchronize(BL::Depsgraph &b_depsgraph);

  /* drawing */
-  void draw(BL::SpaceImageEditor &space_image);
-  void view_draw(int w, int h);
+  bool draw(int w, int h);
  void tag_redraw();
  void tag_update();
  void get_status(string &status, string &substatus);
+  void get_kernel_status(string &kernel_status);
  void get_progress(float &progress, double &total_time, double &render_time);
  void test_cancel();
  void update_status_progress();
@@ -97,7 +104,8 @@ class BlenderSession {
  BL::RenderSettings b_render;
  BL::Depsgraph b_depsgraph;
  /* NOTE: Blender's scene might become invalid after call
-   * #free_blender_memory_if_possible(). */
+   * free_blender_memory_if_possible().
+   */
  BL::Scene b_scene;
  BL::SpaceView3D b_v3d;
  BL::RegionView3D b_rv3d;
@@ -115,8 +123,6 @@ class BlenderSession {

  void *python_thread_state;

-  bool use_developer_ui;
-
  /* Global state which is common for all render sessions created from Blender.
   * Usually denotes command line arguments.
   */
@@ -128,33 +134,41 @@ class BlenderSession {
   */
  static bool headless;

+  /* ** Resumable render ** */
+
+  /* Overall number of chunks in which the sample range is to be divided. */
+  static int num_resumable_chunks;
+
+  /* Current resumable chunk index to render. */
+  static int current_resumable_chunk;
+
+  /* Alternative to single-chunk rendering to render a range of chunks. */
+  static int start_resumable_chunk;
+  static int end_resumable_chunk;
+
  static bool print_render_stats;

 protected:
  void stamp_view_layer_metadata(Scene *scene, const string &view_layer_name);

-  /* Check whether session error happened.
-   * If so, it is reported to the render engine and true is returned.
-   * Otherwise false is returned. */
-  bool check_and_report_session_error();
+  void do_write_update_render_result(BL::RenderLayer &b_rlay,
+                                     RenderTile &rtile,
+                                     bool do_update_only);
+  void do_write_update_render_tile(RenderTile &rtile,
+                                   bool do_update_only,
+                                   bool do_read_only,
+                                   bool highlight);

  void builtin_images_load();

+  /* Update tile manager to reflect resumable render settings. */
+  void update_resumable_tile_manager(int num_samples);
+
  /* Is used after each render layer synchronization is done with the goal
   * of freeing render engine data which is held from Blender side (for
   * example, dependency graph).
   */
  void free_blender_memory_if_possible();
-
-  struct {
-    thread_mutex mutex;
-    int last_pass_index = -1;
-  } draw_state_;
-
-  /* NOTE: The BlenderSession references the display driver. */
-  BlenderDisplayDriver *display_driver_ = nullptr;
-
-  vector<string> full_buffer_files_;
 };

 CCL_NAMESPACE_END
--- a/intern/cycles/blender/blender_shader.cpp
+++ b/intern/cycles/blender/blender_shader.cpp
@@ -17,7 +17,6 @@
 #include "render/background.h"
 #include "render/colorspace.h"
 #include "render/graph.h"
-#include "render/integrator.h"
 #include "render/light.h"
 #include "render/nodes.h"
 #include "render/osl.h"
@@ -279,7 +278,7 @@ static ShaderNode *add_node(Scene *scene,
    array<float3> curve_mapping_curves;
    float min_x, max_x;
    curvemapping_color_to_array(mapping, curve_mapping_curves, RAMP_TABLE_SIZE, true);
-    curvemapping_minmax(mapping, 4, &min_x, &max_x);
+    curvemapping_minmax(mapping, true, &min_x, &max_x);
    curves->set_min_x(min_x);
    curves->set_max_x(max_x);
    curves->set_curves(curve_mapping_curves);
@@ -292,25 +291,12 @@ static ShaderNode *add_node(Scene *scene,
    array<float3> curve_mapping_curves;
    float min_x, max_x;
    curvemapping_color_to_array(mapping, curve_mapping_curves, RAMP_TABLE_SIZE, false);
-    curvemapping_minmax(mapping, 3, &min_x, &max_x);
+    curvemapping_minmax(mapping, false, &min_x, &max_x);
    curves->set_min_x(min_x);
    curves->set_max_x(max_x);
    curves->set_curves(curve_mapping_curves);
    node = curves;
  }
-  else if (b_node.is_a(&RNA_ShaderNodeFloatCurve)) {
-    BL::ShaderNodeFloatCurve b_curve_node(b_node);
-    BL::CurveMapping mapping(b_curve_node.mapping());
-    FloatCurveNode *curve = graph->create_node<FloatCurveNode>();
-    array<float> curve_mapping_curve;
-    float min_x, max_x;
-    curvemapping_float_to_array(mapping, curve_mapping_curve, RAMP_TABLE_SIZE);
-    curvemapping_minmax(mapping, 1, &min_x, &max_x);
-    curve->set_min_x(min_x);
-    curve->set_max_x(max_x);
-    curve->set_curve(curve_mapping_curve);
-    node = curve;
-  }
  else if (b_node.is_a(&RNA_ShaderNodeValToRGB)) {
    RGBRampNode *ramp = graph->create_node<RGBRampNode>();
    BL::ShaderNodeValToRGB b_ramp_node(b_node);
@@ -489,11 +475,17 @@ static ShaderNode *add_node(Scene *scene,
    SubsurfaceScatteringNode *subsurface = graph->create_node<SubsurfaceScatteringNode>();

    switch (b_subsurface_node.falloff()) {
-      case BL::ShaderNodeSubsurfaceScattering::falloff_RANDOM_WALK_FIXED_RADIUS:
-        subsurface->set_method(CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID);
+      case BL::ShaderNodeSubsurfaceScattering::falloff_CUBIC:
+        subsurface->set_falloff(CLOSURE_BSSRDF_CUBIC_ID);
+        break;
+      case BL::ShaderNodeSubsurfaceScattering::falloff_GAUSSIAN:
+        subsurface->set_falloff(CLOSURE_BSSRDF_GAUSSIAN_ID);
+        break;
+      case BL::ShaderNodeSubsurfaceScattering::falloff_BURLEY:
+        subsurface->set_falloff(CLOSURE_BSSRDF_BURLEY_ID);
        break;
      case BL::ShaderNodeSubsurfaceScattering::falloff_RANDOM_WALK:
-        subsurface->set_method(CLOSURE_BSSRDF_RANDOM_WALK_ID);
+        subsurface->set_falloff(CLOSURE_BSSRDF_RANDOM_WALK_ID);
        break;
    }

@@ -605,11 +597,11 @@ static ShaderNode *add_node(Scene *scene,
        break;
    }
    switch (b_principled_node.subsurface_method()) {
-      case BL::ShaderNodeBsdfPrincipled::subsurface_method_RANDOM_WALK_FIXED_RADIUS:
-        principled->set_subsurface_method(CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID);
+      case BL::ShaderNodeBsdfPrincipled::subsurface_method_BURLEY:
+        principled->set_subsurface_method(CLOSURE_BSSRDF_PRINCIPLED_ID);
        break;
      case BL::ShaderNodeBsdfPrincipled::subsurface_method_RANDOM_WALK:
-        principled->set_subsurface_method(CLOSURE_BSSRDF_RANDOM_WALK_ID);
+        principled->set_subsurface_method(CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID);
        break;
    }
    node = principled;
@@ -1368,11 +1360,10 @@ void BlenderSync::sync_materials(BL::Depsgraph &b_depsgraph, bool update_all)
 void BlenderSync::sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d, bool update_all)
 {
  Background *background = scene->background;
-  Integrator *integrator = scene->integrator;

  BL::World b_world = b_scene.world();

-  BlenderViewportParameters new_viewport_parameters(b_v3d, use_developer_ui);
+  BlenderViewportParameters new_viewport_parameters(b_v3d);

  if (world_recalc || update_all || b_world.ptr.data != world_map ||
      viewport_parameters.shader_modified(new_viewport_parameters)) {
@@ -1464,8 +1455,9 @@ void BlenderSync::sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d,
      /* AO */
      BL::WorldLighting b_light = b_world.light_settings();

-      integrator->set_ao_factor(b_light.ao_factor());
-      integrator->set_ao_distance(b_light.distance());
+      background->set_use_ao(b_light.use_ambient_occlusion());
+      background->set_ao_factor(b_light.ao_factor());
+      background->set_ao_distance(b_light.distance());

      /* visibility */
      PointerRNA cvisibility = RNA_pointer_get(&b_world.ptr, "cycles_visibility");
@@ -1480,8 +1472,9 @@ void BlenderSync::sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d,
      background->set_visibility(visibility);
    }
    else {
-      integrator->set_ao_factor(1.0f);
-      integrator->set_ao_distance(10.0f);
+      background->set_use_ao(false);
+      background->set_ao_factor(0.0f);
+      background->set_ao_distance(FLT_MAX);
    }

    shader->set_graph(graph);
@@ -1503,6 +1496,7 @@ void BlenderSync::sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d,

  background->set_use_shader(view_layer.use_background_shader ||
                             viewport_parameters.use_custom_shader());
+  background->set_use_ao(background->get_use_ao() && view_layer.use_background_ao);

  background->tag_update(scene);
 }
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -53,7 +53,6 @@ BlenderSync::BlenderSync(BL::RenderEngine &b_engine,
                         BL::Scene &b_scene,
                         Scene *scene,
                         bool preview,
-                         bool use_developer_ui,
                         Progress &progress)
    : b_engine(b_engine),
      b_data(b_data),
@@ -69,7 +68,6 @@ BlenderSync::BlenderSync(BL::RenderEngine &b_engine,
      scene(scene),
      preview(preview),
      experimental(false),
-      use_developer_ui(use_developer_ui),
      dicing_rate(1.0f),
      max_subdivisions(12),
      progress(progress),
@@ -226,7 +224,7 @@ void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d
  }

  if (b_v3d) {
-    BlenderViewportParameters new_viewport_parameters(b_v3d, use_developer_ui);
+    BlenderViewportParameters new_viewport_parameters(b_v3d);

    if (viewport_parameters.shader_modified(new_viewport_parameters)) {
      world_recalc = true;
@@ -253,13 +251,9 @@ void BlenderSync::sync_data(BL::RenderSettings &b_render,

  BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval();

-  /* TODO(sergey): This feels weak to pass view layer to the integrator, and even weaker to have an
-   * implicit check on whether it is a background render or not. What is the nicer thing here? */
-  const bool background = !b_v3d;
-
  sync_view_layer(b_view_layer);
-  sync_integrator(b_view_layer, background);
-  sync_film(b_view_layer, b_v3d);
+  sync_integrator();
+  sync_film(b_v3d);
  sync_shaders(b_depsgraph, b_v3d);
  sync_images();

@@ -286,7 +280,7 @@ void BlenderSync::sync_data(BL::RenderSettings &b_render,

 /* Integrator */

-void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background)
+void BlenderSync::sync_integrator()
 {
  PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");

@@ -334,24 +328,59 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background)
    integrator->set_motion_blur(view_layer.use_motion_blur);
  }

+  integrator->set_method((Integrator::Method)get_enum(
+      cscene, "progressive", Integrator::NUM_METHODS, Integrator::PATH));
+
+  integrator->set_sample_all_lights_direct(get_boolean(cscene, "sample_all_lights_direct"));
+  integrator->set_sample_all_lights_indirect(get_boolean(cscene, "sample_all_lights_indirect"));
  integrator->set_light_sampling_threshold(get_float(cscene, "light_sampling_threshold"));

  SamplingPattern sampling_pattern = (SamplingPattern)get_enum(
      cscene, "sampling_pattern", SAMPLING_NUM_PATTERNS, SAMPLING_PATTERN_SOBOL);
-  integrator->set_sampling_pattern(sampling_pattern);

-  if (preview) {
-    integrator->set_use_adaptive_sampling(
-        RNA_boolean_get(&cscene, "use_preview_adaptive_sampling"));
-    integrator->set_adaptive_threshold(get_float(cscene, "preview_adaptive_threshold"));
-    integrator->set_adaptive_min_samples(get_int(cscene, "preview_adaptive_min_samples"));
+  int adaptive_min_samples = INT_MAX;
+
+  if (RNA_boolean_get(&cscene, "use_adaptive_sampling")) {
+    sampling_pattern = SAMPLING_PATTERN_PMJ;
+    adaptive_min_samples = get_int(cscene, "adaptive_min_samples");
+    integrator->set_adaptive_threshold(get_float(cscene, "adaptive_threshold"));
  }
  else {
-    integrator->set_use_adaptive_sampling(RNA_boolean_get(&cscene, "use_adaptive_sampling"));
-    integrator->set_adaptive_threshold(get_float(cscene, "adaptive_threshold"));
-    integrator->set_adaptive_min_samples(get_int(cscene, "adaptive_min_samples"));
+    integrator->set_adaptive_threshold(0.0f);
  }

+  integrator->set_sampling_pattern(sampling_pattern);
+
+  int diffuse_samples = get_int(cscene, "diffuse_samples");
+  int glossy_samples = get_int(cscene, "glossy_samples");
+  int transmission_samples = get_int(cscene, "transmission_samples");
+  int ao_samples = get_int(cscene, "ao_samples");
+  int mesh_light_samples = get_int(cscene, "mesh_light_samples");
+  int subsurface_samples = get_int(cscene, "subsurface_samples");
+  int volume_samples = get_int(cscene, "volume_samples");
+
+  if (get_boolean(cscene, "use_square_samples")) {
+    diffuse_samples *= diffuse_samples;
+    glossy_samples *= glossy_samples;
+    transmission_samples *= transmission_samples;
+    ao_samples *= ao_samples;
+    mesh_light_samples *= mesh_light_samples;
+    subsurface_samples *= subsurface_samples;
+    volume_samples *= volume_samples;
+    /* Square in 64 bits and clamp: INT_MAX (adaptive sampling off) overflows an int. */
+    const long long min_samples_sq = (long long)adaptive_min_samples * adaptive_min_samples;
+    adaptive_min_samples = (min_samples_sq < INT_MAX) ? (int)min_samples_sq : INT_MAX;
+  }
+
+  integrator->set_diffuse_samples(diffuse_samples);
+  integrator->set_glossy_samples(glossy_samples);
+  integrator->set_transmission_samples(transmission_samples);
+  integrator->set_ao_samples(ao_samples);
+  integrator->set_mesh_light_samples(mesh_light_samples);
+  integrator->set_subsurface_samples(subsurface_samples);
+  integrator->set_volume_samples(volume_samples);
+  integrator->set_adaptive_min_samples(adaptive_min_samples);
+
  if (get_boolean(cscene, "use_fast_gi")) {
    if (preview) {
      integrator->set_ao_bounces(get_int(cscene, "ao_bounces"));
@@ -364,38 +393,20 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background)
    integrator->set_ao_bounces(0);
  }

-  const DenoiseParams denoise_params = get_denoise_params(b_scene, b_view_layer, background);
-  integrator->set_use_denoise(denoise_params.use);
-
-  /* Only update denoiser parameters if the denoiser is actually used. This allows to tweak
-   * denoiser parameters before enabling it without render resetting on every change. The downside
-   * is that the interface and the integrator are technically out of sync. */
-  if (denoise_params.use) {
-    integrator->set_denoiser_type(denoise_params.type);
-    integrator->set_denoise_start_sample(denoise_params.start_sample);
-    integrator->set_use_denoise_pass_albedo(denoise_params.use_pass_albedo);
-    integrator->set_use_denoise_pass_normal(denoise_params.use_pass_normal);
-    integrator->set_denoiser_prefilter(denoise_params.prefilter);
-  }
-
-  /* UPDATE_NONE as we don't want to tag the integrator as modified (this was done by the
-   * set calls above), but we need to make sure that the dependent things are tagged. */
+  /* UPDATE_NONE as we don't want to tag the integrator as modified, just tag dependent things */
  integrator->tag_update(scene, Integrator::UPDATE_NONE);
 }

 /* Film */

-void BlenderSync::sync_film(BL::ViewLayer &b_view_layer, BL::SpaceView3D &b_v3d)
+void BlenderSync::sync_film(BL::SpaceView3D &b_v3d)
 {
  PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
-  PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles");

  Film *film = scene->film;

  if (b_v3d) {
-    const BlenderViewportParameters new_viewport_parameters(b_v3d, use_developer_ui);
-    film->set_display_pass(new_viewport_parameters.display_pass);
-    film->set_show_active_pixels(new_viewport_parameters.show_active_pixels);
+    film->set_display_pass(update_viewport_display_passes(b_v3d, scene->passes));
  }

  film->set_exposure(get_float(cscene, "film_exposure"));
@@ -423,15 +434,6 @@ void BlenderSync::sync_film(BL::ViewLayer &b_view_layer, BL::SpaceView3D &b_v3d)
        break;
    }
  }
-
-  /* Blender viewport does not support proper shadow catcher compositing, so force an approximate
-   * mode to improve visual feedback. */
-  if (b_v3d) {
-    film->set_use_approximate_shadow_catcher(true);
-  }
-  else {
-    film->set_use_approximate_shadow_catcher(!get_boolean(crl, "use_pass_shadow_catcher"));
-  }
 }

 /* Render Layer */
@@ -442,6 +444,7 @@ void BlenderSync::sync_view_layer(BL::ViewLayer &b_view_layer)

  /* Filter. */
  view_layer.use_background_shader = b_view_layer.use_sky();
+  view_layer.use_background_ao = b_view_layer.use_ao();
  /* Always enable surfaces for baking, otherwise there is nothing to bake to. */
  view_layer.use_surfaces = b_view_layer.use_solid() || scene->bake_manager->get_baking();
  view_layer.use_hair = b_view_layer.use_strand();
@@ -461,7 +464,10 @@ void BlenderSync::sync_view_layer(BL::ViewLayer &b_view_layer)

  if (use_layer_samples != 2) {
    int samples = b_view_layer.samples();
-    view_layer.samples = samples;
+    if (get_boolean(cscene, "use_square_samples"))
+      view_layer.samples = samples * samples;
+    else
+      view_layer.samples = samples;
  }
 }

@@ -493,8 +499,7 @@ void BlenderSync::sync_images()
 }

 /* Passes */
-
-static PassType get_blender_pass_type(BL::RenderPass &b_pass)
+PassType BlenderSync::get_pass_type(BL::RenderPass &b_pass)
 {
  string name = b_pass.name();
 #define MAP_PASS(passname, passtype) \
@@ -502,15 +507,10 @@ static PassType get_blender_pass_type(BL::RenderPass &b_pass)
    return passtype; \
  } \
  ((void)0)
-
  /* NOTE: Keep in sync with defined names from DNA_scene_types.h */
-
  MAP_PASS("Combined", PASS_COMBINED);
-  MAP_PASS("Noisy Image", PASS_COMBINED);
-
  MAP_PASS("Depth", PASS_DEPTH);
  MAP_PASS("Mist", PASS_MIST);
-  MAP_PASS("Position", PASS_POSITION);
  MAP_PASS("Normal", PASS_NORMAL);
  MAP_PASS("IndexOB", PASS_OBJECT_ID);
  MAP_PASS("UV", PASS_UV);
@@ -539,86 +539,118 @@ static PassType get_blender_pass_type(BL::RenderPass &b_pass)
  MAP_PASS("BakePrimitive", PASS_BAKE_PRIMITIVE);
  MAP_PASS("BakeDifferential", PASS_BAKE_DIFFERENTIAL);

-  MAP_PASS("Denoising Normal", PASS_DENOISING_NORMAL);
-  MAP_PASS("Denoising Albedo", PASS_DENOISING_ALBEDO);
-
-  MAP_PASS("Shadow Catcher", PASS_SHADOW_CATCHER);
-  MAP_PASS("Noisy Shadow Catcher", PASS_SHADOW_CATCHER);
-
+  MAP_PASS("Debug Render Time", PASS_RENDER_TIME);
  MAP_PASS("AdaptiveAuxBuffer", PASS_ADAPTIVE_AUX_BUFFER);
  MAP_PASS("Debug Sample Count", PASS_SAMPLE_COUNT);
-
  if (string_startswith(name, cryptomatte_prefix)) {
    return PASS_CRYPTOMATTE;
  }
-
 #undef MAP_PASS

  return PASS_NONE;
 }

-static Pass *pass_add(Scene *scene,
-                      PassType type,
-                      const char *name,
-                      PassMode mode = PassMode::DENOISED)
+/* Map a render pass name to its denoising pass offset; returns -1 when nothing matches. */
+int BlenderSync::get_denoising_pass(BL::RenderPass &b_pass)
 {
-  Pass *pass = scene->create_node<Pass>();
+  const string pass_name = b_pass.name();

-  pass->set_type(type);
-  pass->set_name(ustring(name));
-  pass->set_mode(mode);
+  if (pass_name == "Noisy Image")
+    return DENOISING_PASS_PREFILTERED_COLOR;

-  return pass;
+  if (pass_name.compare(0, 10, "Denoising ") != 0)
+    return -1;
+  const string suffix = pass_name.substr(10);
+
+  static const struct { const char *name; int offset; } suffix_map[] = {
+      {"Normal", DENOISING_PASS_PREFILTERED_NORMAL},
+      {"Albedo", DENOISING_PASS_PREFILTERED_ALBEDO},
+      {"Depth", DENOISING_PASS_PREFILTERED_DEPTH},
+      {"Shadowing", DENOISING_PASS_PREFILTERED_SHADOWING},
+      {"Variance", DENOISING_PASS_PREFILTERED_VARIANCE},
+      {"Intensity", DENOISING_PASS_PREFILTERED_INTENSITY},
+      {"Clean", DENOISING_PASS_CLEAN},
+  };
+  for (const auto &entry : suffix_map) {
+    if (suffix == entry.name) {
+      return entry.offset;
+    }
+  }
+  return -1;
 }

-void BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_view_layer)
+vector<Pass> BlenderSync::sync_render_passes(BL::Scene &b_scene,
+                                             BL::RenderLayer &b_rlay,
+                                             BL::ViewLayer &b_view_layer,
+                                             bool adaptive_sampling,
+                                             const DenoiseParams &denoising)
 {
-  PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
+  vector<Pass> passes;

-  /* Delete all existing passes. */
-  set<Pass *> clear_passes(scene->passes.begin(), scene->passes.end());
-  scene->delete_nodes(clear_passes);
-
-  /* Always add combined pass. */
-  pass_add(scene, PASS_COMBINED, "Combined");
-
-  /* Blender built-in data and light passes. */
+  /* loop over passes */
  for (BL::RenderPass &b_pass : b_rlay.passes) {
-    const PassType pass_type = get_blender_pass_type(b_pass);
-
-    if (pass_type == PASS_NONE) {
-      LOG(ERROR) << "Unknown pass " << b_pass.name();
-      continue;
-    }
+    PassType pass_type = get_pass_type(b_pass);

    if (pass_type == PASS_MOTION &&
        (b_view_layer.use_motion_blur() && b_scene.render().use_motion_blur())) {
      continue;
    }
-
-    pass_add(scene, pass_type, b_pass.name().c_str());
+    if (pass_type != PASS_NONE)
+      Pass::add(pass_type, passes, b_pass.name().c_str());
  }

  PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles");

-  /* Debug passes. */
-  if (get_boolean(crl, "pass_debug_sample_count")) {
-    b_engine.add_pass("Debug Sample Count", 1, "X", b_view_layer.name().c_str());
-    pass_add(scene, PASS_SAMPLE_COUNT, "Debug Sample Count");
+  int denoising_flags = 0;
+  if (denoising.use || denoising.store_passes) {
+    if (denoising.type == DENOISER_NLM) {
+#define MAP_OPTION(name, flag) \
+  if (!get_boolean(crl, name)) { \
+    denoising_flags |= flag; \
+  } \
+  ((void)0)
+      MAP_OPTION("denoising_diffuse_direct", DENOISING_CLEAN_DIFFUSE_DIR);
+      MAP_OPTION("denoising_diffuse_indirect", DENOISING_CLEAN_DIFFUSE_IND);
+      MAP_OPTION("denoising_glossy_direct", DENOISING_CLEAN_GLOSSY_DIR);
+      MAP_OPTION("denoising_glossy_indirect", DENOISING_CLEAN_GLOSSY_IND);
+      MAP_OPTION("denoising_transmission_direct", DENOISING_CLEAN_TRANSMISSION_DIR);
+      MAP_OPTION("denoising_transmission_indirect", DENOISING_CLEAN_TRANSMISSION_IND);
+#undef MAP_OPTION
+    }
+    b_engine.add_pass("Noisy Image", 4, "RGBA", b_view_layer.name().c_str());
+  }
+  scene->film->set_denoising_flags(denoising_flags);
+
+  if (denoising.store_passes) {
+    b_engine.add_pass("Denoising Normal", 3, "XYZ", b_view_layer.name().c_str());
+    b_engine.add_pass("Denoising Albedo", 3, "RGB", b_view_layer.name().c_str());
+    b_engine.add_pass("Denoising Depth", 1, "Z", b_view_layer.name().c_str());
+    if (denoising.type == DENOISER_NLM) {
+      b_engine.add_pass("Denoising Shadowing", 1, "X", b_view_layer.name().c_str());
+      b_engine.add_pass("Denoising Variance", 3, "RGB", b_view_layer.name().c_str());
+      b_engine.add_pass("Denoising Intensity", 1, "X", b_view_layer.name().c_str());
+    }
+
+    if (scene->film->get_denoising_flags() & DENOISING_CLEAN_ALL_PASSES) {
+      b_engine.add_pass("Denoising Clean", 3, "RGB", b_view_layer.name().c_str());
+    }
  }

-  /* Cycles specific passes. */
+  if (get_boolean(crl, "pass_debug_render_time")) {
+    b_engine.add_pass("Debug Render Time", 1, "X", b_view_layer.name().c_str());
+    Pass::add(PASS_RENDER_TIME, passes, "Debug Render Time");
+  }
+  if (get_boolean(crl, "pass_debug_sample_count")) {
+    b_engine.add_pass("Debug Sample Count", 1, "X", b_view_layer.name().c_str());
+    Pass::add(PASS_SAMPLE_COUNT, passes, "Debug Sample Count");
+  }
  if (get_boolean(crl, "use_pass_volume_direct")) {
    b_engine.add_pass("VolumeDir", 3, "RGB", b_view_layer.name().c_str());
-    pass_add(scene, PASS_VOLUME_DIRECT, "VolumeDir");
+    Pass::add(PASS_VOLUME_DIRECT, passes, "VolumeDir");
  }
  if (get_boolean(crl, "use_pass_volume_indirect")) {
    b_engine.add_pass("VolumeInd", 3, "RGB", b_view_layer.name().c_str());
-    pass_add(scene, PASS_VOLUME_INDIRECT, "VolumeInd");
-  }
-  if (get_boolean(crl, "use_pass_shadow_catcher")) {
-    b_engine.add_pass("Shadow Catcher", 3, "RGB", b_view_layer.name().c_str());
-    pass_add(scene, PASS_SHADOW_CATCHER, "Shadow Catcher");
+    Pass::add(PASS_VOLUME_INDIRECT, passes, "VolumeInd");
  }

  /* Cryptomatte stores two ID/weight pairs per RGBA layer.
@@ -630,7 +662,7 @@ void BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_v
    for (int i = 0; i < crypto_depth; i++) {
      string passname = cryptomatte_prefix + string_printf("Object%02d", i);
      b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str());
-      pass_add(scene, PASS_CRYPTOMATTE, passname.c_str());
+      Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str());
    }
    cryptomatte_passes = (CryptomatteType)(cryptomatte_passes | CRYPT_OBJECT);
  }
@@ -638,7 +670,7 @@ void BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_v
    for (int i = 0; i < crypto_depth; i++) {
      string passname = cryptomatte_prefix + string_printf("Material%02d", i);
      b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str());
-      pass_add(scene, PASS_CRYPTOMATTE, passname.c_str());
+      Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str());
    }
    cryptomatte_passes = (CryptomatteType)(cryptomatte_passes | CRYPT_MATERIAL);
  }
@@ -646,33 +678,22 @@ void BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_v
    for (int i = 0; i < crypto_depth; i++) {
      string passname = cryptomatte_prefix + string_printf("Asset%02d", i);
      b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str());
-      pass_add(scene, PASS_CRYPTOMATTE, passname.c_str());
+      Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str());
    }
    cryptomatte_passes = (CryptomatteType)(cryptomatte_passes | CRYPT_ASSET);
  }
+  if (b_view_layer.use_pass_cryptomatte_accurate() && cryptomatte_passes != CRYPT_NONE) {
+    cryptomatte_passes = (CryptomatteType)(cryptomatte_passes | CRYPT_ACCURATE);
+  }
  scene->film->set_cryptomatte_passes(cryptomatte_passes);

-  /* Denoising passes. */
-  const bool use_denoising = get_boolean(cscene, "use_denoising") &&
-                             get_boolean(crl, "use_denoising");
-  const bool store_denoising_passes = get_boolean(crl, "denoising_store_passes");
-  if (use_denoising) {
-    b_engine.add_pass("Noisy Image", 4, "RGBA", b_view_layer.name().c_str());
-    pass_add(scene, PASS_COMBINED, "Noisy Image", PassMode::NOISY);
-    if (get_boolean(crl, "use_pass_shadow_catcher")) {
-      b_engine.add_pass("Noisy Shadow Catcher", 3, "RGB", b_view_layer.name().c_str());
-      pass_add(scene, PASS_SHADOW_CATCHER, "Noisy Shadow Catcher", PassMode::NOISY);
+  if (adaptive_sampling) {
+    Pass::add(PASS_ADAPTIVE_AUX_BUFFER, passes);
+    if (!get_boolean(crl, "pass_debug_sample_count")) {
+      Pass::add(PASS_SAMPLE_COUNT, passes);
    }
  }
-  if (store_denoising_passes) {
-    b_engine.add_pass("Denoising Normal", 3, "XYZ", b_view_layer.name().c_str());
-    pass_add(scene, PASS_DENOISING_NORMAL, "Denoising Normal", PassMode::NOISY);

-    b_engine.add_pass("Denoising Albedo", 3, "RGB", b_view_layer.name().c_str());
-    pass_add(scene, PASS_DENOISING_ALBEDO, "Denoising Albedo", PassMode::NOISY);
-  }
-
-  /* Custom AOV passes. */
  BL::ViewLayer::aovs_iterator b_aov_iter;
  for (b_view_layer.aovs.begin(b_aov_iter); b_aov_iter != b_view_layer.aovs.end(); ++b_aov_iter) {
    BL::AOV b_aov(*b_aov_iter);
@@ -685,15 +706,28 @@ void BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_v

    if (is_color) {
      b_engine.add_pass(name.c_str(), 4, "RGBA", b_view_layer.name().c_str());
-      pass_add(scene, PASS_AOV_COLOR, name.c_str());
+      Pass::add(PASS_AOV_COLOR, passes, name.c_str());
    }
    else {
      b_engine.add_pass(name.c_str(), 1, "X", b_view_layer.name().c_str());
-      pass_add(scene, PASS_AOV_VALUE, name.c_str());
+      Pass::add(PASS_AOV_VALUE, passes, name.c_str());
    }
  }

+  scene->film->set_denoising_data_pass(denoising.use || denoising.store_passes);
+  scene->film->set_denoising_clean_pass(scene->film->get_denoising_flags() &
+                                        DENOISING_CLEAN_ALL_PASSES);
+  scene->film->set_denoising_prefiltered_pass(denoising.store_passes &&
+                                              denoising.type == DENOISER_NLM);
  scene->film->set_pass_alpha_threshold(b_view_layer.pass_alpha_threshold());
+
+  if (!Pass::equals(passes, scene->passes)) {
+    scene->film->tag_passes_update(scene, passes);
+    scene->film->tag_modified();
+    scene->integrator->tag_update(scene, Integrator::UPDATE_ALL);
+  }
+
+  return passes;
 }

 void BlenderSync::free_data_after_sync(BL::Depsgraph &b_depsgraph)
@@ -739,9 +773,9 @@ SceneParams BlenderSync::get_scene_params(BL::Scene &b_scene, bool background)
    params.shadingsystem = SHADINGSYSTEM_OSL;

  if (background || DebugFlags().viewport_static_bvh)
-    params.bvh_type = BVH_TYPE_STATIC;
+    params.bvh_type = SceneParams::BVH_STATIC;
  else
-    params.bvh_type = BVH_TYPE_DYNAMIC;
+    params.bvh_type = SceneParams::BVH_DYNAMIC;

  params.use_bvh_spatial_split = RNA_boolean_get(&cscene, "debug_use_spatial_splits");
  params.use_bvh_unaligned_nodes = RNA_boolean_get(&cscene, "debug_use_hair_bvh");
@@ -784,7 +818,8 @@ bool BlenderSync::get_session_pause(BL::Scene &b_scene, bool background)
 SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
                                              BL::Preferences &b_preferences,
                                              BL::Scene &b_scene,
-                                              bool background)
+                                              bool background,
+                                              BL::ViewLayer b_view_layer)
 {
  SessionParams params;
  PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
@@ -792,8 +827,7 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
  /* feature set */
  params.experimental = (get_enum(cscene, "feature_set") != 0);

-  /* Headless and background rendering. */
-  params.headless = BlenderSession::headless;
+  /* Background */
  params.background = background;

  /* Device */
@@ -802,26 +836,111 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,

  /* samples */
  int samples = get_int(cscene, "samples");
+  int aa_samples = get_int(cscene, "aa_samples");
  int preview_samples = get_int(cscene, "preview_samples");
+  int preview_aa_samples = get_int(cscene, "preview_aa_samples");

-  if (background) {
-    params.samples = samples;
+  if (get_boolean(cscene, "use_square_samples")) {
+    aa_samples = aa_samples * aa_samples;
+    preview_aa_samples = preview_aa_samples * preview_aa_samples;
+
+    samples = samples * samples;
+    preview_samples = preview_samples * preview_samples;
+  }
+
+  if (get_enum(cscene, "progressive") == 0 && params.device.has_branched_path) {
+    if (background) {
+      params.samples = aa_samples;
+    }
+    else {
+      params.samples = preview_aa_samples;
+      if (params.samples == 0)
+        params.samples = INT_MAX;
+    }
  }
  else {
-    params.samples = preview_samples;
-    if (params.samples == 0)
-      params.samples = INT_MAX;
+    if (background) {
+      params.samples = samples;
+    }
+    else {
+      params.samples = preview_samples;
+      if (params.samples == 0)
+        params.samples = INT_MAX;
+    }
  }

  /* Clamp samples. */
  params.samples = min(params.samples, Integrator::MAX_SAMPLES);

+  /* Adaptive sampling. */
+  params.adaptive_sampling = RNA_boolean_get(&cscene, "use_adaptive_sampling");
+
+  /* Tile size. */
+  const bool is_cpu = (params.device.type == DEVICE_CPU);
+  if (is_cpu || background) {
+    const int tile_x = b_engine.tile_x();
+    const int tile_y = b_engine.tile_y();
+
+    params.tile_size = make_int2(tile_x, tile_y);
+  }
+  else {
+    /* Interactive rendering on a non-CPU device. Rendering with tiles can currently be
+     * much slower there than on the CPU, which still needs to be investigated. Until
+     * then use a square tile from the debug setting so the viewport stays smooth. */
+    const int debug_tile_size = get_int(cscene, "debug_tile_size");
+    params.tile_size = make_int2(debug_tile_size, debug_tile_size);
+  }
+
+  /* Tile order: the scene setting is only used for non-headless background renders. */
+  const bool use_scene_tile_order = background && !BlenderSession::headless;
+  if (use_scene_tile_order) {
+    params.tile_order = (TileOrder)get_enum(cscene, "tile_order");
+  }
+  else {
+    params.tile_order = TILE_BOTTOM_TO_TOP;
+  }
+
+  /* Denoising */
+  params.denoising = get_denoise_params(b_scene, b_view_layer, background);
+
+  if (params.denoising.use) {
+    /* Add additional denoising devices if we are rendering and denoising
+     * with different devices. */
+    params.device.add_denoising_devices(params.denoising.type);
+
+    /* Check if denoiser is supported by device. */
+    if (!(params.device.denoisers & params.denoising.type)) {
+      params.denoising.use = false;
+    }
+  }
+
  /* Viewport Performance */
+  params.start_resolution = get_int(cscene, "preview_start_resolution");
  params.pixel_size = b_engine.get_preview_pixel_size(b_scene);

+  /* other parameters */
+  params.cancel_timeout = (double)get_float(cscene, "debug_cancel_timeout");
+  params.reset_timeout = (double)get_float(cscene, "debug_reset_timeout");
+  params.text_timeout = (double)get_float(cscene, "debug_text_timeout");
+
+  /* Progressive refine is never combined with save buffers or adaptive sampling. */
+  BL::RenderSettings b_r = b_scene.render();
+  const bool refine_requested = b_engine.is_preview() ||
+                                get_boolean(cscene, "use_progressive_refine");
+  const bool refine_blocked = b_r.use_save_buffers() || params.adaptive_sampling;
+  params.progressive_refine = refine_requested && !refine_blocked;
+
   if (background) {
+    /* Background renders are progressive only while progressive refine is on. */
+    params.progressive = params.progressive_refine;
+
+    /* NOTE(review): INT_MAX presumably turns off the reduced start resolution; confirm. */
+    params.start_resolution = INT_MAX;
     params.pixel_size = 1;
   }
+  else {
+    params.progressive = true;
+  }

  /* shading system - scene level needs full refresh */
  const bool shadingsystem = RNA_boolean_get(&cscene, "shading_system");
@@ -831,30 +950,19 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
  else if (shadingsystem == 1)
    params.shadingsystem = SHADINGSYSTEM_OSL;

-  /* Time limit. */
-  if (background) {
-    params.time_limit = get_float(cscene, "time_limit");
-  }
-  else {
-    /* For the viewport it kind of makes more sense to think in terms of the noise floor, which is
-     * usually higher than acceptable level for the final frame. */
-    /* TODO: It might be useful to support time limit in the viewport as well, but needs some
-     * extra thoughts and input. */
-    params.time_limit = 0.0;
+  /* Color management. */
+  params.display_buffer_linear = b_engine.support_display_space_shader(b_scene);
+
+  if (b_engine.is_preview()) {
+    /* Preview renders use the same update timeout as
+     * Blender's job update.
+     */
+    params.progressive_update_timeout = 0.1;
   }

-  /* Profiling. */
  params.use_profiling = params.device.has_profiling && !b_engine.is_preview() && background &&
                         BlenderSession::print_render_stats;

-  if (background) {
-    params.use_auto_tile = RNA_boolean_get(&cscene, "use_auto_tile");
-    params.tile_size = max(get_int(cscene, "tile_size"), 8);
-  }
-  else {
-    params.use_auto_tile = false;
-  }
-
  return params;
 }

@@ -862,34 +970,33 @@ DenoiseParams BlenderSync::get_denoise_params(BL::Scene &b_scene,
                                              BL::ViewLayer &b_view_layer,
                                              bool background)
 {
-  enum DenoiserInput {
-    DENOISER_INPUT_RGB = 1,
-    DENOISER_INPUT_RGB_ALBEDO = 2,
-    DENOISER_INPUT_RGB_ALBEDO_NORMAL = 3,
-
-    DENOISER_INPUT_NUM,
-  };
-
  DenoiseParams denoising;
  PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");

-  int input_passes = -1;
-
  if (background) {
    /* Final Render Denoising */
    denoising.use = get_boolean(cscene, "use_denoising");
    denoising.type = (DenoiserType)get_enum(cscene, "denoiser", DENOISER_NUM, DENOISER_NONE);
-    denoising.prefilter = (DenoiserPrefilter)get_enum(
-        cscene, "denoising_prefilter", DENOISER_PREFILTER_NUM, DENOISER_PREFILTER_NONE);
-
-    input_passes = (DenoiserInput)get_enum(
-        cscene, "denoising_input_passes", DENOISER_INPUT_NUM, DENOISER_INPUT_RGB_ALBEDO_NORMAL);

    if (b_view_layer) {
      PointerRNA clayer = RNA_pointer_get(&b_view_layer.ptr, "cycles");
      if (!get_boolean(clayer, "use_denoising")) {
        denoising.use = false;
      }
+
+      denoising.radius = get_int(clayer, "denoising_radius");
+      denoising.strength = get_float(clayer, "denoising_strength");
+      denoising.feature_strength = get_float(clayer, "denoising_feature_strength");
+      denoising.relative_pca = get_boolean(clayer, "denoising_relative_pca");
+
+      denoising.input_passes = (DenoiserInput)get_enum(
+          clayer,
+          (denoising.type == DENOISER_OPTIX) ? "denoising_optix_input_passes" :
+                                               "denoising_openimagedenoise_input_passes",
+          DENOISER_INPUT_NUM,
+          DENOISER_INPUT_RGB_ALBEDO_NORMAL);
+
+      denoising.store_passes = get_boolean(clayer, "denoising_store_passes");
    }
  }
  else {
@@ -897,12 +1004,10 @@ DenoiseParams BlenderSync::get_denoise_params(BL::Scene &b_scene,
    denoising.use = get_boolean(cscene, "use_preview_denoising");
    denoising.type = (DenoiserType)get_enum(
        cscene, "preview_denoiser", DENOISER_NUM, DENOISER_NONE);
-    denoising.prefilter = (DenoiserPrefilter)get_enum(
-        cscene, "preview_denoising_prefilter", DENOISER_PREFILTER_NUM, DENOISER_PREFILTER_FAST);
    denoising.start_sample = get_int(cscene, "preview_denoising_start_sample");

-    input_passes = (DenoiserInput)get_enum(
-        cscene, "preview_denoising_input_passes", DENOISER_INPUT_NUM, DENOISER_INPUT_RGB_ALBEDO);
+    denoising.input_passes = (DenoiserInput)get_enum(
+        cscene, "preview_denoising_input_passes", DENOISER_INPUT_NUM, (int)denoising.input_passes);

    /* Auto select fastest denoiser. */
    if (denoising.type == DENOISER_NONE) {
@@ -918,27 +1023,6 @@ DenoiseParams BlenderSync::get_denoise_params(BL::Scene &b_scene,
    }
  }

-  switch (input_passes) {
-    case DENOISER_INPUT_RGB:
-      denoising.use_pass_albedo = false;
-      denoising.use_pass_normal = false;
-      break;
-
-    case DENOISER_INPUT_RGB_ALBEDO:
-      denoising.use_pass_albedo = true;
-      denoising.use_pass_normal = false;
-      break;
-
-    case DENOISER_INPUT_RGB_ALBEDO_NORMAL:
-      denoising.use_pass_albedo = true;
-      denoising.use_pass_normal = true;
-      break;
-
-    default:
-      LOG(ERROR) << "Unhandled input passes enum " << input_passes;
-      break;
-  }
-
  return denoising;
 }

--- a/intern/cycles/blender/blender_sync.h
+++ b/intern/cycles/blender/blender_sync.h
@@ -60,7 +60,6 @@ class BlenderSync {
              BL::Scene &b_scene,
              Scene *scene,
              bool preview,
-              bool use_developer_ui,
              Progress &progress);
  ~BlenderSync();

@@ -76,8 +75,12 @@ class BlenderSync {
                 int height,
                 void **python_thread_state);
  void sync_view_layer(BL::ViewLayer &b_view_layer);
-  void sync_render_passes(BL::RenderLayer &b_render_layer, BL::ViewLayer &b_view_layer);
-  void sync_integrator(BL::ViewLayer &b_view_layer, bool background);
+  vector<Pass> sync_render_passes(BL::Scene &b_scene,
+                                  BL::RenderLayer &b_render_layer,
+                                  BL::ViewLayer &b_view_layer,
+                                  bool adaptive_sampling,
+                                  const DenoiseParams &denoising);
+  void sync_integrator();
  void sync_camera(BL::RenderSettings &b_render,
                   BL::Object &b_override,
                   int width,
@@ -95,13 +98,22 @@ class BlenderSync {

  /* get parameters */
  static SceneParams get_scene_params(BL::Scene &b_scene, bool background);
-  static SessionParams get_session_params(BL::RenderEngine &b_engine,
-                                          BL::Preferences &b_userpref,
-                                          BL::Scene &b_scene,
-                                          bool background);
+  static SessionParams get_session_params(
+      BL::RenderEngine &b_engine,
+      BL::Preferences &b_userpref,
+      BL::Scene &b_scene,
+      bool background,
+      BL::ViewLayer b_view_layer = BL::ViewLayer(PointerRNA_NULL));
  static bool get_session_pause(BL::Scene &b_scene, bool background);
-  static BufferParams get_buffer_params(
-      BL::SpaceView3D &b_v3d, BL::RegionView3D &b_rv3d, Camera *cam, int width, int height);
+  static BufferParams get_buffer_params(BL::SpaceView3D &b_v3d,
+                                        BL::RegionView3D &b_rv3d,
+                                        Camera *cam,
+                                        int width,
+                                        int height,
+                                        const bool use_denoiser);
+
+  static PassType get_pass_type(BL::RenderPass &b_pass);
+  static int get_denoising_pass(BL::RenderPass &b_pass);

 private:
  static DenoiseParams get_denoise_params(BL::Scene &b_scene,
@@ -119,7 +131,7 @@ class BlenderSync {
                   int width,
                   int height,
                   void **python_thread_state);
-  void sync_film(BL::ViewLayer &b_view_layer, BL::SpaceView3D &b_v3d);
+  void sync_film(BL::SpaceView3D &b_v3d);
  void sync_view();

  /* Shader */
@@ -233,7 +245,6 @@ class BlenderSync {
  Scene *scene;
  bool preview;
  bool experimental;
-  bool use_developer_ui;

  float dicing_rate;
  int max_subdivisions;
@@ -242,6 +253,7 @@ class BlenderSync {
    RenderLayerInfo()
        : material_override(PointerRNA_NULL),
          use_background_shader(true),
+          use_background_ao(true),
          use_surfaces(true),
          use_hair(true),
          use_volumes(true),
@@ -254,6 +266,7 @@ class BlenderSync {
    string name;
    BL::Material material_override;
    bool use_background_shader;
+    bool use_background_ao;
    bool use_surfaces;
    bool use_hair;
    bool use_volumes;
--- a/intern/cycles/blender/blender_util.h
+++ b/intern/cycles/blender/blender_util.h
@@ -90,27 +90,26 @@ static inline BL::Mesh object_to_mesh(BL::BlendData & /*data*/,
  }
 #endif

-  BL::Mesh mesh = (b_ob_info.object_data.is_a(&RNA_Mesh)) ? BL::Mesh(b_ob_info.object_data) :
-                                                            BL::Mesh(PointerRNA_NULL);
+  BL::Mesh mesh(PointerRNA_NULL);
+  if (b_ob_info.object_data.is_a(&RNA_Mesh)) {
+    /* TODO: calc_undeformed is not used. */
+    mesh = BL::Mesh(b_ob_info.object_data);

-  if (b_ob_info.is_real_object_data()) {
-    if (mesh) {
-      /* Make a copy to split faces if we use autosmooth, otherwise not needed.
-       * Also in edit mode do we need to make a copy, to ensure data layers like
-       * UV are not empty. */
-      if (mesh.is_editmode() ||
-          (mesh.use_auto_smooth() && subdivision_type == Mesh::SUBDIVISION_NONE)) {
-        BL::Depsgraph depsgraph(PointerRNA_NULL);
-        mesh = b_ob_info.real_object.to_mesh(false, depsgraph);
-      }
-    }
-    else {
+    /* Make a copy to split faces if we use autosmooth, otherwise not needed.
+     * Also in edit mode do we need to make a copy, to ensure data layers like
+     * UV are not empty. */
+    if (mesh.is_editmode() ||
+        (mesh.use_auto_smooth() && subdivision_type == Mesh::SUBDIVISION_NONE)) {
      BL::Depsgraph depsgraph(PointerRNA_NULL);
+      assert(b_ob_info.is_real_object_data());
      mesh = b_ob_info.real_object.to_mesh(false, depsgraph);
    }
  }
  else {
-    /* TODO: what to do about non-mesh geometry instances? */
+    BL::Depsgraph depsgraph(PointerRNA_NULL);
+    if (b_ob_info.is_real_object_data()) {
+      mesh = b_ob_info.real_object.to_mesh(false, depsgraph);
+    }
  }

 #if 0
@@ -171,11 +170,12 @@ static inline void curvemap_minmax_curve(/*const*/ BL::CurveMap &curve, float *m
 }

 static inline void curvemapping_minmax(/*const*/ BL::CurveMapping &cumap,
-                                       int num_curves,
+                                       bool rgb_curve,
                                       float *min_x,
                                       float *max_x)
 {
  // const int num_curves = cumap.curves.length(); /* Gives linking error so far. */
+  const int num_curves = rgb_curve ? 4 : 3;
  *min_x = FLT_MAX;
  *max_x = -FLT_MAX;
  for (int i = 0; i < num_curves; ++i) {
@@ -195,28 +195,6 @@ static inline void curvemapping_to_array(BL::CurveMapping &cumap, array<float> &
  }
 }

-static inline void curvemapping_float_to_array(BL::CurveMapping &cumap,
-                                               array<float> &data,
-                                               int size)
-{
-  float min = 0.0f, max = 1.0f;
-
-  curvemapping_minmax(cumap, 1, &min, &max);
-
-  const float range = max - min;
-
-  cumap.update();
-
-  BL::CurveMap map = cumap.curves[0];
-
-  data.resize(size);
-
-  for (int i = 0; i < size; i++) {
-    float t = min + (float)i / (float)(size - 1) * range;
-    data[i] = cumap.evaluate(map, t);
-  }
-}
-
 static inline void curvemapping_color_to_array(BL::CurveMapping &cumap,
                                               array<float3> &data,
                                               int size,
@@ -235,8 +213,7 @@ static inline void curvemapping_color_to_array(BL::CurveMapping &cumap,
   *
   * There might be some better estimations here tho.
   */
-  const int num_curves = rgb_curve ? 4 : 3;
-  curvemapping_minmax(cumap, num_curves, &min_x, &max_x);
+  curvemapping_minmax(cumap, rgb_curve, &min_x, &max_x);

  const float range_x = max_x - min_x;

--- a/intern/cycles/blender/blender_viewport.cpp
+++ b/intern/cycles/blender/blender_viewport.cpp
@@ -17,8 +17,6 @@
 #include "blender_viewport.h"

 #include "blender_util.h"
-#include "render/pass.h"
-#include "util/util_logging.h"

 CCL_NAMESPACE_BEGIN

@@ -28,12 +26,11 @@ BlenderViewportParameters::BlenderViewportParameters()
      studiolight_rotate_z(0.0f),
      studiolight_intensity(1.0f),
      studiolight_background_alpha(1.0f),
-      display_pass(PASS_COMBINED),
-      show_active_pixels(false)
+      display_pass(PASS_COMBINED)
 {
 }

-BlenderViewportParameters::BlenderViewportParameters(BL::SpaceView3D &b_v3d, bool use_developer_ui)
+BlenderViewportParameters::BlenderViewportParameters(BL::SpaceView3D &b_v3d)
    : BlenderViewportParameters()
 {
  if (!b_v3d) {
@@ -58,25 +55,7 @@ BlenderViewportParameters::BlenderViewportParameters(BL::SpaceView3D &b_v3d, boo
  }

  /* Film. */
-
-  /* Lookup display pass based on the enum identifier.
-   * This is because integer values of python enum are not aligned with the passes definition in
-   * the kernel. */
-
-  display_pass = PASS_COMBINED;
-
-  const string display_pass_identifier = get_enum_identifier(cshading, "render_pass");
-  if (!display_pass_identifier.empty()) {
-    const ustring pass_type_identifier(string_to_lower(display_pass_identifier));
-    const NodeEnum *pass_type_enum = Pass::get_type_enum();
-    if (pass_type_enum->exists(pass_type_identifier)) {
-      display_pass = static_cast<PassType>((*pass_type_enum)[pass_type_identifier]);
-    }
-  }
-
-  if (use_developer_ui) {
-    show_active_pixels = get_boolean(cshading, "show_active_pixels");
-  }
+  display_pass = (PassType)get_enum(cshading, "render_pass", -1, -1);
 }

 bool BlenderViewportParameters::shader_modified(const BlenderViewportParameters &other) const
@@ -90,7 +69,7 @@ bool BlenderViewportParameters::shader_modified(const BlenderViewportParameters

 bool BlenderViewportParameters::film_modified(const BlenderViewportParameters &other) const
 {
-  return display_pass != other.display_pass || show_active_pixels != other.show_active_pixels;
+  return display_pass != other.display_pass;
 }

 bool BlenderViewportParameters::modified(const BlenderViewportParameters &other) const
@@ -103,4 +82,18 @@ bool BlenderViewportParameters::use_custom_shader() const
  return !(use_scene_world && use_scene_lights);
 }

+/* Make the pass shown in the viewport the only entry of `passes`; returns that pass type,
+ * or PASS_NONE (leaving `passes` untouched) when there is no 3D viewport. */
+PassType update_viewport_display_passes(BL::SpaceView3D &b_v3d, vector<Pass> &passes)
+{
+  if (!b_v3d) {
+    return PASS_NONE;
+  }
+
+  const PassType shown_pass = BlenderViewportParameters(b_v3d).display_pass;
+  passes.clear();
+  Pass::add(shown_pass, passes);
+  return shown_pass;
+}
+
 CCL_NAMESPACE_END
--- a/intern/cycles/blender/blender_viewport.h
+++ b/intern/cycles/blender/blender_viewport.h
@@ -39,10 +39,9 @@ class BlenderViewportParameters {

  /* Film. */
  PassType display_pass;
-  bool show_active_pixels;

  BlenderViewportParameters();
-  BlenderViewportParameters(BL::SpaceView3D &b_v3d, bool use_developer_ui);
+  explicit BlenderViewportParameters(BL::SpaceView3D &b_v3d);

  /* Check whether any of shading related settings are different from the given parameters. */
  bool shader_modified(const BlenderViewportParameters &other) const;
@@ -58,6 +57,8 @@ class BlenderViewportParameters {
  bool use_custom_shader() const;
 };

+PassType update_viewport_display_passes(BL::SpaceView3D &b_v3d, vector<Pass> &passes);
+
 CCL_NAMESPACE_END

 #endif
--- a/intern/cycles/bvh/bvh_build.cpp
+++ b/intern/cycles/bvh/bvh_build.cpp
@@ -832,18 +832,18 @@ BVHNode *BVHBuild::create_leaf_node(const BVHRange &range, const vector<BVHRefer
  typedef StackAllocator<256, float2> LeafTimeStackAllocator;
  typedef StackAllocator<256, BVHReference> LeafReferenceStackAllocator;

-  vector<int, LeafStackAllocator> p_type[PRIMITIVE_NUM];
-  vector<int, LeafStackAllocator> p_index[PRIMITIVE_NUM];
-  vector<int, LeafStackAllocator> p_object[PRIMITIVE_NUM];
-  vector<float2, LeafTimeStackAllocator> p_time[PRIMITIVE_NUM];
-  vector<BVHReference, LeafReferenceStackAllocator> p_ref[PRIMITIVE_NUM];
+  vector<int, LeafStackAllocator> p_type[PRIMITIVE_NUM_TOTAL];
+  vector<int, LeafStackAllocator> p_index[PRIMITIVE_NUM_TOTAL];
+  vector<int, LeafStackAllocator> p_object[PRIMITIVE_NUM_TOTAL];
+  vector<float2, LeafTimeStackAllocator> p_time[PRIMITIVE_NUM_TOTAL];
+  vector<BVHReference, LeafReferenceStackAllocator> p_ref[PRIMITIVE_NUM_TOTAL];

  /* TODO(sergey): In theory we should be able to store references. */
  vector<BVHReference, LeafReferenceStackAllocator> object_references;

-  uint visibility[PRIMITIVE_NUM] = {0};
+  uint visibility[PRIMITIVE_NUM_TOTAL] = {0};
  /* NOTE: Keep initialization in sync with actual number of primitives. */
-  BoundBox bounds[PRIMITIVE_NUM] = {
+  BoundBox bounds[PRIMITIVE_NUM_TOTAL] = {
      BoundBox::empty, BoundBox::empty, BoundBox::empty, BoundBox::empty};
  int ob_num = 0;
  int num_new_prims = 0;
@@ -877,7 +877,7 @@ BVHNode *BVHBuild::create_leaf_node(const BVHRange &range, const vector<BVHRefer
   * TODO(sergey): With some pointer trickery we can write directly to the
   * destination buffers for the non-spatial split BVH.
   */
-  BVHNode *leaves[PRIMITIVE_NUM + 1] = {NULL};
+  BVHNode *leaves[PRIMITIVE_NUM_TOTAL + 1] = {NULL};
  int num_leaves = 0;
  size_t start_index = 0;
  vector<int, LeafStackAllocator> local_prim_type, local_prim_index, local_prim_object;
@@ -888,7 +888,7 @@ BVHNode *BVHBuild::create_leaf_node(const BVHRange &range, const vector<BVHRefer
  if (need_prim_time) {
    local_prim_time.resize(num_new_prims);
  }
-  for (int i = 0; i < PRIMITIVE_NUM; ++i) {
+  for (int i = 0; i < PRIMITIVE_NUM_TOTAL; ++i) {
    int num = (int)p_type[i].size();
    if (num != 0) {
      assert(p_type[i].size() == p_index[i].size());
--- a/intern/cycles/bvh/bvh_embree.cpp
+++ b/intern/cycles/bvh/bvh_embree.cpp
@@ -37,10 +37,10 @@
 /* Kernel includes are necessary so that the filter function for Embree can access the packed BVH.
 */
 #  include "kernel/bvh/bvh_embree.h"
-#  include "kernel/bvh/bvh_util.h"
-#  include "kernel/device/cpu/compat.h"
-#  include "kernel/device/cpu/globals.h"
+#  include "kernel/kernel_compat_cpu.h"
+#  include "kernel/kernel_globals.h"
 #  include "kernel/kernel_random.h"
+#  include "kernel/split/kernel_split_data_types.h"

 #  include "render/hair.h"
 #  include "render/mesh.h"
@@ -73,69 +73,46 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
  const RTCRay *ray = (RTCRay *)args->ray;
  RTCHit *hit = (RTCHit *)args->hit;
  CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
-  const KernelGlobals *kg = ctx->kg;
+  KernelGlobals *kg = ctx->kg;

  switch (ctx->type) {
    case CCLIntersectContext::RAY_SHADOW_ALL: {
-      Intersection current_isect;
-      kernel_embree_convert_hit(kg, ray, hit, &current_isect);
-
-      /* If no transparent shadows, all light is blocked. */
-      const int flags = intersection_get_shader_flags(kg, &current_isect);
-      if (!(flags & (SD_HAS_TRANSPARENT_SHADOW)) || ctx->max_hits == 0) {
-        ctx->opaque_hit = true;
-        return;
-      }
-
-      /* Test if we need to record this transparent intersection. */
-      if (ctx->num_hits < ctx->max_hits || ray->tfar < ctx->max_t) {
-        /* Skip already recorded intersections. */
-        int num_recorded_hits = min(ctx->num_hits, ctx->max_hits);
-
-        for (int i = 0; i < num_recorded_hits; ++i) {
+      /* Append the hit to the array; only its first num_hits entries hold recorded hits. */
+      if (ctx->num_hits < ctx->max_hits) {
+        Intersection current_isect;
+        kernel_embree_convert_hit(kg, ray, hit, &current_isect);
+        for (size_t i = 0; i < ctx->num_hits; ++i) {
          if (current_isect.object == ctx->isect_s[i].object &&
              current_isect.prim == ctx->isect_s[i].prim && current_isect.t == ctx->isect_s[i].t) {
            /* This intersection was already recorded, skip it. */
            *args->valid = 0;
-            return;
+            break;
          }
        }
-
-        /* If maximum number of hits was reached, replace the intersection with the
-         * highest distance. We want to find the N closest intersections. */
-        int isect_index = num_recorded_hits;
-        if (num_recorded_hits + 1 >= ctx->max_hits) {
-          float max_t = ctx->isect_s[0].t;
-          int max_recorded_hit = 0;
-
-          for (int i = 1; i < num_recorded_hits; ++i) {
-            if (ctx->isect_s[i].t > max_t) {
-              max_recorded_hit = i;
-              max_t = ctx->isect_s[i].t;
-            }
-          }
-
-          if (num_recorded_hits >= ctx->max_hits) {
-            isect_index = max_recorded_hit;
-          }
-
-          /* Limit the ray distance and stop counting hits beyond this.
-           * TODO: is there some way we can tell Embree to stop intersecting beyond
-           * this distance when max number of hits is reached?. Or maybe it will
-           * become irrelevant if we make max_hits a very high number on the CPU. */
-          ctx->max_t = max(current_isect.t, max_t);
+        Intersection *isect = &ctx->isect_s[ctx->num_hits];
+        ++ctx->num_hits;
+        *isect = current_isect;
+        int prim = kernel_tex_fetch(__prim_index, isect->prim);
+        int shader = 0;
+        if (kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) {
+          shader = kernel_tex_fetch(__tri_shader, prim);
+        }
+        else {
+          float4 str = kernel_tex_fetch(__curves, prim);
+          shader = __float_as_int(str.z);
+        }
+        int flag = kernel_tex_fetch(__shaders, shader & SHADER_MASK).flags;
+        /* If no transparent shadows, all light is blocked. */
+        if (flag & (SD_HAS_TRANSPARENT_SHADOW)) {
+          /* This tells Embree to continue tracing. */
+          *args->valid = 0;
        }
-
-        ctx->isect_s[isect_index] = current_isect;
      }
-
-      /* Always increase the number of hits, even beyond ray.max_hits so that
-       * the caller can detect this as and consider it opaque, or trace another
-       * ray. */
-      ++ctx->num_hits;
-
-      /* This tells Embree to continue tracing. */
-      *args->valid = 0;
+      else {
+        /* Increase the number of hits beyond ray.max_hits
+         * so that the caller can detect this as opaque. */
+        ++ctx->num_hits;
+      }
      break;
    }
    case CCLIntersectContext::RAY_LOCAL:
@@ -213,7 +190,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
      if (ctx->num_hits < ctx->max_hits) {
        Intersection current_isect;
        kernel_embree_convert_hit(kg, ray, hit, &current_isect);
-        for (size_t i = 0; i < ctx->num_hits; ++i) {
+        for (size_t i = 0; i < ctx->num_hits; ++i) {
          if (current_isect.object == ctx->isect_s[i].object &&
              current_isect.prim == ctx->isect_s[i].prim && current_isect.t == ctx->isect_s[i].t) {
            /* This intersection was already recorded, skip it. */
@@ -352,7 +329,7 @@ void BVHEmbree::build(Progress &progress, Stats *stats, RTCDevice rtc_device_)
    scene = NULL;
  }

-  const bool dynamic = params.bvh_type == BVH_TYPE_DYNAMIC;
+  const bool dynamic = params.bvh_type == SceneParams::BVH_DYNAMIC;

  scene = rtcNewScene(rtc_device);
  const RTCSceneFlags scene_flags = (dynamic ? RTC_SCENE_FLAG_DYNAMIC : RTC_SCENE_FLAG_NONE) |
--- a/intern/cycles/bvh/bvh_params.h
+++ b/intern/cycles/bvh/bvh_params.h
@@ -31,27 +31,6 @@ CCL_NAMESPACE_BEGIN
 */
 typedef KernelBVHLayout BVHLayout;

-/* Type of BVH, in terms whether it is supported dynamic updates of meshes
- * or whether modifying geometry requires full BVH rebuild.
- */
-enum BVHType {
-  /* BVH supports dynamic updates of geometry.
-   *
-   * Faster for updating BVH tree when doing modifications in viewport,
-   * but slower for rendering.
-   */
-  BVH_TYPE_DYNAMIC = 0,
-  /* BVH tree is calculated for specific scene, updates in geometry
-   * requires full tree rebuild.
-   *
-   * Slower to update BVH tree when modifying objects in viewport, also
-   * slower to build final BVH tree but gives best possible render speed.
-   */
-  BVH_TYPE_STATIC = 1,
-
-  BVH_NUM_TYPES,
-};
-
 /* Names bitflag type to denote which BVH layouts are supported by
 * particular area.
 *
--- a/intern/cycles/cmake/external_libs.cmake
+++ b/intern/cycles/cmake/external_libs.cmake
@@ -287,6 +287,9 @@ if(CYCLES_STANDALONE_REPOSITORY)
  endif()

  set(__boost_packages filesystem regex system thread date_time)
+  if(WITH_CYCLES_NETWORK)
+    list(APPEND __boost_packages serialization)
+  endif()
  if(WITH_CYCLES_OSL)
    list(APPEND __boost_packages wave)
  endif()
@@ -532,13 +535,4 @@ if(WITH_CYCLES_CUDA_BINARIES OR NOT WITH_CUDA_DYNLOAD)
  endif()
 endif()

-
-###########################################################################
-# HIP
-###########################################################################
-
-if(NOT WITH_HIP_DYNLOAD)
-  set(WITH_HIP_DYNLOAD ON)
-endif()
-
 unset(_cycles_lib_dir)
--- a/intern/cycles/cmake/macros.cmake
+++ b/intern/cycles/cmake/macros.cmake
@@ -156,16 +156,10 @@ macro(cycles_target_link_libraries target)
    ${PLATFORM_LINKLIBS}
  )

-  if(WITH_CYCLES_DEVICE_CUDA OR WITH_CYCLES_DEVICE_OPTIX)
-    if(WITH_CUDA_DYNLOAD)
-      target_link_libraries(${target} extern_cuew)
-    else()
-      target_link_libraries(${target} ${CUDA_CUDA_LIBRARY})
-    endif()
-  endif()
-
-  if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
-    target_link_libraries(${target} extern_hipew)
+  if(WITH_CUDA_DYNLOAD)
+    target_link_libraries(${target} extern_cuew)
+  else()
+    target_link_libraries(${target} ${CUDA_CUDA_LIBRARY})
  endif()

  if(CYCLES_STANDALONE_REPOSITORY)
--- a/intern/cycles/device/CMakeLists.txt
+++ b/intern/cycles/device/CMakeLists.txt
@@ -22,139 +22,91 @@ set(INC_SYS
  ../../../extern/clew/include
 )

-if(WITH_CYCLES_DEVICE_OPTIX OR WITH_CYCLES_DEVICE_CUDA)
-  if(WITH_CUDA_DYNLOAD)
-    list(APPEND INC
-      ../../../extern/cuew/include
-    )
-    add_definitions(-DWITH_CUDA_DYNLOAD)
-  else()
-    list(APPEND INC_SYS
-      ${CUDA_TOOLKIT_INCLUDE}
-    )
-    add_definitions(-DCYCLES_CUDA_NVCC_EXECUTABLE="${CUDA_NVCC_EXECUTABLE}")
-  endif()
-endif()
-
-if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
+if(WITH_CUDA_DYNLOAD)
  list(APPEND INC
-    ../../../extern/hipew/include
+    ../../../extern/cuew/include
  )
-  add_definitions(-DWITH_HIP_DYNLOAD)
+  add_definitions(-DWITH_CUDA_DYNLOAD)
+else()
+  list(APPEND INC_SYS
+    ${CUDA_TOOLKIT_INCLUDE}
+  )
+  add_definitions(-DCYCLES_CUDA_NVCC_EXECUTABLE="${CUDA_NVCC_EXECUTABLE}")
 endif()

 set(SRC
  device.cpp
-  device_denoise.cpp
-  device_graphics_interop.cpp
-  device_kernel.cpp
+  device_cpu.cpp
+  device_cuda.cpp
+  device_denoising.cpp
+  device_dummy.cpp
  device_memory.cpp
-  device_queue.cpp
-)
-
-set(SRC_CPU
-  cpu/device.cpp
-  cpu/device.h
-  cpu/device_impl.cpp
-  cpu/device_impl.h
-  cpu/kernel.cpp
-  cpu/kernel.h
-  cpu/kernel_function.h
-  cpu/kernel_thread_globals.cpp
-  cpu/kernel_thread_globals.h
+  device_multi.cpp
+  device_opencl.cpp
+  device_optix.cpp
+  device_split_kernel.cpp
+  device_task.cpp
 )

 set(SRC_CUDA
-  cuda/device.cpp
-  cuda/device.h
-  cuda/device_impl.cpp
-  cuda/device_impl.h
-  cuda/graphics_interop.cpp
-  cuda/graphics_interop.h
-  cuda/kernel.cpp
-  cuda/kernel.h
-  cuda/queue.cpp
-  cuda/queue.h
-  cuda/util.cpp
-  cuda/util.h
+  cuda/device_cuda.h
+  cuda/device_cuda_impl.cpp
 )

-set(SRC_HIP
-  hip/device.cpp
-  hip/device.h
-  hip/device_impl.cpp
-  hip/device_impl.h
-  hip/graphics_interop.cpp
-  hip/graphics_interop.h
-  hip/kernel.cpp
-  hip/kernel.h
-  hip/queue.cpp
-  hip/queue.h
-  hip/util.cpp
-  hip/util.h
+set(SRC_OPENCL
+  opencl/device_opencl.h
+  opencl/device_opencl_impl.cpp
+  opencl/memory_manager.h
+  opencl/memory_manager.cpp
+  opencl/opencl_util.cpp
 )

-set(SRC_DUMMY
-  dummy/device.cpp
-  dummy/device.h
-)
-
-set(SRC_MULTI
-  multi/device.cpp
-  multi/device.h
-)
-
-set(SRC_OPTIX
-  optix/device.cpp
-  optix/device.h
-  optix/device_impl.cpp
-  optix/device_impl.h
-  optix/queue.cpp
-  optix/queue.h
-  optix/util.h
-)
+if(WITH_CYCLES_NETWORK)
+  list(APPEND SRC
+    device_network.cpp
+  )
+endif()

 set(SRC_HEADERS
  device.h
-  device_denoise.h
-  device_graphics_interop.h
+  device_denoising.h
  device_memory.h
-  device_kernel.h
-  device_queue.h
+  device_intern.h
+  device_network.h
+  device_split_kernel.h
+  device_task.h
 )

 set(LIB
+  cycles_render
  cycles_kernel
  cycles_util
  ${CYCLES_GL_LIBRARIES}
 )

-if(WITH_CYCLES_DEVICE_OPTIX OR WITH_CYCLES_DEVICE_CUDA)
-  if(WITH_CUDA_DYNLOAD)
-    list(APPEND LIB
-      extern_cuew
-    )
-  else()
-    list(APPEND LIB
-      ${CUDA_CUDA_LIBRARY}
-    )
-  endif()
-endif()
-
-if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
+if(WITH_CUDA_DYNLOAD)
  list(APPEND LIB
-    extern_hipew
+    extern_cuew
+  )
+else()
+  list(APPEND LIB
+    ${CUDA_CUDA_LIBRARY}
  )
 endif()

 add_definitions(${GL_DEFINITIONS})
-
+if(WITH_CYCLES_NETWORK)
+  add_definitions(-DWITH_NETWORK)
+endif()
+if(WITH_CYCLES_DEVICE_OPENCL)
+  list(APPEND LIB
+    extern_clew
+  )
+  add_definitions(-DWITH_OPENCL)
+endif()
 if(WITH_CYCLES_DEVICE_CUDA)
  add_definitions(-DWITH_CUDA)
 endif()
-if(WITH_CYCLES_DEVICE_HIP)
-  add_definitions(-DWITH_HIP)
-endif()
 if(WITH_CYCLES_DEVICE_OPTIX)
  add_definitions(-DWITH_OPTIX)
 endif()
@@ -163,28 +115,18 @@ if(WITH_CYCLES_DEVICE_MULTI)
 endif()

 if(WITH_OPENIMAGEDENOISE)
+  add_definitions(-DWITH_OPENIMAGEDENOISE)
+  add_definitions(-DOIDN_STATIC_LIB)
+  list(APPEND INC_SYS
+    ${OPENIMAGEDENOISE_INCLUDE_DIRS}
+  )
  list(APPEND LIB
    ${OPENIMAGEDENOISE_LIBRARIES}
+    ${TBB_LIBRARIES}
  )
 endif()

 include_directories(${INC})
 include_directories(SYSTEM ${INC_SYS})

-cycles_add_library(cycles_device "${LIB}"
-  ${SRC}
-  ${SRC_CPU}
-  ${SRC_CUDA}
-  ${SRC_HIP}
-  ${SRC_DUMMY}
-  ${SRC_MULTI}
-  ${SRC_OPTIX}
-  ${SRC_HEADERS}
-)
-
-source_group("cpu" FILES ${SRC_CPU})
-source_group("cuda" FILES ${SRC_CUDA})
-source_group("dummy" FILES ${SRC_DUMMY})
-source_group("multi" FILES ${SRC_MULTI})
-source_group("optix" FILES ${SRC_OPTIX})
-source_group("common" FILES ${SRC} ${SRC_HEADERS})
+cycles_add_library(cycles_device "${LIB}" ${SRC} ${SRC_CUDA} ${SRC_OPENCL} ${SRC_HEADERS})
--- a/intern/cycles/device/cpu/device.cpp
+++ b/intern/cycles/device/cpu/device.cpp
@@ -1,64 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "device/cpu/device.h"
-#include "device/cpu/device_impl.h"
-
-/* Used for `info.denoisers`. */
-/* TODO(sergey): The denoisers are probably to be moved completely out of the device into their
- * own class. But until then keep API consistent with how it used to work before. */
-#include "util/util_openimagedenoise.h"
-
-CCL_NAMESPACE_BEGIN
-
-Device *device_cpu_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
-{
-  return new CPUDevice(info, stats, profiler);
-}
-
-void device_cpu_info(vector<DeviceInfo> &devices)
-{
-  DeviceInfo info;
-
-  info.type = DEVICE_CPU;
-  info.description = system_cpu_brand_string();
-  info.id = "CPU";
-  info.num = 0;
-  info.has_osl = true;
-  info.has_half_images = true;
-  info.has_nanovdb = true;
-  info.has_profiling = true;
-  if (openimagedenoise_supported()) {
-    info.denoisers |= DENOISER_OPENIMAGEDENOISE;
-  }
-
-  devices.insert(devices.begin(), info);
-}
-
-string device_cpu_capabilities()
-{
-  string capabilities = "";
-  capabilities += system_cpu_support_sse2() ? "SSE2 " : "";
-  capabilities += system_cpu_support_sse3() ? "SSE3 " : "";
-  capabilities += system_cpu_support_sse41() ? "SSE41 " : "";
-  capabilities += system_cpu_support_avx() ? "AVX " : "";
-  capabilities += system_cpu_support_avx2() ? "AVX2" : "";
-  if (capabilities[capabilities.size() - 1] == ' ')
-    capabilities.resize(capabilities.size() - 1);
-  return capabilities;
-}
-
-CCL_NAMESPACE_END
--- a/intern/cycles/device/cpu/device_impl.cpp
+++ b/intern/cycles/device/cpu/device_impl.cpp
@@ -1,332 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "device/cpu/device_impl.h"
-
-#include <stdlib.h>
-#include <string.h>
-
-/* So ImathMath is included before our kernel_cpu_compat. */
-#ifdef WITH_OSL
-/* So no context pollution happens from indirectly included windows.h */
-#  include "util/util_windows.h"
-#  include <OSL/oslexec.h>
-#endif
-
-#ifdef WITH_EMBREE
-#  include <embree3/rtcore.h>
-#endif
-
-#include "device/cpu/kernel.h"
-#include "device/cpu/kernel_thread_globals.h"
-
-#include "device/device.h"
-
-// clang-format off
-#include "kernel/device/cpu/compat.h"
-#include "kernel/device/cpu/globals.h"
-#include "kernel/device/cpu/kernel.h"
-#include "kernel/kernel_types.h"
-
-#include "kernel/osl/osl_shader.h"
-#include "kernel/osl/osl_globals.h"
-// clang-format on
-
-#include "bvh/bvh_embree.h"
-
-#include "render/buffers.h"
-
-#include "util/util_debug.h"
-#include "util/util_foreach.h"
-#include "util/util_function.h"
-#include "util/util_logging.h"
-#include "util/util_map.h"
-#include "util/util_openimagedenoise.h"
-#include "util/util_optimization.h"
-#include "util/util_progress.h"
-#include "util/util_system.h"
-#include "util/util_task.h"
-#include "util/util_thread.h"
-
-CCL_NAMESPACE_BEGIN
-
-CPUDevice::CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_)
-    : Device(info_, stats_, profiler_), texture_info(this, "__texture_info", MEM_GLOBAL)
-{
-  /* Pick any kernel, all of them are supposed to have same level of microarchitecture
-   * optimization. */
-  VLOG(1) << "Will be using " << kernels.integrator_init_from_camera.get_uarch_name()
-          << " kernels.";
-
-  if (info.cpu_threads == 0) {
-    info.cpu_threads = TaskScheduler::num_threads();
-  }
-
-#ifdef WITH_OSL
-  kernel_globals.osl = &osl_globals;
-#endif
-#ifdef WITH_EMBREE
-  embree_device = rtcNewDevice("verbose=0");
-#endif
-  need_texture_info = false;
-}
-
-CPUDevice::~CPUDevice()
-{
-#ifdef WITH_EMBREE
-  rtcReleaseDevice(embree_device);
-#endif
-
-  texture_info.free();
-}
-
-bool CPUDevice::show_samples() const
-{
-  return (info.cpu_threads == 1);
-}
-
-BVHLayoutMask CPUDevice::get_bvh_layout_mask() const
-{
-  BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2;
-#ifdef WITH_EMBREE
-  bvh_layout_mask |= BVH_LAYOUT_EMBREE;
-#endif /* WITH_EMBREE */
-  return bvh_layout_mask;
-}
-
-bool CPUDevice::load_texture_info()
-{
-  if (!need_texture_info) {
-    return false;
-  }
-
-  texture_info.copy_to_device();
-  need_texture_info = false;
-
-  return true;
-}
-
-void CPUDevice::mem_alloc(device_memory &mem)
-{
-  if (mem.type == MEM_TEXTURE) {
-    assert(!"mem_alloc not supported for textures.");
-  }
-  else if (mem.type == MEM_GLOBAL) {
-    assert(!"mem_alloc not supported for global memory.");
-  }
-  else {
-    if (mem.name) {
-      VLOG(1) << "Buffer allocate: " << mem.name << ", "
-              << string_human_readable_number(mem.memory_size()) << " bytes. ("
-              << string_human_readable_size(mem.memory_size()) << ")";
-    }
-
-    if (mem.type == MEM_DEVICE_ONLY) {
-      assert(!mem.host_pointer);
-      size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES;
-      void *data = util_aligned_malloc(mem.memory_size(), alignment);
-      mem.device_pointer = (device_ptr)data;
-    }
-    else {
-      mem.device_pointer = (device_ptr)mem.host_pointer;
-    }
-
-    mem.device_size = mem.memory_size();
-    stats.mem_alloc(mem.device_size);
-  }
-}
-
-void CPUDevice::mem_copy_to(device_memory &mem)
-{
-  if (mem.type == MEM_GLOBAL) {
-    global_free(mem);
-    global_alloc(mem);
-  }
-  else if (mem.type == MEM_TEXTURE) {
-    tex_free((device_texture &)mem);
-    tex_alloc((device_texture &)mem);
-  }
-  else {
-    if (!mem.device_pointer) {
-      mem_alloc(mem);
-    }
-
-    /* copy is no-op */
-  }
-}
-
-void CPUDevice::mem_copy_from(
-    device_memory & /*mem*/, size_t /*y*/, size_t /*w*/, size_t /*h*/, size_t /*elem*/)
-{
-  /* no-op */
-}
-
-void CPUDevice::mem_zero(device_memory &mem)
-{
-  if (!mem.device_pointer) {
-    mem_alloc(mem);
-  }
-
-  if (mem.device_pointer) {
-    memset((void *)mem.device_pointer, 0, mem.memory_size());
-  }
-}
-
-void CPUDevice::mem_free(device_memory &mem)
-{
-  if (mem.type == MEM_GLOBAL) {
-    global_free(mem);
-  }
-  else if (mem.type == MEM_TEXTURE) {
-    tex_free((device_texture &)mem);
-  }
-  else if (mem.device_pointer) {
-    if (mem.type == MEM_DEVICE_ONLY) {
-      util_aligned_free((void *)mem.device_pointer);
-    }
-    mem.device_pointer = 0;
-    stats.mem_free(mem.device_size);
-    mem.device_size = 0;
-  }
-}
-
-device_ptr CPUDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/)
-{
-  return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
-}
-
-void CPUDevice::const_copy_to(const char *name, void *host, size_t size)
-{
-#if WITH_EMBREE
-  if (strcmp(name, "__data") == 0) {
-    assert(size <= sizeof(KernelData));
-
-    // Update scene handle (since it is different for each device on multi devices)
-    KernelData *const data = (KernelData *)host;
-    data->bvh.scene = embree_scene;
-  }
-#endif
-  kernel_const_copy(&kernel_globals, name, host, size);
-}
-
-void CPUDevice::global_alloc(device_memory &mem)
-{
-  VLOG(1) << "Global memory allocate: " << mem.name << ", "
-          << string_human_readable_number(mem.memory_size()) << " bytes. ("
-          << string_human_readable_size(mem.memory_size()) << ")";
-
-  kernel_global_memory_copy(&kernel_globals, mem.name, mem.host_pointer, mem.data_size);
-
-  mem.device_pointer = (device_ptr)mem.host_pointer;
-  mem.device_size = mem.memory_size();
-  stats.mem_alloc(mem.device_size);
-}
-
-void CPUDevice::global_free(device_memory &mem)
-{
-  if (mem.device_pointer) {
-    mem.device_pointer = 0;
-    stats.mem_free(mem.device_size);
-    mem.device_size = 0;
-  }
-}
-
-void CPUDevice::tex_alloc(device_texture &mem)
-{
-  VLOG(1) << "Texture allocate: " << mem.name << ", "
-          << string_human_readable_number(mem.memory_size()) << " bytes. ("
-          << string_human_readable_size(mem.memory_size()) << ")";
-
-  mem.device_pointer = (device_ptr)mem.host_pointer;
-  mem.device_size = mem.memory_size();
-  stats.mem_alloc(mem.device_size);
-
-  const uint slot = mem.slot;
-  if (slot >= texture_info.size()) {
-    /* Allocate some slots in advance, to reduce amount of re-allocations. */
-    texture_info.resize(slot + 128);
-  }
-
-  texture_info[slot] = mem.info;
-  texture_info[slot].data = (uint64_t)mem.host_pointer;
-  need_texture_info = true;
-}
-
-void CPUDevice::tex_free(device_texture &mem)
-{
-  if (mem.device_pointer) {
-    mem.device_pointer = 0;
-    stats.mem_free(mem.device_size);
-    mem.device_size = 0;
-    need_texture_info = true;
-  }
-}
-
-void CPUDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
-{
-#ifdef WITH_EMBREE
-  if (bvh->params.bvh_layout == BVH_LAYOUT_EMBREE ||
-      bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE) {
-    BVHEmbree *const bvh_embree = static_cast<BVHEmbree *>(bvh);
-    if (refit) {
-      bvh_embree->refit(progress);
-    }
-    else {
-      bvh_embree->build(progress, &stats, embree_device);
-    }
-
-    if (bvh->params.top_level) {
-      embree_scene = bvh_embree->scene;
-    }
-  }
-  else
-#endif
-    Device::build_bvh(bvh, progress, refit);
-}
-
-const CPUKernels *CPUDevice::get_cpu_kernels() const
-{
-  return &kernels;
-}
-
-void CPUDevice::get_cpu_kernel_thread_globals(
-    vector<CPUKernelThreadGlobals> &kernel_thread_globals)
-{
-  /* Ensure latest texture info is loaded into kernel globals before returning. */
-  load_texture_info();
-
-  kernel_thread_globals.clear();
-  void *osl_memory = get_cpu_osl_memory();
-  for (int i = 0; i < info.cpu_threads; i++) {
-    kernel_thread_globals.emplace_back(kernel_globals, osl_memory, profiler);
-  }
-}
-
-void *CPUDevice::get_cpu_osl_memory()
-{
-#ifdef WITH_OSL
-  return &osl_globals;
-#else
-  return NULL;
-#endif
-}
-
-bool CPUDevice::load_kernels(const uint /*kernel_features*/)
-{
-  return true;
-}
-
-CCL_NAMESPACE_END
--- a/intern/cycles/device/cpu/device_impl.h
+++ b/intern/cycles/device/cpu/device_impl.h
@@ -1,102 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-/* So ImathMath is included before our kernel_cpu_compat. */
-#ifdef WITH_OSL
-/* So no context pollution happens from indirectly included windows.h */
-#  include "util/util_windows.h"
-#  include <OSL/oslexec.h>
-#endif
-
-#ifdef WITH_EMBREE
-#  include <embree3/rtcore.h>
-#endif
-
-#include "device/cpu/kernel.h"
-#include "device/device.h"
-#include "device/device_memory.h"
-
-// clang-format off
-#include "kernel/device/cpu/compat.h"
-#include "kernel/device/cpu/kernel.h"
-#include "kernel/device/cpu/globals.h"
-
-#include "kernel/osl/osl_shader.h"
-#include "kernel/osl/osl_globals.h"
-// clang-format on
-
-CCL_NAMESPACE_BEGIN
-
-class CPUDevice : public Device {
- public:
-  KernelGlobals kernel_globals;
-
-  device_vector<TextureInfo> texture_info;
-  bool need_texture_info;
-
-#ifdef WITH_OSL
-  OSLGlobals osl_globals;
-#endif
-#ifdef WITH_EMBREE
-  RTCScene embree_scene = NULL;
-  RTCDevice embree_device;
-#endif
-
-  CPUKernels kernels;
-
-  CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_);
-  ~CPUDevice();
-
-  virtual bool show_samples() const override;
-
-  virtual BVHLayoutMask get_bvh_layout_mask() const override;
-
-  /* Returns true if the texture info was copied to the device (meaning, some more
-   * re-initialization might be needed). */
-  bool load_texture_info();
-
-  virtual void mem_alloc(device_memory &mem) override;
-  virtual void mem_copy_to(device_memory &mem) override;
-  virtual void mem_copy_from(
-      device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
-  virtual void mem_zero(device_memory &mem) override;
-  virtual void mem_free(device_memory &mem) override;
-  virtual device_ptr mem_alloc_sub_ptr(device_memory &mem,
-                                       size_t offset,
-                                       size_t /*size*/) override;
-
-  virtual void const_copy_to(const char *name, void *host, size_t size) override;
-
-  void global_alloc(device_memory &mem);
-  void global_free(device_memory &mem);
-
-  void tex_alloc(device_texture &mem);
-  void tex_free(device_texture &mem);
-
-  void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
-
-  virtual const CPUKernels *get_cpu_kernels() const override;
-  virtual void get_cpu_kernel_thread_globals(
-      vector<CPUKernelThreadGlobals> &kernel_thread_globals) override;
-  virtual void *get_cpu_osl_memory() override;
-
- protected:
-  virtual bool load_kernels(uint /*kernel_features*/) override;
-};
-
-CCL_NAMESPACE_END
--- a/intern/cycles/device/cpu/kernel.cpp
+++ b/intern/cycles/device/cpu/kernel.cpp
@@ -1,61 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "device/cpu/kernel.h"
-
-#include "kernel/device/cpu/kernel.h"
-
-CCL_NAMESPACE_BEGIN
-
-#define KERNEL_FUNCTIONS(name) \
-  KERNEL_NAME_EVAL(cpu, name), KERNEL_NAME_EVAL(cpu_sse2, name), \
-      KERNEL_NAME_EVAL(cpu_sse3, name), KERNEL_NAME_EVAL(cpu_sse41, name), \
-      KERNEL_NAME_EVAL(cpu_avx, name), KERNEL_NAME_EVAL(cpu_avx2, name)
-
-#define REGISTER_KERNEL(name) name(KERNEL_FUNCTIONS(name))
-
-CPUKernels::CPUKernels()
-    : /* Integrator. */
-      REGISTER_KERNEL(integrator_init_from_camera),
-      REGISTER_KERNEL(integrator_init_from_bake),
-      REGISTER_KERNEL(integrator_intersect_closest),
-      REGISTER_KERNEL(integrator_intersect_shadow),
-      REGISTER_KERNEL(integrator_intersect_subsurface),
-      REGISTER_KERNEL(integrator_intersect_volume_stack),
-      REGISTER_KERNEL(integrator_shade_background),
-      REGISTER_KERNEL(integrator_shade_light),
-      REGISTER_KERNEL(integrator_shade_shadow),
-      REGISTER_KERNEL(integrator_shade_surface),
-      REGISTER_KERNEL(integrator_shade_volume),
-      REGISTER_KERNEL(integrator_megakernel),
-      /* Shader evaluation. */
-      REGISTER_KERNEL(shader_eval_displace),
-      REGISTER_KERNEL(shader_eval_background),
-      /* Adaptive sampling. */
-      REGISTER_KERNEL(adaptive_sampling_convergence_check),
-      REGISTER_KERNEL(adaptive_sampling_filter_x),
-      REGISTER_KERNEL(adaptive_sampling_filter_y),
-      /* Cryptomatte. */
-      REGISTER_KERNEL(cryptomatte_postprocess),
-      /* Bake. */
-      REGISTER_KERNEL(bake)
-{
-}
-
-#undef REGISTER_KERNEL
-#undef KERNEL_FUNCTIONS
-
-CCL_NAMESPACE_END
--- a/intern/cycles/device/cpu/kernel.h
+++ b/intern/cycles/device/cpu/kernel.h
@@ -1,111 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "device/cpu/kernel_function.h"
-#include "util/util_types.h"
-
-CCL_NAMESPACE_BEGIN
-
-struct KernelGlobals;
-struct IntegratorStateCPU;
-struct TileInfo;
-
-class CPUKernels {
- public:
-  /* Integrator. */
-
-  using IntegratorFunction =
-      CPUKernelFunction<void (*)(const KernelGlobals *kg, IntegratorStateCPU *state)>;
-  using IntegratorShadeFunction = CPUKernelFunction<void (*)(
-      const KernelGlobals *kg, IntegratorStateCPU *state, ccl_global float *render_buffer)>;
-  using IntegratorInitFunction = CPUKernelFunction<bool (*)(const KernelGlobals *kg,
-                                                            IntegratorStateCPU *state,
-                                                            KernelWorkTile *tile,
-                                                            ccl_global float *render_buffer)>;
-
-  IntegratorInitFunction integrator_init_from_camera;
-  IntegratorInitFunction integrator_init_from_bake;
-  IntegratorFunction integrator_intersect_closest;
-  IntegratorFunction integrator_intersect_shadow;
-  IntegratorFunction integrator_intersect_subsurface;
-  IntegratorFunction integrator_intersect_volume_stack;
-  IntegratorShadeFunction integrator_shade_background;
-  IntegratorShadeFunction integrator_shade_light;
-  IntegratorShadeFunction integrator_shade_shadow;
-  IntegratorShadeFunction integrator_shade_surface;
-  IntegratorShadeFunction integrator_shade_volume;
-  IntegratorShadeFunction integrator_megakernel;
-
-  /* Shader evaluation. */
-
-  using ShaderEvalFunction = CPUKernelFunction<void (*)(
-      const KernelGlobals *kg, const KernelShaderEvalInput *, float4 *, const int)>;
-
-  ShaderEvalFunction shader_eval_displace;
-  ShaderEvalFunction shader_eval_background;
-
-  /* Adaptive stopping. */
-
-  using AdaptiveSamplingConvergenceCheckFunction =
-      CPUKernelFunction<bool (*)(const KernelGlobals *kg,
-                                 ccl_global float *render_buffer,
-                                 int x,
-                                 int y,
-                                 float threshold,
-                                 bool reset,
-                                 int offset,
-                                 int stride)>;
-
-  using AdaptiveSamplingFilterXFunction =
-      CPUKernelFunction<void (*)(const KernelGlobals *kg,
-                                 ccl_global float *render_buffer,
-                                 int y,
-                                 int start_x,
-                                 int width,
-                                 int offset,
-                                 int stride)>;
-
-  using AdaptiveSamplingFilterYFunction =
-      CPUKernelFunction<void (*)(const KernelGlobals *kg,
-                                 ccl_global float *render_buffer,
-                                 int x,
-                                 int start_y,
-                                 int height,
-                                 int offset,
-                                 int stride)>;
-
-  AdaptiveSamplingConvergenceCheckFunction adaptive_sampling_convergence_check;
-
-  AdaptiveSamplingFilterXFunction adaptive_sampling_filter_x;
-  AdaptiveSamplingFilterYFunction adaptive_sampling_filter_y;
-
-  /* Cryptomatte. */
-
-  using CryptomattePostprocessFunction = CPUKernelFunction<void (*)(
-      const KernelGlobals *kg, ccl_global float *render_buffer, int pixel_index)>;
-
-  CryptomattePostprocessFunction cryptomatte_postprocess;
-
-  /* Bake. */
-
-  CPUKernelFunction<void (*)(const KernelGlobals *, float *, int, int, int, int, int)> bake;
-
-  CPUKernels();
-};
-
-CCL_NAMESPACE_END
--- a/intern/cycles/device/cpu/kernel_function.h
+++ b/intern/cycles/device/cpu/kernel_function.h
@@ -1,124 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "util/util_debug.h"
-#include "util/util_system.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* A wrapper around per-microarchitecture variant of a kernel function.
- *
- * Provides a function-call-like API which gets routed to the most suitable implementation.
- *
- * For example, on a computer which only has SSE4.1 the kernel_sse41 will be used. */
-template<typename FunctionType> class CPUKernelFunction {
- public:
-  CPUKernelFunction(FunctionType kernel_default,
-                    FunctionType kernel_sse2,
-                    FunctionType kernel_sse3,
-                    FunctionType kernel_sse41,
-                    FunctionType kernel_avx,
-                    FunctionType kernel_avx2)
-  {
-    kernel_info_ = get_best_kernel_info(
-        kernel_default, kernel_sse2, kernel_sse3, kernel_sse41, kernel_avx, kernel_avx2);
-  }
-
-  template<typename... Args> inline auto operator()(Args... args) const
-  {
-    assert(kernel_info_.kernel);
-
-    return kernel_info_.kernel(args...);
-  }
-
-  const char *get_uarch_name() const
-  {
-    return kernel_info_.uarch_name;
-  }
-
- protected:
-  /* Helper class which allows to pass human-readable microarchitecture name together with function
-   * pointer. */
-  class KernelInfo {
-   public:
-    KernelInfo() : KernelInfo("", nullptr)
-    {
-    }
-
-    /* TODO(sergey): Use string view, to have higher-level functionality (i.e. comparison) without
-     * memory allocation. */
-    KernelInfo(const char *uarch_name, FunctionType kernel)
-        : uarch_name(uarch_name), kernel(kernel)
-    {
-    }
-
-    const char *uarch_name;
-    FunctionType kernel;
-  };
-
-  KernelInfo get_best_kernel_info(FunctionType kernel_default,
-                                  FunctionType kernel_sse2,
-                                  FunctionType kernel_sse3,
-                                  FunctionType kernel_sse41,
-                                  FunctionType kernel_avx,
-                                  FunctionType kernel_avx2)
-  {
-    /* Silence warnings about unused variables when compiling without some architectures. */
-    (void)kernel_sse2;
-    (void)kernel_sse3;
-    (void)kernel_sse41;
-    (void)kernel_avx;
-    (void)kernel_avx2;
-
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
-    if (DebugFlags().cpu.has_avx2() && system_cpu_support_avx2()) {
-      return KernelInfo("AVX2", kernel_avx2);
-    }
-#endif
-
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
-    if (DebugFlags().cpu.has_avx() && system_cpu_support_avx()) {
-      return KernelInfo("AVX", kernel_avx);
-    }
-#endif
-
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
-    if (DebugFlags().cpu.has_sse41() && system_cpu_support_sse41()) {
-      return KernelInfo("SSE4.1", kernel_sse41);
-    }
-#endif
-
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
-    if (DebugFlags().cpu.has_sse3() && system_cpu_support_sse3()) {
-      return KernelInfo("SSE3", kernel_sse3);
-    }
-#endif
-
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
-    if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) {
-      return KernelInfo("SSE2", kernel_sse2);
-    }
-#endif
-
-    return KernelInfo("default", kernel_default);
-  }
-
-  KernelInfo kernel_info_;
-};
-
-CCL_NAMESPACE_END
--- a/intern/cycles/device/cpu/kernel_thread_globals.cpp
+++ b/intern/cycles/device/cpu/kernel_thread_globals.cpp
@@ -1,85 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "device/cpu/kernel_thread_globals.h"
-
-// clang-format off
-#include "kernel/osl/osl_shader.h"
-#include "kernel/osl/osl_globals.h"
-// clang-format on
-
-#include "util/util_profiling.h"
-
-CCL_NAMESPACE_BEGIN
-
-CPUKernelThreadGlobals::CPUKernelThreadGlobals(const KernelGlobals &kernel_globals,
-                                               void *osl_globals_memory,
-                                               Profiler &cpu_profiler)
-    : KernelGlobals(kernel_globals), cpu_profiler_(cpu_profiler)
-{
-  reset_runtime_memory();
-
-#ifdef WITH_OSL
-  OSLShader::thread_init(this, reinterpret_cast<OSLGlobals *>(osl_globals_memory));
-#else
-  (void)osl_globals_memory;
-#endif
-}
-
-CPUKernelThreadGlobals::CPUKernelThreadGlobals(CPUKernelThreadGlobals &&other) noexcept
-    : KernelGlobals(std::move(other)), cpu_profiler_(other.cpu_profiler_)
-{
-  other.reset_runtime_memory();
-}
-
-CPUKernelThreadGlobals::~CPUKernelThreadGlobals()
-{
-#ifdef WITH_OSL
-  OSLShader::thread_free(this);
-#endif
-}
-
-CPUKernelThreadGlobals &CPUKernelThreadGlobals::operator=(CPUKernelThreadGlobals &&other)
-{
-  if (this == &other) {
-    return *this;
-  }
-
-  *static_cast<KernelGlobals *>(this) = *static_cast<KernelGlobals *>(&other);
-
-  other.reset_runtime_memory();
-
-  return *this;
-}
-
-void CPUKernelThreadGlobals::reset_runtime_memory()
-{
-#ifdef WITH_OSL
-  osl = nullptr;
-#endif
-}
-
-void CPUKernelThreadGlobals::start_profiling()
-{
-  cpu_profiler_.add_state(&profiler);
-}
-
-void CPUKernelThreadGlobals::stop_profiling()
-{
-  cpu_profiler_.remove_state(&profiler);
-}
-
-CCL_NAMESPACE_END
--- a/intern/cycles/device/cpu/kernel_thread_globals.h
+++ b/intern/cycles/device/cpu/kernel_thread_globals.h
@@ -1,57 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "kernel/device/cpu/compat.h"
-#include "kernel/device/cpu/globals.h"
-
-CCL_NAMESPACE_BEGIN
-
-class Profiler;
-
-/* A special class which extends memory ownership of the `KernelGlobals` decoupling any resource
- * which is not thread-safe for access. Every worker thread which needs to operate on
- * `KernelGlobals` needs to initialize its own copy of this object.
- *
- * NOTE: Only minimal subset of objects are copied: `KernelData` is never copied. This means that
- * there is no unnecessary data duplication happening when using this object. */
-class CPUKernelThreadGlobals : public KernelGlobals {
- public:
-  /* TODO(sergey): Would be nice to have properly typed OSLGlobals even in the case when building
-   * without OSL support. Will avoid need to those unnamed pointers and casts. */
-  CPUKernelThreadGlobals(const KernelGlobals &kernel_globals,
-                         void *osl_globals_memory,
-                         Profiler &cpu_profiler);
-
-  ~CPUKernelThreadGlobals();
-
-  CPUKernelThreadGlobals(const CPUKernelThreadGlobals &other) = delete;
-  CPUKernelThreadGlobals(CPUKernelThreadGlobals &&other) noexcept;
-
-  CPUKernelThreadGlobals &operator=(const CPUKernelThreadGlobals &other) = delete;
-  CPUKernelThreadGlobals &operator=(CPUKernelThreadGlobals &&other);
-
-  void start_profiling();
-  void stop_profiling();
-
- protected:
-  void reset_runtime_memory();
-
-  Profiler &cpu_profiler_;
-};
-
-CCL_NAMESPACE_END
--- a/intern/cycles/device/cuda/device_cuda.h
+++ b/intern/cycles/device/cuda/device_cuda.h
@@ -0,0 +1,270 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef WITH_CUDA
+
+#  include "device/device.h"
+#  include "device/device_denoising.h"
+#  include "device/device_split_kernel.h"
+
+#  include "util/util_map.h"
+#  include "util/util_task.h"
+
+#  ifdef WITH_CUDA_DYNLOAD
+#    include "cuew.h"
+#  else
+#    include "util/util_opengl.h"
+#    include <cuda.h>
+#    include <cudaGL.h>
+#  endif
+
+CCL_NAMESPACE_BEGIN
+
+class CUDASplitKernel;
+
+class CUDADevice : public Device {
+
+  friend class CUDASplitKernelFunction;
+  friend class CUDASplitKernel;
+  friend class CUDAContextScope;
+
+ public:
+  DedicatedTaskPool task_pool;
+  CUdevice cuDevice;
+  CUcontext cuContext;
+  CUmodule cuModule, cuFilterModule;
+  size_t device_texture_headroom;
+  size_t device_working_headroom;
+  bool move_texture_to_host;
+  size_t map_host_used;
+  size_t map_host_limit;
+  int can_map_host;
+  int pitch_alignment;
+  int cuDevId;
+  int cuDevArchitecture;
+  bool first_error;
+  CUDASplitKernel *split_kernel;
+
+  struct CUDAMem {
+    CUDAMem() : texobject(0), array(0), use_mapped_host(false)
+    {
+    }
+
+    CUtexObject texobject;
+    CUarray array;
+
+    /* If true, a mapped host memory in shared_pointer is being used. */
+    bool use_mapped_host;
+  };
+  typedef map<device_memory *, CUDAMem> CUDAMemMap;
+  CUDAMemMap cuda_mem_map;
+  thread_mutex cuda_mem_map_mutex;
+
+  struct PixelMem {
+    GLuint cuPBO;
+    CUgraphicsResource cuPBOresource;
+    GLuint cuTexId;
+    int w, h;
+  };
+  map<device_ptr, PixelMem> pixel_mem_map;
+
+  /* Bindless Textures */
+  device_vector<TextureInfo> texture_info;
+  bool need_texture_info;
+
+  /* Kernels */
+  struct {
+    bool loaded;
+
+    CUfunction adaptive_stopping;
+    CUfunction adaptive_filter_x;
+    CUfunction adaptive_filter_y;
+    CUfunction adaptive_scale_samples;
+    int adaptive_num_threads_per_block;
+  } functions;
+
+  static bool have_precompiled_kernels();
+
+  virtual bool show_samples() const override;
+
+  virtual BVHLayoutMask get_bvh_layout_mask() const override;
+
+  void set_error(const string &error) override;
+
+  CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_);
+
+  virtual ~CUDADevice();
+
+  bool support_device(const DeviceRequestedFeatures & /*requested_features*/);
+
+  bool check_peer_access(Device *peer_device) override;
+
+  bool use_adaptive_compilation();
+
+  bool use_split_kernel();
+
+  virtual string compile_kernel_get_common_cflags(
+      const DeviceRequestedFeatures &requested_features, bool filter = false, bool split = false);
+
+  string compile_kernel(const DeviceRequestedFeatures &requested_features,
+                        const char *name,
+                        const char *base = "cuda",
+                        bool force_ptx = false);
+
+  virtual bool load_kernels(const DeviceRequestedFeatures &requested_features) override;
+
+  void load_functions();
+
+  void reserve_local_memory(const DeviceRequestedFeatures &requested_features);
+
+  void init_host_memory();
+
+  void load_texture_info();
+
+  void move_textures_to_host(size_t size, bool for_texture);
+
+  CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
+
+  void generic_copy_to(device_memory &mem);
+
+  void generic_free(device_memory &mem);
+
+  void mem_alloc(device_memory &mem) override;
+
+  void mem_copy_to(device_memory &mem) override;
+
+  void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override;
+
+  void mem_zero(device_memory &mem) override;
+
+  void mem_free(device_memory &mem) override;
+
+  device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/) override;
+
+  virtual void const_copy_to(const char *name, void *host, size_t size) override;
+
+  void global_alloc(device_memory &mem);
+
+  void global_free(device_memory &mem);
+
+  void tex_alloc(device_texture &mem);
+
+  void tex_free(device_texture &mem);
+
+  bool denoising_non_local_means(device_ptr image_ptr,
+                                 device_ptr guide_ptr,
+                                 device_ptr variance_ptr,
+                                 device_ptr out_ptr,
+                                 DenoisingTask *task);
+
+  bool denoising_construct_transform(DenoisingTask *task);
+
+  bool denoising_accumulate(device_ptr color_ptr,
+                            device_ptr color_variance_ptr,
+                            device_ptr scale_ptr,
+                            int frame,
+                            DenoisingTask *task);
+
+  bool denoising_solve(device_ptr output_ptr, DenoisingTask *task);
+
+  bool denoising_combine_halves(device_ptr a_ptr,
+                                device_ptr b_ptr,
+                                device_ptr mean_ptr,
+                                device_ptr variance_ptr,
+                                int r,
+                                int4 rect,
+                                DenoisingTask *task);
+
+  bool denoising_divide_shadow(device_ptr a_ptr,
+                               device_ptr b_ptr,
+                               device_ptr sample_variance_ptr,
+                               device_ptr sv_variance_ptr,
+                               device_ptr buffer_variance_ptr,
+                               DenoisingTask *task);
+
+  bool denoising_get_feature(int mean_offset,
+                             int variance_offset,
+                             device_ptr mean_ptr,
+                             device_ptr variance_ptr,
+                             float scale,
+                             DenoisingTask *task);
+
+  bool denoising_write_feature(int out_offset,
+                               device_ptr from_ptr,
+                               device_ptr buffer_ptr,
+                               DenoisingTask *task);
+
+  bool denoising_detect_outliers(device_ptr image_ptr,
+                                 device_ptr variance_ptr,
+                                 device_ptr depth_ptr,
+                                 device_ptr output_ptr,
+                                 DenoisingTask *task);
+
+  void denoise(RenderTile &rtile, DenoisingTask &denoising);
+
+  void adaptive_sampling_filter(uint filter_sample,
+                                WorkTile *wtile,
+                                CUdeviceptr d_wtile,
+                                CUstream stream = 0);
+  void adaptive_sampling_post(RenderTile &rtile,
+                              WorkTile *wtile,
+                              CUdeviceptr d_wtile,
+                              CUstream stream = 0);
+
+  void render(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles);
+
+  void film_convert(DeviceTask &task,
+                    device_ptr buffer,
+                    device_ptr rgba_byte,
+                    device_ptr rgba_half);
+
+  void shader(DeviceTask &task);
+
+  CUdeviceptr map_pixels(device_ptr mem);
+
+  void unmap_pixels(device_ptr mem);
+
+  void pixels_alloc(device_memory &mem);
+
+  void pixels_copy_from(device_memory &mem, int y, int w, int h);
+
+  void pixels_free(device_memory &mem);
+
+  void draw_pixels(device_memory &mem,
+                   int y,
+                   int w,
+                   int h,
+                   int width,
+                   int height,
+                   int dx,
+                   int dy,
+                   int dw,
+                   int dh,
+                   bool transparent,
+                   const DeviceDrawParams &draw_params) override;
+
+  void thread_run(DeviceTask &task);
+
+  virtual void task_add(DeviceTask &task) override;
+
+  virtual void task_wait() override;
+
+  virtual void task_cancel() override;
+};
+
+CCL_NAMESPACE_END
+
+#endif
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
--- a/intern/cycles/device/cuda/device_impl.cpp
+++ b/intern/cycles/device/cuda/device_impl.cpp
--- a/intern/cycles/device/cuda/device_impl.h
+++ b/intern/cycles/device/cuda/device_impl.h
@@ -1,154 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef WITH_CUDA
-
-#  include "device/cuda/kernel.h"
-#  include "device/cuda/queue.h"
-#  include "device/cuda/util.h"
-#  include "device/device.h"
-
-#  include "util/util_map.h"
-
-#  ifdef WITH_CUDA_DYNLOAD
-#    include "cuew.h"
-#  else
-#    include <cuda.h>
-#    include <cudaGL.h>
-#  endif
-
-CCL_NAMESPACE_BEGIN
-
-class DeviceQueue;
-
-class CUDADevice : public Device {
-
-  friend class CUDAContextScope;
-
- public:
-  CUdevice cuDevice;
-  CUcontext cuContext;
-  CUmodule cuModule;
-  size_t device_texture_headroom;
-  size_t device_working_headroom;
-  bool move_texture_to_host;
-  size_t map_host_used;
-  size_t map_host_limit;
-  int can_map_host;
-  int pitch_alignment;
-  int cuDevId;
-  int cuDevArchitecture;
-  bool first_error;
-
-  struct CUDAMem {
-    CUDAMem() : texobject(0), array(0), use_mapped_host(false)
-    {
-    }
-
-    CUtexObject texobject;
-    CUarray array;
-
-    /* If true, a mapped host memory in shared_pointer is being used. */
-    bool use_mapped_host;
-  };
-  typedef map<device_memory *, CUDAMem> CUDAMemMap;
-  CUDAMemMap cuda_mem_map;
-  thread_mutex cuda_mem_map_mutex;
-
-  /* Bindless Textures */
-  device_vector<TextureInfo> texture_info;
-  bool need_texture_info;
-
-  CUDADeviceKernels kernels;
-
-  static bool have_precompiled_kernels();
-
-  virtual bool show_samples() const override;
-
-  virtual BVHLayoutMask get_bvh_layout_mask() const override;
-
-  void set_error(const string &error) override;
-
-  CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
-
-  virtual ~CUDADevice();
-
-  bool support_device(const uint /*kernel_features*/);
-
-  bool check_peer_access(Device *peer_device) override;
-
-  bool use_adaptive_compilation();
-
-  virtual string compile_kernel_get_common_cflags(const uint kernel_features);
-
-  string compile_kernel(const uint kernel_features,
-                        const char *name,
-                        const char *base = "cuda",
-                        bool force_ptx = false);
-
-  virtual bool load_kernels(const uint kernel_features) override;
-
-  void reserve_local_memory(const uint kernel_features);
-
-  void init_host_memory();
-
-  void load_texture_info();
-
-  void move_textures_to_host(size_t size, bool for_texture);
-
-  CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
-
-  void generic_copy_to(device_memory &mem);
-
-  void generic_free(device_memory &mem);
-
-  void mem_alloc(device_memory &mem) override;
-
-  void mem_copy_to(device_memory &mem) override;
-
-  void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
-
-  void mem_zero(device_memory &mem) override;
-
-  void mem_free(device_memory &mem) override;
-
-  device_ptr mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/) override;
-
-  virtual void const_copy_to(const char *name, void *host, size_t size) override;
-
-  void global_alloc(device_memory &mem);
-
-  void global_free(device_memory &mem);
-
-  void tex_alloc(device_texture &mem);
-
-  void tex_free(device_texture &mem);
-
-  virtual bool should_use_graphics_interop() override;
-
-  virtual unique_ptr<DeviceQueue> gpu_queue_create() override;
-
-  int get_num_multiprocessors();
-  int get_max_num_threads_per_multiprocessor();
-
- protected:
-  bool get_device_attribute(CUdevice_attribute attribute, int *value);
-  int get_device_default_attribute(CUdevice_attribute attribute, int default_value);
-};
-
-CCL_NAMESPACE_END
-
-#endif
--- a/intern/cycles/device/cuda/graphics_interop.cpp
+++ b/intern/cycles/device/cuda/graphics_interop.cpp
@@ -1,103 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef WITH_CUDA
-
-#  include "device/cuda/graphics_interop.h"
-
-#  include "device/cuda/device_impl.h"
-#  include "device/cuda/util.h"
-
-CCL_NAMESPACE_BEGIN
-
-CUDADeviceGraphicsInterop::CUDADeviceGraphicsInterop(CUDADeviceQueue *queue)
-    : queue_(queue), device_(static_cast<CUDADevice *>(queue->device))
-{
-}
-
-CUDADeviceGraphicsInterop::~CUDADeviceGraphicsInterop()
-{
-  CUDAContextScope scope(device_);
-
-  if (cu_graphics_resource_) {
-    cuda_device_assert(device_, cuGraphicsUnregisterResource(cu_graphics_resource_));
-  }
-}
-
-void CUDADeviceGraphicsInterop::set_display_interop(
-    const DisplayDriver::GraphicsInterop &display_interop)
-{
-  const int64_t new_buffer_area = int64_t(display_interop.buffer_width) *
-                                  display_interop.buffer_height;
-
-  need_clear_ = display_interop.need_clear;
-
-  if (opengl_pbo_id_ == display_interop.opengl_pbo_id && buffer_area_ == new_buffer_area) {
-    return;
-  }
-
-  CUDAContextScope scope(device_);
-
-  if (cu_graphics_resource_) {
-    cuda_device_assert(device_, cuGraphicsUnregisterResource(cu_graphics_resource_));
-  }
-
-  const CUresult result = cuGraphicsGLRegisterBuffer(
-      &cu_graphics_resource_, display_interop.opengl_pbo_id, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
-  if (result != CUDA_SUCCESS) {
-    LOG(ERROR) << "Error registering OpenGL buffer: " << cuewErrorString(result);
-  }
-
-  opengl_pbo_id_ = display_interop.opengl_pbo_id;
-  buffer_area_ = new_buffer_area;
-}
-
-device_ptr CUDADeviceGraphicsInterop::map()
-{
-  if (!cu_graphics_resource_) {
-    return 0;
-  }
-
-  CUDAContextScope scope(device_);
-
-  CUdeviceptr cu_buffer;
-  size_t bytes;
-
-  cuda_device_assert(device_, cuGraphicsMapResources(1, &cu_graphics_resource_, queue_->stream()));
-  cuda_device_assert(
-      device_, cuGraphicsResourceGetMappedPointer(&cu_buffer, &bytes, cu_graphics_resource_));
-
-  if (need_clear_) {
-    cuda_device_assert(
-        device_, cuMemsetD8Async(static_cast<CUdeviceptr>(cu_buffer), 0, bytes, queue_->stream()));
-
-    need_clear_ = false;
-  }
-
-  return static_cast<device_ptr>(cu_buffer);
-}
-
-void CUDADeviceGraphicsInterop::unmap()
-{
-  CUDAContextScope scope(device_);
-
-  cuda_device_assert(device_,
-                     cuGraphicsUnmapResources(1, &cu_graphics_resource_, queue_->stream()));
-}
-
-CCL_NAMESPACE_END
-
-#endif
--- a/intern/cycles/device/cuda/graphics_interop.h
+++ b/intern/cycles/device/cuda/graphics_interop.h
@@ -1,66 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef WITH_CUDA
-
-#  include "device/device_graphics_interop.h"
-
-#  ifdef WITH_CUDA_DYNLOAD
-#    include "cuew.h"
-#  else
-#    include <cuda.h>
-#  endif
-
-CCL_NAMESPACE_BEGIN
-
-class CUDADevice;
-class CUDADeviceQueue;
-
-class CUDADeviceGraphicsInterop : public DeviceGraphicsInterop {
- public:
-  explicit CUDADeviceGraphicsInterop(CUDADeviceQueue *queue);
-
-  CUDADeviceGraphicsInterop(const CUDADeviceGraphicsInterop &other) = delete;
-  CUDADeviceGraphicsInterop(CUDADeviceGraphicsInterop &&other) noexcept = delete;
-
-  ~CUDADeviceGraphicsInterop();
-
-  CUDADeviceGraphicsInterop &operator=(const CUDADeviceGraphicsInterop &other) = delete;
-  CUDADeviceGraphicsInterop &operator=(CUDADeviceGraphicsInterop &&other) = delete;
-
-  virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) override;
-
-  virtual device_ptr map() override;
-  virtual void unmap() override;
-
- protected:
-  CUDADeviceQueue *queue_ = nullptr;
-  CUDADevice *device_ = nullptr;
-
-  /* OpenGL PBO which is currently registered as the destination for the CUDA buffer. */
-  uint opengl_pbo_id_ = 0;
-  /* Buffer area in pixels of the corresponding PBO. */
-  int64_t buffer_area_ = 0;
-
-  /* The destination was requested to be cleared. */
-  bool need_clear_ = false;
-
-  CUgraphicsResource cu_graphics_resource_ = nullptr;
-};
-
-CCL_NAMESPACE_END
-
-#endif
--- a/intern/cycles/device/cuda/kernel.cpp
+++ b/intern/cycles/device/cuda/kernel.cpp
@@ -1,69 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef WITH_CUDA
-
-#  include "device/cuda/kernel.h"
-#  include "device/cuda/device_impl.h"
-
-CCL_NAMESPACE_BEGIN
-
-void CUDADeviceKernels::load(CUDADevice *device)
-{
-  CUmodule cuModule = device->cuModule;
-
-  for (int i = 0; i < (int)DEVICE_KERNEL_NUM; i++) {
-    CUDADeviceKernel &kernel = kernels_[i];
-
-    /* No mega-kernel used for GPU. */
-    if (i == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
-      continue;
-    }
-
-    const std::string function_name = std::string("kernel_gpu_") +
-                                      device_kernel_as_string((DeviceKernel)i);
-    cuda_device_assert(device,
-                       cuModuleGetFunction(&kernel.function, cuModule, function_name.c_str()));
-
-    if (kernel.function) {
-      cuda_device_assert(device, cuFuncSetCacheConfig(kernel.function, CU_FUNC_CACHE_PREFER_L1));
-
-      cuda_device_assert(
-          device,
-          cuOccupancyMaxPotentialBlockSize(
-              &kernel.min_blocks, &kernel.num_threads_per_block, kernel.function, NULL, 0, 0));
-    }
-    else {
-      LOG(ERROR) << "Unable to load kernel " << function_name;
-    }
-  }
-
-  loaded = true;
-}
-
-const CUDADeviceKernel &CUDADeviceKernels::get(DeviceKernel kernel) const
-{
-  return kernels_[(int)kernel];
-}
-
-bool CUDADeviceKernels::available(DeviceKernel kernel) const
-{
-  return kernels_[(int)kernel].function != nullptr;
-}
-
-CCL_NAMESPACE_END
-
-#endif /* WITH_CUDA*/
--- a/intern/cycles/device/cuda/kernel.h
+++ b/intern/cycles/device/cuda/kernel.h
@@ -1,56 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#ifdef WITH_CUDA
-
-#  include "device/device_kernel.h"
-
-#  ifdef WITH_CUDA_DYNLOAD
-#    include "cuew.h"
-#  else
-#    include <cuda.h>
-#  endif
-
-CCL_NAMESPACE_BEGIN
-
-class CUDADevice;
-
-/* CUDA kernel and associate occupancy information. */
-class CUDADeviceKernel {
- public:
-  CUfunction function = nullptr;
-
-  int num_threads_per_block = 0;
-  int min_blocks = 0;
-};
-
-/* Cache of CUDA kernels for each DeviceKernel. */
-class CUDADeviceKernels {
- public:
-  void load(CUDADevice *device);
-  const CUDADeviceKernel &get(DeviceKernel kernel) const;
-  bool available(DeviceKernel kernel) const;
-
- protected:
-  CUDADeviceKernel kernels_[DEVICE_KERNEL_NUM];
-  bool loaded = false;
-};
-
-CCL_NAMESPACE_END
-
-#endif /* WITH_CUDA */
--- a/intern/cycles/device/cuda/queue.cpp
+++ b/intern/cycles/device/cuda/queue.cpp
@@ -1,230 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef WITH_CUDA
-
-#  include "device/cuda/queue.h"
-
-#  include "device/cuda/device_impl.h"
-#  include "device/cuda/graphics_interop.h"
-#  include "device/cuda/kernel.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* CUDADeviceQueue */
-
-CUDADeviceQueue::CUDADeviceQueue(CUDADevice *device)
-    : DeviceQueue(device), cuda_device_(device), cuda_stream_(nullptr)
-{
-  const CUDAContextScope scope(cuda_device_);
-  cuda_device_assert(cuda_device_, cuStreamCreate(&cuda_stream_, CU_STREAM_NON_BLOCKING));
-}
-
-CUDADeviceQueue::~CUDADeviceQueue()
-{
-  const CUDAContextScope scope(cuda_device_);
-  cuStreamDestroy(cuda_stream_);
-}
-
-int CUDADeviceQueue::num_concurrent_states(const size_t state_size) const
-{
-  int num_states = max(cuda_device_->get_num_multiprocessors() *
-                           cuda_device_->get_max_num_threads_per_multiprocessor() * 16,
-                       1048576);
-
-  const char *factor_str = getenv("CYCLES_CONCURRENT_STATES_FACTOR");
-  if (factor_str) {
-    num_states = max((int)(num_states * atof(factor_str)), 1024);
-  }
-
-  VLOG(3) << "GPU queue concurrent states: " << num_states << ", using up to "
-          << string_human_readable_size(num_states * state_size);
-
-  return num_states;
-}
-
-int CUDADeviceQueue::num_concurrent_busy_states() const
-{
-  const int max_num_threads = cuda_device_->get_num_multiprocessors() *
-                              cuda_device_->get_max_num_threads_per_multiprocessor();
-
-  if (max_num_threads == 0) {
-    return 65536;
-  }
-
-  return 4 * max_num_threads;
-}
-
-void CUDADeviceQueue::init_execution()
-{
-  /* Synchronize all textures and memory copies before executing task. */
-  CUDAContextScope scope(cuda_device_);
-  cuda_device_->load_texture_info();
-  cuda_device_assert(cuda_device_, cuCtxSynchronize());
-
-  debug_init_execution();
-}
-
-bool CUDADeviceQueue::kernel_available(DeviceKernel kernel) const
-{
-  return cuda_device_->kernels.available(kernel);
-}
-
-bool CUDADeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[])
-{
-  if (cuda_device_->have_error()) {
-    return false;
-  }
-
-  debug_enqueue(kernel, work_size);
-
-  const CUDAContextScope scope(cuda_device_);
-  const CUDADeviceKernel &cuda_kernel = cuda_device_->kernels.get(kernel);
-
-  /* Compute kernel launch parameters. */
-  const int num_threads_per_block = cuda_kernel.num_threads_per_block;
-  const int num_blocks = divide_up(work_size, num_threads_per_block);
-
-  int shared_mem_bytes = 0;
-
-  switch (kernel) {
-    case DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY:
-    case DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY:
-    case DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY:
-    case DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY:
-    case DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY:
-    case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY:
-      /* See parall_active_index.h for why this amount of shared memory is needed. */
-      shared_mem_bytes = (num_threads_per_block + 1) * sizeof(int);
-      break;
-
-    default:
-      break;
-  }
-
-  /* Launch kernel. */
-  assert_success(cuLaunchKernel(cuda_kernel.function,
-                                num_blocks,
-                                1,
-                                1,
-                                num_threads_per_block,
-                                1,
-                                1,
-                                shared_mem_bytes,
-                                cuda_stream_,
-                                args,
-                                0),
-                 "enqueue");
-
-  return !(cuda_device_->have_error());
-}
-
-bool CUDADeviceQueue::synchronize()
-{
-  if (cuda_device_->have_error()) {
-    return false;
-  }
-
-  const CUDAContextScope scope(cuda_device_);
-  assert_success(cuStreamSynchronize(cuda_stream_), "synchronize");
-
-  debug_synchronize();
-
-  return !(cuda_device_->have_error());
-}
-
-void CUDADeviceQueue::zero_to_device(device_memory &mem)
-{
-  assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
-
-  if (mem.memory_size() == 0) {
-    return;
-  }
-
-  /* Allocate on demand. */
-  if (mem.device_pointer == 0) {
-    cuda_device_->mem_alloc(mem);
-  }
-
-  /* Zero memory on device. */
-  assert(mem.device_pointer != 0);
-
-  const CUDAContextScope scope(cuda_device_);
-  assert_success(
-      cuMemsetD8Async((CUdeviceptr)mem.device_pointer, 0, mem.memory_size(), cuda_stream_),
-      "zero_to_device");
-}
-
-void CUDADeviceQueue::copy_to_device(device_memory &mem)
-{
-  assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
-
-  if (mem.memory_size() == 0) {
-    return;
-  }
-
-  /* Allocate on demand. */
-  if (mem.device_pointer == 0) {
-    cuda_device_->mem_alloc(mem);
-  }
-
-  assert(mem.device_pointer != 0);
-  assert(mem.host_pointer != nullptr);
-
-  /* Copy memory to device. */
-  const CUDAContextScope scope(cuda_device_);
-  assert_success(
-      cuMemcpyHtoDAsync(
-          (CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size(), cuda_stream_),
-      "copy_to_device");
-}
-
-void CUDADeviceQueue::copy_from_device(device_memory &mem)
-{
-  assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
-
-  if (mem.memory_size() == 0) {
-    return;
-  }
-
-  assert(mem.device_pointer != 0);
-  assert(mem.host_pointer != nullptr);
-
-  /* Copy memory from device. */
-  const CUDAContextScope scope(cuda_device_);
-  assert_success(
-      cuMemcpyDtoHAsync(
-          mem.host_pointer, (CUdeviceptr)mem.device_pointer, mem.memory_size(), cuda_stream_),
-      "copy_from_device");
-}
-
-void CUDADeviceQueue::assert_success(CUresult result, const char *operation)
-{
-  if (result != CUDA_SUCCESS) {
-    const char *name = cuewErrorString(result);
-    cuda_device_->set_error(string_printf(
-        "%s in CUDA queue %s (%s)", name, operation, debug_active_kernels().c_str()));
-  }
-}
-
-unique_ptr<DeviceGraphicsInterop> CUDADeviceQueue::graphics_interop_create()
-{
-  return make_unique<CUDADeviceGraphicsInterop>(this);
-}
-
-CCL_NAMESPACE_END
-
-#endif /* WITH_CUDA */
--- a/intern/cycles/device/cuda/queue.h
+++ b/intern/cycles/device/cuda/queue.h
@@ -1,69 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#ifdef WITH_CUDA
-
-#  include "device/device_kernel.h"
-#  include "device/device_memory.h"
-#  include "device/device_queue.h"
-
-#  include "device/cuda/util.h"
-
-CCL_NAMESPACE_BEGIN
-
-class CUDADevice;
-class device_memory;
-
-/* Base class for CUDA queues. */
-class CUDADeviceQueue : public DeviceQueue {
- public:
-  CUDADeviceQueue(CUDADevice *device);
-  ~CUDADeviceQueue();
-
-  virtual int num_concurrent_states(const size_t state_size) const override;
-  virtual int num_concurrent_busy_states() const override;
-
-  virtual void init_execution() override;
-
-  virtual bool kernel_available(DeviceKernel kernel) const override;
-
-  virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) override;
-
-  virtual bool synchronize() override;
-
-  virtual void zero_to_device(device_memory &mem) override;
-  virtual void copy_to_device(device_memory &mem) override;
-  virtual void copy_from_device(device_memory &mem) override;
-
-  virtual CUstream stream()
-  {
-    return cuda_stream_;
-  }
-
-  virtual unique_ptr<DeviceGraphicsInterop> graphics_interop_create() override;
-
- protected:
-  CUDADevice *cuda_device_;
-  CUstream cuda_stream_;
-
-  void assert_success(CUresult result, const char *operation);
-};
-
-CCL_NAMESPACE_END
-
-#endif /* WITH_CUDA */
--- a/intern/cycles/device/cuda/util.cpp
+++ b/intern/cycles/device/cuda/util.cpp
@@ -1,61 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef WITH_CUDA
-
-#  include "device/cuda/util.h"
-#  include "device/cuda/device_impl.h"
-
-CCL_NAMESPACE_BEGIN
-
-CUDAContextScope::CUDAContextScope(CUDADevice *device) : device(device)
-{
-  cuda_device_assert(device, cuCtxPushCurrent(device->cuContext));
-}
-
-CUDAContextScope::~CUDAContextScope()
-{
-  cuda_device_assert(device, cuCtxPopCurrent(NULL));
-}
-
-#  ifndef WITH_CUDA_DYNLOAD
-const char *cuewErrorString(CUresult result)
-{
-  /* We can only give error code here without major code duplication, that
-   * should be enough since dynamic loading is only being disabled by folks
-   * who knows what they're doing anyway.
-   *
-   * NOTE: Avoid call from several threads.
-   */
-  static string error;
-  error = string_printf("%d", result);
-  return error.c_str();
-}
-
-const char *cuewCompilerPath()
-{
-  return CYCLES_CUDA_NVCC_EXECUTABLE;
-}
-
-int cuewCompilerVersion()
-{
-  return (CUDA_VERSION / 100) + (CUDA_VERSION % 100 / 10);
-}
-#  endif
-
-CCL_NAMESPACE_END
-
-#endif /* WITH_CUDA */
--- a/intern/cycles/device/cuda/util.h
+++ b/intern/cycles/device/cuda/util.h
@@ -1,65 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#ifdef WITH_CUDA
-
-#  ifdef WITH_CUDA_DYNLOAD
-#    include "cuew.h"
-#  else
-#    include <cuda.h>
-#  endif
-
-CCL_NAMESPACE_BEGIN
-
-class CUDADevice;
-
-/* Utility to push/pop CUDA context. */
-class CUDAContextScope {
- public:
-  CUDAContextScope(CUDADevice *device);
-  ~CUDAContextScope();
-
- private:
-  CUDADevice *device;
-};
-
-/* Utility for checking return values of CUDA function calls. */
-#  define cuda_device_assert(cuda_device, stmt) \
-    { \
-      CUresult result = stmt; \
-      if (result != CUDA_SUCCESS) { \
-        const char *name = cuewErrorString(result); \
-        cuda_device->set_error( \
-            string_printf("%s in %s (%s:%d)", name, #stmt, __FILE__, __LINE__)); \
-      } \
-    } \
-    (void)0
-
-#  define cuda_assert(stmt) cuda_device_assert(this, stmt)
-
-#  ifndef WITH_CUDA_DYNLOAD
-/* Transparently implement some functions, so majority of the file does not need
- * to worry about difference between dynamically loaded and linked CUDA at all. */
-const char *cuewErrorString(CUresult result);
-const char *cuewCompilerPath();
-int cuewCompilerVersion();
-#  endif /* WITH_CUDA_DYNLOAD */
-
-CCL_NAMESPACE_END
-
-#endif /* WITH_CUDA */
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -20,19 +20,13 @@
 #include "bvh/bvh2.h"

 #include "device/device.h"
-#include "device/device_queue.h"
-
-#include "device/cpu/device.h"
-#include "device/cuda/device.h"
-#include "device/dummy/device.h"
-#include "device/hip/device.h"
-#include "device/multi/device.h"
-#include "device/optix/device.h"
+#include "device/device_intern.h"

 #include "util/util_foreach.h"
 #include "util/util_half.h"
 #include "util/util_logging.h"
 #include "util/util_math.h"
+#include "util/util_opengl.h"
 #include "util/util_string.h"
 #include "util/util_system.h"
 #include "util/util_time.h"
@@ -44,16 +38,332 @@ CCL_NAMESPACE_BEGIN
 bool Device::need_types_update = true;
 bool Device::need_devices_update = true;
 thread_mutex Device::device_mutex;
+vector<DeviceInfo> Device::opencl_devices;
 vector<DeviceInfo> Device::cuda_devices;
 vector<DeviceInfo> Device::optix_devices;
 vector<DeviceInfo> Device::cpu_devices;
-vector<DeviceInfo> Device::hip_devices;
+vector<DeviceInfo> Device::network_devices;
 uint Device::devices_initialized_mask = 0;

+/* Device Requested Features */
+
+/* Stream a human-readable summary of the requested features, one "<label>: <value>" entry per
+ * line. Returns the stream to allow chaining. */
+std::ostream &operator<<(std::ostream &os, const DeviceRequestedFeatures &requested_features)
+{
+  /* Short alias so that every setting fits on a single line. */
+  const DeviceRequestedFeatures &f = requested_features;
+
+  os << "Experimental features: " << (f.experimental ? "On" : "Off") << std::endl;
+  os << "Max nodes group: " << f.max_nodes_group << std::endl;
+  /* TODO(sergey): Decode bitflag into list of names. */
+  os << "Nodes features: " << f.nodes_features << std::endl;
+
+  /* Individual feature switches. */
+  os << "Use Hair: " << string_from_bool(f.use_hair) << std::endl;
+  os << "Use Object Motion: " << string_from_bool(f.use_object_motion) << std::endl;
+  os << "Use Camera Motion: " << string_from_bool(f.use_camera_motion) << std::endl;
+  os << "Use Baking: " << string_from_bool(f.use_baking) << std::endl;
+  os << "Use Subsurface: " << string_from_bool(f.use_subsurface) << std::endl;
+  os << "Use Volume: " << string_from_bool(f.use_volume) << std::endl;
+  os << "Use Branched Integrator: " << string_from_bool(f.use_integrator_branched) << std::endl;
+  os << "Use Patch Evaluation: " << string_from_bool(f.use_patch_evaluation) << std::endl;
+  os << "Use Transparent Shadows: " << string_from_bool(f.use_transparent) << std::endl;
+  os << "Use Principled BSDF: " << string_from_bool(f.use_principled) << std::endl;
+  os << "Use Denoising: " << string_from_bool(f.use_denoising) << std::endl;
+  os << "Use Displacement: " << string_from_bool(f.use_true_displacement) << std::endl;
+  os << "Use Background Light: " << string_from_bool(f.use_background_light) << std::endl;
+
+  return os;
+}
+
 /* Device */

 Device::~Device() noexcept(false)
 {
+  if (!background) {
+    if (vertex_buffer != 0) {
+      glDeleteBuffers(1, &vertex_buffer);
+    }
+    if (fallback_shader_program != 0) {
+      glDeleteProgram(fallback_shader_program);
+    }
+  }
+}
+
+/* Fallback display shader sources (GLSL 3.30). TODO move shaders to standalone .glsl file. */
+const char *FALLBACK_VERTEX_SHADER =
+    "#version 330\n"
+    "uniform vec2 fullscreen;\n"
+    "in vec2 texCoord;\n"
+    "in vec2 pos;\n"
+    "out vec2 texCoord_interp;\n"
+    "\n"
+    "vec2 normalize_coordinates()\n"
+    "{\n"
+    "   return (vec2(2.0) * (pos / fullscreen)) - vec2(1.0);\n"
+    "}\n"
+    "\n"
+    "void main()\n"
+    "{\n"
+    "   gl_Position = vec4(normalize_coordinates(), 0.0, 1.0);\n"
+    "   texCoord_interp = texCoord;\n"
+    "}\n\0";
+
+const char *FALLBACK_FRAGMENT_SHADER =
+    "#version 330\n"
+    "uniform sampler2D image_texture;\n"
+    "in vec2 texCoord_interp;\n"
+    "out vec4 fragColor;\n"
+    "\n"
+    "void main()\n"
+    "{\n"
+    "   fragColor = texture(image_texture, texCoord_interp);\n"
+    "}\n\0";
+
+/* Log a failed shader build step: first the shader source with line numbers, then the info
+ * log that was passed in.
+ * `task` names the failed step, `log` is the info log text and `code` the shader source. */
+static void shader_print_errors(const char *task, const char *log, const char *code)
+{
+  LOG(ERROR) << "Shader: " << task << " error:";
+  LOG(ERROR) << "===== shader string ====";
+
+  stringstream source(code);
+  string text;
+
+  /* Single-digit line numbers get a leading space to keep the listing aligned. */
+  for (int number = 1; getline(source, text, '\n'); number++) {
+    const char *padding = (number < 10) ? " " : "";
+    LOG(ERROR) << padding << number << " " << text;
+  }
+
+  /* The info log comes last, after the full source listing. */
+  LOG(ERROR) << log;
+}
+
+/* Compile and link the fallback display shader program.
+ * Returns the GL program name, or 0 if compiling or linking fails (errors are logged). */
+static int bind_fallback_shader(void)
+{
+  GLint status;
+  GLchar log[5000];
+  GLsizei length = 0;
+  GLuint program = 0;
+
+  struct Shader {
+    const char *source;
+    GLenum type;
+  } shaders[2] = {{FALLBACK_VERTEX_SHADER, GL_VERTEX_SHADER},
+                  {FALLBACK_FRAGMENT_SHADER, GL_FRAGMENT_SHADER}};
+
+  program = glCreateProgram();
+
+  for (int i = 0; i < 2; i++) {
+    GLuint shader = glCreateShader(shaders[i].type);
+    const char *c_str = shaders[i].source;
+
+    glShaderSource(shader, 1, &c_str, NULL);
+    glCompileShader(shader);
+
+    glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
+    if (!status) {
+      glGetShaderInfoLog(shader, sizeof(log), &length, log);
+      shader_print_errors("compile", log, c_str);
+      return 0;
+    }
+
+    glAttachShader(program, shader);
+  }
+
+  /* Link output. */
+  glBindFragDataLocation(program, 0, "fragColor");
+
+  /* Link and error check. */
+  glLinkProgram(program);
+
+  glGetProgramiv(program, GL_LINK_STATUS, &status);
+  if (!status) {
+    /* Link diagnostics belong to the program object, so query the program info log. */
+    glGetProgramInfoLog(program, sizeof(log), &length, log);
+    shader_print_errors("linking", log, FALLBACK_VERTEX_SHADER);
+    shader_print_errors("linking", log, FALLBACK_FRAGMENT_SHADER);
+    return 0;
+  }
+
+  return program;
+}
+
+/* Make the fallback display shader current and upload its uniforms.
+ * The program is built on first use; a failed build is remembered and not retried. */
+bool Device::bind_fallback_display_space_shader(const float width, const float height)
+{
+  if (fallback_status == FALLBACK_SHADER_STATUS_NONE) {
+    /* Stay in the error state unless every step below succeeds. */
+    fallback_status = FALLBACK_SHADER_STATUS_ERROR;
+    fallback_shader_program = bind_fallback_shader();
+    if (fallback_shader_program == 0) {
+      return false;
+    }
+    glUseProgram(fallback_shader_program);
+    image_texture_location = glGetUniformLocation(fallback_shader_program, "image_texture");
+    if (image_texture_location < 0) {
+      LOG(ERROR) << "Shader doesn't contain the 'image_texture' uniform.";
+      return false;
+    }
+
+    fullscreen_location = glGetUniformLocation(fallback_shader_program, "fullscreen");
+    if (fullscreen_location < 0) {
+      LOG(ERROR) << "Shader doesn't contain the 'fullscreen' uniform.";
+      return false;
+    }
+
+    fallback_status = FALLBACK_SHADER_STATUS_SUCCESS;
+  }
+  else if (fallback_status == FALLBACK_SHADER_STATUS_ERROR) {
+    return false;
+  }
+
+  /* Bind and refresh the uniforms on every call. */
+  glUseProgram(fallback_shader_program);
+  glUniform1i(image_texture_location, 0);
+  glUniform2f(fullscreen_location, width, height);
+  return true;
+}
+
+/* Copy the rendered pixels back from the device and draw them as a textured quad.
+ * Uses the display space shader from `draw_params` when a bind callback is provided, and the
+ * built-in fallback shader otherwise. */
+void Device::draw_pixels(device_memory &rgba,
+                         int y,
+                         int w,
+                         int h,
+                         int width,
+                         int height,
+                         int dx,
+                         int dy,
+                         int dw,
+                         int dh,
+                         bool transparent,
+                         const DeviceDrawParams &draw_params)
+{
+  const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL);
+
+  assert(rgba.type == MEM_PIXELS);
+  mem_copy_from(rgba, y, w, h, rgba.memory_elements_size(1));
+
+  GLuint texid;
+  glActiveTexture(GL_TEXTURE0);
+  glGenTextures(1, &texid);
+  glBindTexture(GL_TEXTURE_2D, texid);
+
+  if (rgba.data_type == TYPE_HALF) {
+    GLhalf *data_pointer = (GLhalf *)rgba.host_pointer;
+    data_pointer += 4 * y * w;
+    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, w, h, 0, GL_RGBA, GL_HALF_FLOAT, data_pointer);
+  }
+  else {
+    uint8_t *data_pointer = (uint8_t *)rgba.host_pointer;
+    data_pointer += 4 * y * w;
+    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, data_pointer);
+  }
+
+  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+
+  if (transparent) {
+    glEnable(GL_BLEND);
+    glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
+  }
+
+  GLint shader_program;
+  if (use_fallback_shader) {
+    if (!bind_fallback_display_space_shader(dw, dh)) {
+      /* Nothing can be drawn: release the texture and undo the blend state before bailing. */
+      glBindTexture(GL_TEXTURE_2D, 0);
+      glDeleteTextures(1, &texid);
+      if (transparent) {
+        glDisable(GL_BLEND);
+      }
+      return;
+    }
+    shader_program = fallback_shader_program;
+  }
+  else {
+    draw_params.bind_display_space_shader_cb();
+    glGetIntegerv(GL_CURRENT_PROGRAM, &shader_program);
+  }
+
+  if (!vertex_buffer) {
+    glGenBuffers(1, &vertex_buffer);
+  }
+
+  glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
+  /* Invalidate old contents, avoids stalling if the buffer is still queued for rendering. */
+  glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);
+
+  float *vpointer = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
+  if (vpointer) {
+    /* Four interleaved (texture coordinate, vertex position) pairs, one per quad corner. */
+    vpointer[0] = 0.0f;
+    vpointer[1] = 0.0f;
+    vpointer[2] = dx;
+    vpointer[3] = dy;
+
+    vpointer[4] = 1.0f;
+    vpointer[5] = 0.0f;
+    vpointer[6] = (float)width + dx;
+    vpointer[7] = dy;
+
+    vpointer[8] = 1.0f;
+    vpointer[9] = 1.0f;
+    vpointer[10] = (float)width + dx;
+    vpointer[11] = (float)height + dy;
+
+    vpointer[12] = 0.0f;
+    vpointer[13] = 1.0f;
+    vpointer[14] = dx;
+    vpointer[15] = (float)height + dy;
+    glUnmapBuffer(GL_ARRAY_BUFFER);
+  }
+
+  GLuint vertex_array_object;
+  GLuint position_attribute, texcoord_attribute;
+  glGenVertexArrays(1, &vertex_array_object);
+  glBindVertexArray(vertex_array_object);
+
+  texcoord_attribute = glGetAttribLocation(shader_program, "texCoord");
+  position_attribute = glGetAttribLocation(shader_program, "pos");
+  glEnableVertexAttribArray(texcoord_attribute);
+  glEnableVertexAttribArray(position_attribute);
+
+  glVertexAttribPointer(
+      texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
+  glVertexAttribPointer(position_attribute,
+                        2,
+                        GL_FLOAT,
+                        GL_FALSE,
+                        4 * sizeof(float),
+                        (const GLvoid *)(sizeof(float) * 2));
+
+  glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
+
+  glBindBuffer(GL_ARRAY_BUFFER, 0);
+
+  if (use_fallback_shader) {
+    glUseProgram(0);
+  }
+  else {
+    draw_params.unbind_display_space_shader_cb();
+  }
+
+  glDeleteVertexArrays(1, &vertex_array_object);
+  glBindTexture(GL_TEXTURE_2D, 0);
+  glDeleteTextures(1, &texid);
+
+  if (transparent) {
+    glDisable(GL_BLEND);
+  }
+}

 void Device::build_bvh(BVH *bvh, Progress &progress, bool refit)
@@ -69,14 +379,14 @@ void Device::build_bvh(BVH *bvh, Progress &progress, bool refit)
  }
 }

-Device *Device::create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
+Device *Device::create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
 {
 #ifdef WITH_MULTI
  if (!info.multi_devices.empty()) {
    /* Always create a multi device when info contains multiple devices.
     * This is done so that the type can still be e.g. DEVICE_CPU to indicate
     * that it is a homogeneous collection of devices, which simplifies checks. */
-    return device_multi_create(info, stats, profiler);
+    return device_multi_create(info, stats, profiler, background);
  }
 #endif

@@ -84,34 +394,37 @@ Device *Device::create(const DeviceInfo &info, Stats &stats, Profiler &profiler)

  switch (info.type) {
    case DEVICE_CPU:
-      device = device_cpu_create(info, stats, profiler);
+      device = device_cpu_create(info, stats, profiler, background);
      break;
 #ifdef WITH_CUDA
    case DEVICE_CUDA:
      if (device_cuda_init())
-        device = device_cuda_create(info, stats, profiler);
+        device = device_cuda_create(info, stats, profiler, background);
      break;
 #endif
 #ifdef WITH_OPTIX
    case DEVICE_OPTIX:
      if (device_optix_init())
-        device = device_optix_create(info, stats, profiler);
+        device = device_optix_create(info, stats, profiler, background);
      break;
 #endif
-
-#ifdef WITH_HIP
-    case DEVICE_HIP:
-      if (device_hip_init())
-        device = device_hip_create(info, stats, profiler);
+#ifdef WITH_NETWORK
+    case DEVICE_NETWORK:
+      device = device_network_create(info, stats, profiler, "127.0.0.1");
+      break;
+#endif
+#ifdef WITH_OPENCL
+    case DEVICE_OPENCL:
+      if (device_opencl_init())
+        device = device_opencl_create(info, stats, profiler, background);
      break;
 #endif
-
    default:
      break;
  }

  if (device == NULL) {
-    device = device_dummy_create(info, stats, profiler);
+    device = device_dummy_create(info, stats, profiler, background);
  }

  return device;
@@ -125,10 +438,12 @@ DeviceType Device::type_from_string(const char *name)
    return DEVICE_CUDA;
  else if (strcmp(name, "OPTIX") == 0)
    return DEVICE_OPTIX;
+  else if (strcmp(name, "OPENCL") == 0)
+    return DEVICE_OPENCL;
+  else if (strcmp(name, "NETWORK") == 0)
+    return DEVICE_NETWORK;
  else if (strcmp(name, "MULTI") == 0)
    return DEVICE_MULTI;
-  else if (strcmp(name, "HIP") == 0)
-    return DEVICE_HIP;

  return DEVICE_NONE;
 }
@@ -141,10 +456,12 @@ string Device::string_from_type(DeviceType type)
    return "CUDA";
  else if (type == DEVICE_OPTIX)
    return "OPTIX";
+  else if (type == DEVICE_OPENCL)
+    return "OPENCL";
+  else if (type == DEVICE_NETWORK)
+    return "NETWORK";
  else if (type == DEVICE_MULTI)
    return "MULTI";
-  else if (type == DEVICE_HIP)
-    return "HIP";

  return "";
 }
@@ -159,10 +476,12 @@ vector<DeviceType> Device::available_types()
 #ifdef WITH_OPTIX
  types.push_back(DEVICE_OPTIX);
 #endif
-#ifdef WITH_HIP
-  types.push_back(DEVICE_HIP);
+#ifdef WITH_OPENCL
+  types.push_back(DEVICE_OPENCL);
+#endif
+#ifdef WITH_NETWORK
+  types.push_back(DEVICE_NETWORK);
 #endif
-
  return types;
 }

@@ -174,6 +493,20 @@ vector<DeviceInfo> Device::available_devices(uint mask)
  thread_scoped_lock lock(device_mutex);
  vector<DeviceInfo> devices;

+#ifdef WITH_OPENCL
+  if (mask & DEVICE_MASK_OPENCL) {
+    if (!(devices_initialized_mask & DEVICE_MASK_OPENCL)) {
+      if (device_opencl_init()) {
+        device_opencl_info(opencl_devices);
+      }
+      devices_initialized_mask |= DEVICE_MASK_OPENCL;
+    }
+    foreach (DeviceInfo &info, opencl_devices) {
+      devices.push_back(info);
+    }
+  }
+#endif
+
 #if defined(WITH_CUDA) || defined(WITH_OPTIX)
  if (mask & (DEVICE_MASK_CUDA | DEVICE_MASK_OPTIX)) {
    if (!(devices_initialized_mask & DEVICE_MASK_CUDA)) {
@@ -204,20 +537,6 @@ vector<DeviceInfo> Device::available_devices(uint mask)
  }
 #endif

-#ifdef WITH_HIP
-  if (mask & DEVICE_MASK_HIP) {
-    if (!(devices_initialized_mask & DEVICE_MASK_HIP)) {
-      if (device_hip_init()) {
-        device_hip_info(hip_devices);
-      }
-      devices_initialized_mask |= DEVICE_MASK_HIP;
-    }
-    foreach (DeviceInfo &info, hip_devices) {
-      devices.push_back(info);
-    }
-  }
-#endif
-
  if (mask & DEVICE_MASK_CPU) {
    if (!(devices_initialized_mask & DEVICE_MASK_CPU)) {
      device_cpu_info(cpu_devices);
@@ -228,6 +547,18 @@ vector<DeviceInfo> Device::available_devices(uint mask)
    }
  }

+#ifdef WITH_NETWORK
+  if (mask & DEVICE_MASK_NETWORK) {
+    if (!(devices_initialized_mask & DEVICE_MASK_NETWORK)) {
+      device_network_info(network_devices);
+      devices_initialized_mask |= DEVICE_MASK_NETWORK;
+    }
+    foreach (DeviceInfo &info, network_devices) {
+      devices.push_back(info);
+    }
+  }
+#endif
+
  return devices;
 }

@@ -249,6 +580,15 @@ string Device::device_capabilities(uint mask)
    capabilities += device_cpu_capabilities() + "\n";
  }

+#ifdef WITH_OPENCL
+  if (mask & DEVICE_MASK_OPENCL) {
+    if (device_opencl_init()) {
+      capabilities += "\nOpenCL device capabilities:\n";
+      capabilities += device_opencl_capabilities();
+    }
+  }
+#endif
+
 #ifdef WITH_CUDA
  if (mask & DEVICE_MASK_CUDA) {
    if (device_cuda_init()) {
@@ -258,15 +598,6 @@ string Device::device_capabilities(uint mask)
  }
 #endif

-#ifdef WITH_HIP
-  if (mask & DEVICE_MASK_HIP) {
-    if (device_hip_init()) {
-      capabilities += "\nHIP device capabilities:\n";
-      capabilities += device_hip_capabilities();
-    }
-  }
-#endif
-
  return capabilities;
 }

@@ -282,13 +613,16 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
  }

  DeviceInfo info;
-  info.type = DEVICE_NONE;
+  info.type = subdevices.front().type;
  info.id = "MULTI";
  info.description = "Multi Device";
  info.num = 0;

  info.has_half_images = true;
  info.has_nanovdb = true;
+  info.has_volume_decoupled = true;
+  info.has_branched_path = true;
+  info.has_adaptive_stop_per_sample = true;
  info.has_osl = true;
  info.has_profiling = true;
  info.has_peer_memory = false;
@@ -326,16 +660,16 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
    info.id += device.id;

    /* Set device type to MULTI if subdevices are not of a common type. */
-    if (info.type == DEVICE_NONE) {
-      info.type = device.type;
-    }
-    else if (device.type != info.type) {
+    if (device.type != info.type) {
      info.type = DEVICE_MULTI;
    }

    /* Accumulate device info. */
    info.has_half_images &= device.has_half_images;
    info.has_nanovdb &= device.has_nanovdb;
+    info.has_volume_decoupled &= device.has_volume_decoupled;
+    info.has_branched_path &= device.has_branched_path;
+    info.has_adaptive_stop_per_sample &= device.has_adaptive_stop_per_sample;
    info.has_osl &= device.has_osl;
    info.has_profiling &= device.has_profiling;
    info.has_peer_memory |= device.has_peer_memory;
@@ -355,33 +689,60 @@ void Device::free_memory()
  devices_initialized_mask = 0;
  cuda_devices.free_memory();
  optix_devices.free_memory();
-  hip_devices.free_memory();
+  opencl_devices.free_memory();
  cpu_devices.free_memory();
-}
-
-unique_ptr<DeviceQueue> Device::gpu_queue_create()
-{
-  LOG(FATAL) << "Device does not support queues.";
-  return nullptr;
-}
-
-const CPUKernels *Device::get_cpu_kernels() const
-{
-  LOG(FATAL) << "Device does not support CPU kernels.";
-  return nullptr;
-}
-
-void Device::get_cpu_kernel_thread_globals(
-    vector<CPUKernelThreadGlobals> & /*kernel_thread_globals*/)
-{
-  LOG(FATAL) << "Device does not support CPU kernels.";
-}
-
-void *Device::get_cpu_osl_memory()
-{
-  return nullptr;
+  network_devices.free_memory();
 }

 /* DeviceInfo */

+/* Add the extra devices needed for `denoiser_type` when this device is not of the matching
+ * type: OptiX devices for DENOISER_OPTIX, one CPU device for DENOISER_OPENIMAGEDENOISE. */
+void DeviceInfo::add_denoising_devices(DenoiserType denoiser_type)
+{
+  assert(denoising_devices.empty());
+  if (denoiser_type == DENOISER_OPTIX && type != DEVICE_OPTIX) {
+    vector<DeviceInfo> optix_devices = Device::available_devices(DEVICE_MASK_OPTIX);
+    if (!optix_devices.empty()) {
+      /* Convert to a special multi device with separate denoising devices. */
+      if (multi_devices.empty()) {
+        multi_devices.push_back(*this);
+      }
+
+      /* Try to use the same physical devices for denoising (CUDA/OptiX matched by `num`). */
+      for (const DeviceInfo &cuda_device : multi_devices) {
+        if (cuda_device.type == DEVICE_CUDA) {
+          for (const DeviceInfo &optix_device : optix_devices) {
+            if (cuda_device.num == optix_device.num) {
+              id += optix_device.id;
+              denoising_devices.push_back(optix_device);
+              break;
+            }
+          }
+        }
+      }
+
+      if (denoising_devices.empty()) {
+        /* No matching CUDA device: simply use the first available OptiX device. */
+        const DeviceInfo optix_device = optix_devices.front();
+        id += optix_device.id; /* Uniquely identify this special multi device. */
+        denoising_devices.push_back(optix_device);
+      }
+
+      denoisers = denoiser_type;
+    }
+  }
+  else if (denoiser_type == DENOISER_OPENIMAGEDENOISE && type != DEVICE_CPU) {
+    /* Convert to a special multi device with separate denoising devices. */
+    if (multi_devices.empty()) {
+      multi_devices.push_back(*this);
+    }
+
+    /* Denoise on the first CPU device; assumes one is always listed (TODO confirm). */
+    DeviceInfo cpu_device = Device::available_devices(DEVICE_MASK_CPU).front();
+    denoising_devices.push_back(cpu_device);
+    denoisers = denoiser_type;
+  }
+}
+
 CCL_NAMESPACE_END
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -21,48 +21,52 @@

 #include "bvh/bvh_params.h"

-#include "device/device_denoise.h"
 #include "device/device_memory.h"
+#include "device/device_task.h"

-#include "util/util_function.h"
 #include "util/util_list.h"
-#include "util/util_logging.h"
 #include "util/util_stats.h"
 #include "util/util_string.h"
 #include "util/util_texture.h"
 #include "util/util_thread.h"
 #include "util/util_types.h"
-#include "util/util_unique_ptr.h"
 #include "util/util_vector.h"

 CCL_NAMESPACE_BEGIN

 class BVH;
-class DeviceQueue;
 class Progress;
-class CPUKernels;
-class CPUKernelThreadGlobals;
+class RenderTile;

 /* Device Types */

 enum DeviceType {
  DEVICE_NONE = 0,
  DEVICE_CPU,
+  DEVICE_OPENCL,
  DEVICE_CUDA,
+  DEVICE_NETWORK,
  DEVICE_MULTI,
  DEVICE_OPTIX,
-  DEVICE_HIP,
  DEVICE_DUMMY,
 };

 enum DeviceTypeMask {
  DEVICE_MASK_CPU = (1 << DEVICE_CPU),
+  DEVICE_MASK_OPENCL = (1 << DEVICE_OPENCL),
  DEVICE_MASK_CUDA = (1 << DEVICE_CUDA),
  DEVICE_MASK_OPTIX = (1 << DEVICE_OPTIX),
-  DEVICE_MASK_HIP = (1 << DEVICE_HIP),
+  DEVICE_MASK_NETWORK = (1 << DEVICE_NETWORK),
  DEVICE_MASK_ALL = ~0
 };

+enum DeviceKernelStatus { /* Returned by Device::get_active_kernel_switch_state(). */
+  DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE,
+  DEVICE_KERNEL_USING_FEATURE_KERNEL, /* Value reported by the base Device implementation. */
+  DEVICE_KERNEL_FEATURE_KERNEL_INVALID,
+  DEVICE_KERNEL_UNKNOWN,
+};
+
 #define DEVICE_MASK(type) (DeviceTypeMask)(1 << type)

 class DeviceInfo {
@@ -71,16 +75,20 @@ class DeviceInfo {
  string description;
  string id; /* used for user preferences, should stay fixed with changing hardware config */
  int num;
-  bool display_device;        /* GPU is used as a display device. */
-  bool has_nanovdb;           /* Support NanoVDB volumes. */
-  bool has_half_images;       /* Support half-float textures. */
-  bool has_osl;               /* Support Open Shading Language. */
-  bool has_profiling;         /* Supports runtime collection of profiling info. */
-  bool has_peer_memory;       /* GPU has P2P access to memory of another GPU. */
-  bool has_gpu_queue;         /* Device supports GPU queue. */
-  DenoiserTypeMask denoisers; /* Supported denoiser types. */
+  bool display_device;               /* GPU is used as a display device. */
+  bool has_half_images;              /* Support half-float textures. */
+  bool has_nanovdb;                  /* Support NanoVDB volumes. */
+  bool has_volume_decoupled;         /* Decoupled volume shading. */
+  bool has_branched_path;            /* Supports branched path tracing. */
+  bool has_adaptive_stop_per_sample; /* Per-sample adaptive sampling stopping. */
+  bool has_osl;                      /* Support Open Shading Language. */
+  bool use_split_kernel;             /* Use split or mega kernel. */
+  bool has_profiling;                /* Supports runtime collection of profiling info. */
+  bool has_peer_memory;              /* GPU has P2P access to memory of another GPU. */
+  DenoiserTypeMask denoisers;        /* Supported denoiser types. */
  int cpu_threads;
  vector<DeviceInfo> multi_devices;
+  vector<DeviceInfo> denoising_devices;
  string error_msg;

  DeviceInfo()
@@ -92,36 +100,228 @@ class DeviceInfo {
    display_device = false;
    has_half_images = false;
    has_nanovdb = false;
+    has_volume_decoupled = false;
+    has_branched_path = true;
+    has_adaptive_stop_per_sample = false;
    has_osl = false;
+    use_split_kernel = false;
    has_profiling = false;
    has_peer_memory = false;
-    has_gpu_queue = false;
    denoisers = DENOISER_NONE;
  }

-  bool operator==(const DeviceInfo &info) const
+  bool operator==(const DeviceInfo &info) const /* Devices are equal when their ids match. */
   {
     /* Multiple Devices with the same ID would be very bad. */
     assert(id != info.id ||
            (type == info.type && num == info.num && description == info.description));
     return id == info.id;
   }
+
+  /* Add additional devices needed for the specified denoiser. */
+  void add_denoising_devices(DenoiserType denoiser_type);
 };

+/* Kernel feature set requested when loading device kernels; convertible to build options. */
+class DeviceRequestedFeatures {
+ public:
+  /* Use experimental feature set. */
+  bool experimental;
+
+  /* Selective nodes compilation.
+   * Identifier of a node group up to which all the nodes need to be compiled in.
+   * Nodes from higher group indices will be ignored. */
+  int max_nodes_group;
+
+  /* Features bitfield indicating which features from the requested group
+   * will be compiled in. Nodes which correspond to features which are not
+   * in this bitfield will be ignored even if they're in the requested group. */
+  int nodes_features;
+
+  /* BVH/sampling kernel features. */
+  bool use_hair;
+  bool use_hair_thick;
+  bool use_object_motion;
+  bool use_camera_motion;
+
+  /* Denotes whether baking functionality is needed. */
+  bool use_baking;
+
+  /* Use subsurface scattering materials. */
+  bool use_subsurface;
+
+  /* Use volume materials. */
+  bool use_volume;
+
+  /* Use branched integrator. */
+  bool use_integrator_branched;
+
+  /* Use OpenSubdiv patch evaluation */
+  bool use_patch_evaluation;
+
+  /* Use Transparent shadows */
+  bool use_transparent;
+
+  /* Use various shadow tricks, such as shadow catcher. */
+  bool use_shadow_tricks;
+
+  /* Per-uber shader usage flags. */
+  bool use_principled;
+
+  /* Denoising features. */
+  bool use_denoising;
+
+  /* Use raytracing in shaders. */
+  bool use_shader_raytrace;
+
+  /* Use true displacement */
+  bool use_true_displacement;
+
+  /* Use background lights */
+  bool use_background_light;
+
+  DeviceRequestedFeatures()
+  {
+    /* TODO(sergey): Find more meaningful defaults. */
+    experimental = false; /* Read by get_build_options(), must not stay uninitialized. */
+    max_nodes_group = 0;
+    nodes_features = 0;
+    use_hair = false;
+    use_hair_thick = false;
+    use_object_motion = false;
+    use_camera_motion = false;
+    use_baking = false;
+    use_subsurface = false;
+    use_volume = false;
+    use_integrator_branched = false;
+    use_patch_evaluation = false;
+    use_transparent = false;
+    use_shadow_tricks = false;
+    use_principled = false;
+    use_denoising = false;
+    use_shader_raytrace = false;
+    use_true_displacement = false;
+    use_background_light = false;
+  }
+
+  /* True when any field differs; `experimental` counts since it changes the build options. */
+  bool modified(const DeviceRequestedFeatures &requested_features) const
+  {
+    return !(experimental == requested_features.experimental &&
+             max_nodes_group == requested_features.max_nodes_group &&
+             nodes_features == requested_features.nodes_features &&
+             use_hair == requested_features.use_hair &&
+             use_hair_thick == requested_features.use_hair_thick &&
+             use_object_motion == requested_features.use_object_motion &&
+             use_camera_motion == requested_features.use_camera_motion &&
+             use_baking == requested_features.use_baking &&
+             use_subsurface == requested_features.use_subsurface &&
+             use_volume == requested_features.use_volume &&
+             use_integrator_branched == requested_features.use_integrator_branched &&
+             use_patch_evaluation == requested_features.use_patch_evaluation &&
+             use_transparent == requested_features.use_transparent &&
+             use_shadow_tricks == requested_features.use_shadow_tricks &&
+             use_principled == requested_features.use_principled &&
+             use_denoising == requested_features.use_denoising &&
+             use_shader_raytrace == requested_features.use_shader_raytrace &&
+             use_true_displacement == requested_features.use_true_displacement &&
+             use_background_light == requested_features.use_background_light);
+  }
+
+  /* Convert the requested features structure to build options which can be passed to
+   * compilers. use_hair_thick, use_true_displacement and use_background_light add none. */
+  string get_build_options() const
+  {
+    string build_options = "";
+    if (experimental) {
+      build_options += "-D__KERNEL_EXPERIMENTAL__ ";
+    }
+    build_options += "-D__NODES_MAX_GROUP__=" + string_printf("%d", max_nodes_group);
+    build_options += " -D__NODES_FEATURES__=" + string_printf("%d", nodes_features);
+    if (!use_hair) {
+      build_options += " -D__NO_HAIR__";
+    }
+    if (!use_object_motion) {
+      build_options += " -D__NO_OBJECT_MOTION__";
+    }
+    if (!use_camera_motion) {
+      build_options += " -D__NO_CAMERA_MOTION__";
+    }
+    if (!use_baking) {
+      build_options += " -D__NO_BAKING__";
+    }
+    if (!use_volume) {
+      build_options += " -D__NO_VOLUME__";
+    }
+    if (!use_subsurface) {
+      build_options += " -D__NO_SUBSURFACE__";
+    }
+    if (!use_integrator_branched) {
+      build_options += " -D__NO_BRANCHED_PATH__";
+    }
+    if (!use_patch_evaluation) {
+      build_options += " -D__NO_PATCH_EVAL__";
+    }
+    if (!use_transparent && !use_volume) {
+      build_options += " -D__NO_TRANSPARENT__";
+    }
+    if (!use_shadow_tricks) {
+      build_options += " -D__NO_SHADOW_TRICKS__";
+    }
+    if (!use_principled) {
+      build_options += " -D__NO_PRINCIPLED__";
+    }
+    if (!use_denoising) {
+      build_options += " -D__NO_DENOISING__";
+    }
+    if (!use_shader_raytrace) {
+      build_options += " -D__NO_SHADER_RAYTRACE__";
+    }
+    return build_options;
+  }
+};
+
+std::ostream &operator<<(std::ostream &os, const DeviceRequestedFeatures &requested_features);
+
 /* Device */

+struct DeviceDrawParams { /* Shader callbacks handed to Device::draw_pixels(). */
+  function<void()> bind_display_space_shader_cb; /* Presumably the bind counterpart (TODO confirm). */
+  function<void()> unbind_display_space_shader_cb; /* Skipped when the fallback shader is used. */
+};
+
 class Device {
  friend class device_sub_ptr;

 protected:
-  Device(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_)
-      : info(info_), stats(stats_), profiler(profiler_)
+  enum {
+    FALLBACK_SHADER_STATUS_NONE = 0,
+    FALLBACK_SHADER_STATUS_ERROR,
+    FALLBACK_SHADER_STATUS_SUCCESS,
+  };
+
+  Device(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background)
+      : background(background),
+        vertex_buffer(0),
+        fallback_status(FALLBACK_SHADER_STATUS_NONE),
+        fallback_shader_program(0),
+        info(info_),
+        stats(stats_),
+        profiler(profiler_)
  {
  }

+  bool background;
  string error_msg;

-  virtual device_ptr mem_alloc_sub_ptr(device_memory & /*mem*/, size_t /*offset*/, size_t /*size*/)
+  /* used for real time display */
+  unsigned int vertex_buffer;
+  int fallback_status, fallback_shader_program;
+  int image_texture_location, fullscreen_location;
+
+  bool bind_fallback_display_space_shader(const float width, const float height);
+
+  virtual device_ptr mem_alloc_sub_ptr(device_memory & /*mem*/, int /*offset*/, int /*size*/)
  {
    /* Only required for devices that implement denoising. */
    assert(false);
@@ -161,31 +361,67 @@ class Device {
  Stats &stats;
  Profiler &profiler;

+  /* memory alignment */
+  virtual int mem_sub_ptr_alignment()
+  {
+    return MIN_ALIGNMENT_CPU_DATA_TYPES;
+  }
+
  /* constant memory */
  virtual void const_copy_to(const char *name, void *host, size_t size) = 0;

+  /* open shading language, only for CPU device */
+  virtual void *osl_memory()
+  {
+    return NULL;
+  }
+
  /* load/compile kernels, must be called before adding tasks */
-  virtual bool load_kernels(uint /*kernel_features*/)
+  virtual bool load_kernels(const DeviceRequestedFeatures & /*requested_features*/)
  {
    return true;
  }

-  /* GPU device only functions.
-   * These may not be used on CPU or multi-devices. */
+  /* Wait for the device to become available to upload data and receive tasks.
+   * This method is used by the OpenCL device to load the optimized kernels or,
+   * when they are not (yet) available, to load the generic kernels
+   * (only during foreground rendering). */
+  virtual bool wait_for_availability(const DeviceRequestedFeatures & /*requested_features*/)
+  {
+    return true;
+  }
+  /* Check whether 'better' kernels are available to be used,
+   * i.e. whether we can switch over to these kernels.
+   * This method is used to determine if we can switch the preview kernels
+   * to regular kernels. */
+  virtual DeviceKernelStatus get_active_kernel_switch_state()
+  {
+    return DEVICE_KERNEL_USING_FEATURE_KERNEL;
+  }

-  /* Create new queue for executing kernels in. */
-  virtual unique_ptr<DeviceQueue> gpu_queue_create();
+  /* tasks */
+  virtual int get_split_task_count(DeviceTask &)
+  {
+    return 1;
+  }

-  /* CPU device only functions.
-   * These may not be used on GPU or multi-devices. */
+  virtual void task_add(DeviceTask &task) = 0;
+  virtual void task_wait() = 0;
+  virtual void task_cancel() = 0;

-  /* Get CPU kernel functions for native instruction set. */
-  virtual const CPUKernels *get_cpu_kernels() const;
-  /* Get kernel globals to pass to kernels. */
-  virtual void get_cpu_kernel_thread_globals(
-      vector<CPUKernelThreadGlobals> & /*kernel_thread_globals*/);
-  /* Get OpenShadingLanguage memory buffer. */
-  virtual void *get_cpu_osl_memory();
+  /* opengl drawing */
+  virtual void draw_pixels(device_memory &mem,
+                           int y,
+                           int w,
+                           int h,
+                           int width,
+                           int height,
+                           int dx,
+                           int dy,
+                           int dw,
+                           int dh,
+                           bool transparent,
+                           const DeviceDrawParams &draw_params);

  /* acceleration structure building */
  virtual void build_bvh(BVH *bvh, Progress &progress, bool refit);
@@ -193,11 +429,25 @@ class Device {
  /* OptiX specific destructor. */
  virtual void release_optix_bvh(BVH * /*bvh*/){};

+#ifdef WITH_NETWORK
+  /* networking */
+  void server_run();
+#endif
+
  /* multi device */
+  virtual void map_tile(Device * /*sub_device*/, RenderTile & /*tile*/)
+  {
+  }
  virtual int device_number(Device * /*sub_device*/)
  {
    return 0;
  }
+  virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTileNeighbors & /*neighbors*/)
+  {
+  }
+  virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTileNeighbors & /*neighbors*/)
+  {
+  }

  virtual bool is_resident(device_ptr /*key*/, Device *sub_device)
  {
@@ -210,47 +460,11 @@ class Device {
    return false;
  }

-  /* Graphics resources interoperability.
-   *
-   * The interoperability comes here by the meaning that the device is capable of computing result
-   * directly into an OpenGL (or other graphics library) buffer. */
-
-  /* Check display is to be updated using graphics interoperability.
-   * The interoperability can not be used is it is not supported by the device. But the device
-   * might also force disable the interoperability if it detects that it will be slower than
-   * copying pixels from the render buffer. */
-  virtual bool should_use_graphics_interop()
-  {
-    return false;
-  }
-
-  /* Buffer denoising. */
-
-  /* Returns true if task is fully handled. */
-  virtual bool denoise_buffer(const DeviceDenoiseTask & /*task*/)
-  {
-    LOG(ERROR) << "Request buffer denoising from a device which does not support it.";
-    return false;
-  }
-
-  virtual DeviceQueue *get_denoise_queue()
-  {
-    LOG(ERROR) << "Request denoising queue from a device which does not support it.";
-    return nullptr;
-  }
-
-  /* Sub-devices */
-
-  /* Run given callback for every individual device which will be handling rendering.
-   * For the single device the callback is called for the device itself. For the multi-device the
-   * callback is only called for the sub-devices. */
-  virtual void foreach_device(const function<void(Device *)> &callback)
-  {
-    callback(this);
-  }
-
  /* static */
-  static Device *create(const DeviceInfo &info, Stats &stats, Profiler &profiler);
+  static Device *create(DeviceInfo &info,
+                        Stats &stats,
+                        Profiler &profiler,
+                        bool background = true);

  static DeviceType type_from_string(const char *name);
  static string string_from_type(DeviceType type);
@@ -275,7 +489,7 @@ class Device {

  virtual void mem_alloc(device_memory &mem) = 0;
  virtual void mem_copy_to(device_memory &mem) = 0;
-  virtual void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) = 0;
+  virtual void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) = 0;
  virtual void mem_zero(device_memory &mem) = 0;
  virtual void mem_free(device_memory &mem) = 0;

@@ -285,8 +499,9 @@ class Device {
  static thread_mutex device_mutex;
  static vector<DeviceInfo> cuda_devices;
  static vector<DeviceInfo> optix_devices;
+  static vector<DeviceInfo> opencl_devices;
  static vector<DeviceInfo> cpu_devices;
-  static vector<DeviceInfo> hip_devices;
+  static vector<DeviceInfo> network_devices;
  static uint devices_initialized_mask;
 };

--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -14,25 +14,21 @@
 * limitations under the License.
 */

-#include "device/cuda/device.h"
-
-#include "util/util_logging.h"
-
 #ifdef WITH_CUDA
-#  include "device/cuda/device_impl.h"
-#  include "device/device.h"

+#  include "device/cuda/device_cuda.h"
+#  include "device/device.h"
+#  include "device/device_intern.h"
+
+#  include "util/util_logging.h"
 #  include "util/util_string.h"
 #  include "util/util_windows.h"
-#endif /* WITH_CUDA */

 CCL_NAMESPACE_BEGIN

 bool device_cuda_init()
 {
-#if !defined(WITH_CUDA)
-  return false;
-#elif defined(WITH_CUDA_DYNLOAD)
+#  ifdef WITH_CUDA_DYNLOAD
  static bool initialized = false;
  static bool result = false;

@@ -63,27 +59,16 @@ bool device_cuda_init()
  }

  return result;
-#else  /* WITH_CUDA_DYNLOAD */
+#  else  /* WITH_CUDA_DYNLOAD */
  return true;
-#endif /* WITH_CUDA_DYNLOAD */
+#  endif /* WITH_CUDA_DYNLOAD */
 }

-Device *device_cuda_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
+Device *device_cuda_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
 {
-#ifdef WITH_CUDA
-  return new CUDADevice(info, stats, profiler);
-#else
-  (void)info;
-  (void)stats;
-  (void)profiler;
-
-  LOG(FATAL) << "Request to create CUDA device without compiled-in support. Should never happen.";
-
-  return nullptr;
-#endif
+  return new CUDADevice(info, stats, profiler, background);
 }

-#ifdef WITH_CUDA
 static CUresult device_cuda_safe_init()
 {
 #  ifdef _WIN32
@@ -101,11 +86,9 @@ static CUresult device_cuda_safe_init()
  return cuInit(0);
 #  endif
 }
-#endif /* WITH_CUDA */

 void device_cuda_info(vector<DeviceInfo> &devices)
 {
-#ifdef WITH_CUDA
  CUresult result = device_cuda_safe_init();
  if (result != CUDA_SUCCESS) {
    if (result != CUDA_ERROR_NO_DEVICE)
@@ -146,9 +129,9 @@ void device_cuda_info(vector<DeviceInfo> &devices)

    info.has_half_images = (major >= 3);
    info.has_nanovdb = true;
-    info.denoisers = 0;
-
-    info.has_gpu_queue = true;
+    info.has_volume_decoupled = false;
+    info.has_adaptive_stop_per_sample = false;
+    info.denoisers = DENOISER_NLM;

    /* Check if the device has P2P access to any other device in the system. */
    for (int peer_num = 0; peer_num < count && !info.has_peer_memory; peer_num++) {
@@ -199,14 +182,10 @@ void device_cuda_info(vector<DeviceInfo> &devices)

  if (!display_devices.empty())
    devices.insert(devices.end(), display_devices.begin(), display_devices.end());
-#else  /* WITH_CUDA */
-  (void)devices;
-#endif /* WITH_CUDA */
 }

 string device_cuda_capabilities()
 {
-#ifdef WITH_CUDA
  CUresult result = device_cuda_safe_init();
  if (result != CUDA_SUCCESS) {
    if (result != CUDA_ERROR_NO_DEVICE) {
@@ -331,10 +310,8 @@ string device_cuda_capabilities()
  }

  return capabilities;
-
-#else  /* WITH_CUDA */
-  return "";
-#endif /* WITH_CUDA */
 }

 CCL_NAMESPACE_END
+
+#endif
--- a/intern/cycles/device/device_denoise.cpp
+++ b/intern/cycles/device/device_denoise.cpp
@@ -1,88 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "device/device_denoise.h"
-
-CCL_NAMESPACE_BEGIN
-
-const char *denoiserTypeToHumanReadable(DenoiserType type)
-{
-  switch (type) {
-    case DENOISER_OPTIX:
-      return "OptiX";
-    case DENOISER_OPENIMAGEDENOISE:
-      return "OpenImageDenoise";
-
-    case DENOISER_NUM:
-    case DENOISER_NONE:
-    case DENOISER_ALL:
-      return "UNKNOWN";
-  }
-
-  return "UNKNOWN";
-}
-
-const NodeEnum *DenoiseParams::get_type_enum()
-{
-  static NodeEnum type_enum;
-
-  if (type_enum.empty()) {
-    type_enum.insert("optix", DENOISER_OPTIX);
-    type_enum.insert("openimageio", DENOISER_OPENIMAGEDENOISE);
-  }
-
-  return &type_enum;
-}
-
-const NodeEnum *DenoiseParams::get_prefilter_enum()
-{
-  static NodeEnum prefilter_enum;
-
-  if (prefilter_enum.empty()) {
-    prefilter_enum.insert("none", DENOISER_PREFILTER_NONE);
-    prefilter_enum.insert("fast", DENOISER_PREFILTER_FAST);
-    prefilter_enum.insert("accurate", DENOISER_PREFILTER_ACCURATE);
-  }
-
-  return &prefilter_enum;
-}
-
-NODE_DEFINE(DenoiseParams)
-{
-  NodeType *type = NodeType::add("denoise_params", create);
-
-  const NodeEnum *type_enum = get_type_enum();
-  const NodeEnum *prefilter_enum = get_prefilter_enum();
-
-  SOCKET_BOOLEAN(use, "Use", false);
-
-  SOCKET_ENUM(type, "Type", *type_enum, DENOISER_OPENIMAGEDENOISE);
-
-  SOCKET_INT(start_sample, "Start Sample", 0);
-
-  SOCKET_BOOLEAN(use_pass_albedo, "Use Pass Albedo", true);
-  SOCKET_BOOLEAN(use_pass_normal, "Use Pass Normal", false);
-
-  SOCKET_ENUM(prefilter, "Prefilter", *prefilter_enum, DENOISER_PREFILTER_FAST);
-
-  return type;
-}
-
-DenoiseParams::DenoiseParams() : Node(get_node_type())
-{
-}
-
-CCL_NAMESPACE_END
--- a/intern/cycles/device/device_denoise.h
+++ b/intern/cycles/device/device_denoise.h
@@ -1,110 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "device/device_memory.h"
-#include "graph/node.h"
-#include "render/buffers.h"
-
-CCL_NAMESPACE_BEGIN
-
-enum DenoiserType {
-  DENOISER_OPTIX = 2,
-  DENOISER_OPENIMAGEDENOISE = 4,
-  DENOISER_NUM,
-
-  DENOISER_NONE = 0,
-  DENOISER_ALL = ~0,
-};
-
-/* COnstruct human-readable string which denotes the denoiser type. */
-const char *denoiserTypeToHumanReadable(DenoiserType type);
-
-typedef int DenoiserTypeMask;
-
-enum DenoiserPrefilter {
-  /* Best quality of the result without extra processing time, but requires guiding passes to be
-   * noise-free. */
-  DENOISER_PREFILTER_NONE = 1,
-
-  /* Denoise color and guiding passes together.
-   * Improves quality when guiding passes are noisy using least amount of extra processing time. */
-  DENOISER_PREFILTER_FAST = 2,
-
-  /* Prefilter noisy guiding passes before denoising color.
-   * Improves quality when guiding passes are noisy using extra processing time. */
-  DENOISER_PREFILTER_ACCURATE = 3,
-
-  DENOISER_PREFILTER_NUM,
-};
-
-/* NOTE: Is not a real scene node. Using Node API for ease of (de)serialization.
- * The default values here do not really matter as they are always initialized from the
- * Integrator node. */
-class DenoiseParams : public Node {
- public:
-  NODE_DECLARE
-
-  /* Apply denoiser to image. */
-  bool use = false;
-
-  /* Denoiser type. */
-  DenoiserType type = DENOISER_OPENIMAGEDENOISE;
-
-  /* Viewport start sample. */
-  int start_sample = 0;
-
-  /* Auxiliary passes. */
-  bool use_pass_albedo = true;
-  bool use_pass_normal = true;
-
-  DenoiserPrefilter prefilter = DENOISER_PREFILTER_FAST;
-
-  static const NodeEnum *get_type_enum();
-  static const NodeEnum *get_prefilter_enum();
-
-  DenoiseParams();
-
-  bool modified(const DenoiseParams &other) const
-  {
-    return !(use == other.use && type == other.type && start_sample == other.start_sample &&
-             use_pass_albedo == other.use_pass_albedo &&
-             use_pass_normal == other.use_pass_normal && prefilter == other.prefilter);
-  }
-};
-
-/* All the parameters needed to perform buffer denoising on a device.
- * Is not really a task in its canonical terms (as in, is not an asynchronous running task). Is
- * more like a wrapper for all the arguments and parameters needed to perform denoising. Is a
- * single place where they are all listed, so that it's not required to modify all device methods
- * when these parameters do change. */
-class DeviceDenoiseTask {
- public:
-  DenoiseParams params;
-
-  int num_samples;
-
-  RenderBuffers *render_buffers;
-  BufferParams buffer_params;
-
-  /* Allow to do in-place modification of the input passes (scaling them down i.e.). This will
-   * lower the memory footprint of the denoiser but will make input passes "invalid" (from path
-   * tracer) point of view. */
-  bool allow_inplace_modification;
-};
-
-CCL_NAMESPACE_END
--- a/intern/cycles/device/device_denoising.cpp
+++ b/intern/cycles/device/device_denoising.cpp
@@ -0,0 +1,353 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "device/device_denoising.h"
+
+#include "kernel/filter/filter_defines.h"
+
+CCL_NAMESPACE_BEGIN
+
+DenoisingTask::DenoisingTask(Device *device, const DeviceTask &task)
+    : tile_info_mem(device, "denoising tile info mem", MEM_READ_WRITE),
+      profiler(NULL),
+      storage(device),
+      buffer(device),
+      device(device)
+{
+  radius = task.denoising.radius;
+  nlm_k_2 = powf(2.0f, lerp(-5.0f, 3.0f, task.denoising.strength));
+  if (task.denoising.relative_pca) {
+    pca_threshold = -powf(10.0f, lerp(-8.0f, 0.0f, task.denoising.feature_strength));
+  }
+  else {
+    pca_threshold = powf(10.0f, lerp(-5.0f, 3.0f, task.denoising.feature_strength));
+  }
+
+  render_buffer.frame_stride = task.frame_stride;
+  render_buffer.pass_stride = task.pass_stride;
+  render_buffer.offset = task.pass_denoising_data;
+
+  target_buffer.pass_stride = task.target_pass_stride;
+  target_buffer.denoising_clean_offset = task.pass_denoising_clean;
+  target_buffer.offset = 0;
+
+  functions.map_neighbor_tiles = function_bind(task.map_neighbor_tiles, _1, device);
+  functions.unmap_neighbor_tiles = function_bind(task.unmap_neighbor_tiles, _1, device);
+
+  tile_info = (TileInfo *)tile_info_mem.alloc(sizeof(TileInfo) / sizeof(int));
+  tile_info->from_render = task.denoising_from_render ? 1 : 0;
+
+  tile_info->frames[0] = 0;
+  tile_info->num_frames = min(task.denoising_frames.size() + 1, DENOISE_MAX_FRAMES);
+  for (int i = 1; i < tile_info->num_frames; i++) {
+    tile_info->frames[i] = task.denoising_frames[i - 1];
+  }
+
+  do_prefilter = task.denoising.store_passes && task.denoising.type == DENOISER_NLM;
+  do_filter = task.denoising.use && task.denoising.type == DENOISER_NLM;
+}
+
+DenoisingTask::~DenoisingTask()
+{
+  storage.XtWX.free();
+  storage.XtWY.free();
+  storage.transform.free();
+  storage.rank.free();
+  buffer.mem.free();
+  buffer.temporary_mem.free();
+  tile_info_mem.free();
+}
+
+void DenoisingTask::set_render_buffer(RenderTileNeighbors &neighbors)
+{
+  for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
+    RenderTile &rtile = neighbors.tiles[i];
+    tile_info->offsets[i] = rtile.offset;
+    tile_info->strides[i] = rtile.stride;
+    tile_info->buffers[i] = rtile.buffer;
+  }
+  tile_info->x[0] = neighbors.tiles[3].x;
+  tile_info->x[1] = neighbors.tiles[4].x;
+  tile_info->x[2] = neighbors.tiles[5].x;
+  tile_info->x[3] = neighbors.tiles[5].x + neighbors.tiles[5].w;
+  tile_info->y[0] = neighbors.tiles[1].y;
+  tile_info->y[1] = neighbors.tiles[4].y;
+  tile_info->y[2] = neighbors.tiles[7].y;
+  tile_info->y[3] = neighbors.tiles[7].y + neighbors.tiles[7].h;
+
+  target_buffer.offset = neighbors.target.offset;
+  target_buffer.stride = neighbors.target.stride;
+  target_buffer.ptr = neighbors.target.buffer;
+
+  if (do_prefilter && neighbors.target.buffers) {
+    target_buffer.denoising_output_offset =
+        neighbors.target.buffers->params.get_denoising_prefiltered_offset();
+  }
+  else {
+    target_buffer.denoising_output_offset = 0;
+  }
+
+  tile_info_mem.copy_to_device();
+}
+
+void DenoisingTask::setup_denoising_buffer()
+{
+  /* Expand filter_area by radius pixels and clamp the result to the extent of the neighboring
+   * tiles */
+  rect = rect_from_shape(filter_area.x, filter_area.y, filter_area.z, filter_area.w);
+  rect = rect_expand(rect, radius);
+  rect = rect_clip(rect,
+                   make_int4(tile_info->x[0], tile_info->y[0], tile_info->x[3], tile_info->y[3]));
+
+  buffer.use_intensity = do_prefilter || (tile_info->num_frames > 1);
+  buffer.passes = buffer.use_intensity ? 15 : 14;
+  buffer.width = rect.z - rect.x;
+  buffer.stride = align_up(buffer.width, 4);
+  buffer.h = rect.w - rect.y;
+  int alignment_floats = divide_up(device->mem_sub_ptr_alignment(), sizeof(float));
+  buffer.pass_stride = align_up(buffer.stride * buffer.h, alignment_floats);
+  buffer.frame_stride = buffer.pass_stride * buffer.passes;
+  /* Pad the total size by four floats since the SIMD kernels might go a bit over the end. */
+  int mem_size = align_up(tile_info->num_frames * buffer.frame_stride + 4, alignment_floats);
+  buffer.mem.alloc_to_device(mem_size, false);
+  buffer.use_time = (tile_info->num_frames > 1);
+
+  /* CPUs process shifts sequentially while GPUs process them in parallel. */
+  int num_layers;
+  if (buffer.gpu_temporary_mem) {
+    /* Shadowing prefiltering uses a radius of 6, so allocate at least that much. */
+    int max_radius = max(radius, 6);
+    int num_shifts = (2 * max_radius + 1) * (2 * max_radius + 1);
+    num_layers = 2 * num_shifts + 1;
+  }
+  else {
+    num_layers = 3;
+  }
+  /* Allocate two layers per shift as well as one for the weight accumulation. */
+  buffer.temporary_mem.alloc_to_device(num_layers * buffer.pass_stride);
+}
+
+void DenoisingTask::prefilter_shadowing()
+{
+  device_ptr null_ptr = (device_ptr)0;
+
+  device_sub_ptr unfiltered_a(buffer.mem, 0, buffer.pass_stride);
+  device_sub_ptr unfiltered_b(buffer.mem, 1 * buffer.pass_stride, buffer.pass_stride);
+  device_sub_ptr sample_var(buffer.mem, 2 * buffer.pass_stride, buffer.pass_stride);
+  device_sub_ptr sample_var_var(buffer.mem, 3 * buffer.pass_stride, buffer.pass_stride);
+  device_sub_ptr buffer_var(buffer.mem, 5 * buffer.pass_stride, buffer.pass_stride);
+  device_sub_ptr filtered_var(buffer.mem, 6 * buffer.pass_stride, buffer.pass_stride);
+
+  /* Get the A/B unfiltered passes, the combined sample variance, the estimated variance of the
+   * sample variance and the buffer variance. */
+  functions.divide_shadow(*unfiltered_a, *unfiltered_b, *sample_var, *sample_var_var, *buffer_var);
+
+  /* Smooth the (generally pretty noisy) buffer variance using the spatial information from the
+   * sample variance. */
+  nlm_state.set_parameters(6, 3, 4.0f, 1.0f, false);
+  functions.non_local_means(*buffer_var, *sample_var, *sample_var_var, *filtered_var);
+
+  /* Reuse memory, the previous data isn't needed anymore. */
+  device_ptr filtered_a = *buffer_var, filtered_b = *sample_var;
+  /* Use the smoothed variance to filter the two shadow half images using each other for weight
+   * calculation. */
+  nlm_state.set_parameters(5, 3, 1.0f, 0.25f, false);
+  functions.non_local_means(*unfiltered_a, *unfiltered_b, *filtered_var, filtered_a);
+  functions.non_local_means(*unfiltered_b, *unfiltered_a, *filtered_var, filtered_b);
+
+  device_ptr residual_var = *sample_var_var;
+  /* Estimate the residual variance between the two filtered halves. */
+  functions.combine_halves(filtered_a, filtered_b, null_ptr, residual_var, 2, rect);
+
+  device_ptr final_a = *unfiltered_a, final_b = *unfiltered_b;
+  /* Use the residual variance for a second filter pass. */
+  nlm_state.set_parameters(4, 2, 1.0f, 0.5f, false);
+  functions.non_local_means(filtered_a, filtered_b, residual_var, final_a);
+  functions.non_local_means(filtered_b, filtered_a, residual_var, final_b);
+
+  /* Combine the two double-filtered halves to a final shadow feature. */
+  device_sub_ptr shadow_pass(buffer.mem, 4 * buffer.pass_stride, buffer.pass_stride);
+  functions.combine_halves(final_a, final_b, *shadow_pass, null_ptr, 0, rect);
+}
+
+void DenoisingTask::prefilter_features()
+{
+  device_sub_ptr unfiltered(buffer.mem, 8 * buffer.pass_stride, buffer.pass_stride);
+  device_sub_ptr variance(buffer.mem, 9 * buffer.pass_stride, buffer.pass_stride);
+
+  int mean_from[] = {0, 1, 2, 12, 6, 7, 8};
+  int variance_from[] = {3, 4, 5, 13, 9, 10, 11};
+  int pass_to[] = {1, 2, 3, 0, 5, 6, 7};
+  for (int pass = 0; pass < 7; pass++) {
+    device_sub_ptr feature_pass(
+        buffer.mem, pass_to[pass] * buffer.pass_stride, buffer.pass_stride);
+    /* Get the unfiltered pass and its variance from the RenderBuffers. */
+    functions.get_feature(mean_from[pass],
+                          variance_from[pass],
+                          *unfiltered,
+                          *variance,
+                          1.0f / render_buffer.samples);
+    /* Smooth the pass and store the result in the denoising buffers. */
+    nlm_state.set_parameters(2, 2, 1.0f, 0.25f, false);
+    functions.non_local_means(*unfiltered, *unfiltered, *variance, *feature_pass);
+  }
+}
+
+void DenoisingTask::prefilter_color()
+{
+  int mean_from[] = {20, 21, 22};
+  int variance_from[] = {23, 24, 25};
+  int mean_to[] = {8, 9, 10};
+  int variance_to[] = {11, 12, 13};
+  int num_color_passes = 3;
+
+  device_only_memory<float> temporary_color(device, "denoising temporary color");
+  temporary_color.alloc_to_device(6 * buffer.pass_stride, false);
+
+  for (int pass = 0; pass < num_color_passes; pass++) {
+    device_sub_ptr color_pass(temporary_color, pass * buffer.pass_stride, buffer.pass_stride);
+    device_sub_ptr color_var_pass(
+        temporary_color, (pass + 3) * buffer.pass_stride, buffer.pass_stride);
+    functions.get_feature(mean_from[pass],
+                          variance_from[pass],
+                          *color_pass,
+                          *color_var_pass,
+                          1.0f / render_buffer.samples);
+  }
+
+  device_sub_ptr depth_pass(buffer.mem, 0, buffer.pass_stride);
+  device_sub_ptr color_var_pass(
+      buffer.mem, variance_to[0] * buffer.pass_stride, 3 * buffer.pass_stride);
+  device_sub_ptr output_pass(buffer.mem, mean_to[0] * buffer.pass_stride, 3 * buffer.pass_stride);
+  functions.detect_outliers(
+      temporary_color.device_pointer, *color_var_pass, *depth_pass, *output_pass);
+
+  if (buffer.use_intensity) {
+    device_sub_ptr intensity_pass(buffer.mem, 14 * buffer.pass_stride, buffer.pass_stride);
+    nlm_state.set_parameters(radius, 4, 2.0f, nlm_k_2 * 4.0f, true);
+    functions.non_local_means(*output_pass, *output_pass, *color_var_pass, *intensity_pass);
+  }
+}
+
+void DenoisingTask::load_buffer()
+{
+  device_ptr null_ptr = (device_ptr)0;
+  /* Copy already-prefiltered passes of every frame into buffer.mem through get_feature(). */
+  int original_offset = render_buffer.offset;
+  /* Pass count was chosen in setup_denoising_buffer(); reuse it instead of re-deriving 15/14. */
+  int num_passes = buffer.passes;
+  for (int i = 0; i < tile_info->num_frames; i++) {
+    for (int pass = 0; pass < num_passes; pass++) {
+      device_sub_ptr to_pass(
+          buffer.mem, i * buffer.frame_stride + pass * buffer.pass_stride, buffer.pass_stride);
+      bool is_variance = (pass >= 11) && (pass <= 13);
+      functions.get_feature(
+          pass, -1, *to_pass, null_ptr, is_variance ? (1.0f / render_buffer.samples) : 1.0f);
+    }
+    render_buffer.offset += render_buffer.frame_stride;
+  }
+  /* The offset was advanced once per frame above; put it back for later users. */
+  render_buffer.offset = original_offset;
+}
+
+void DenoisingTask::write_buffer()
+{
+  reconstruction_state.buffer_params = make_int4(target_buffer.offset,
+                                                 target_buffer.stride,
+                                                 target_buffer.pass_stride,
+                                                 target_buffer.denoising_clean_offset);
+  int num_passes = buffer.passes; /* Set in setup_denoising_buffer(): 15 with intensity, else 14. */
+  for (int pass = 0; pass < num_passes; pass++) {
+    device_sub_ptr from_pass(buffer.mem, pass * buffer.pass_stride, buffer.pass_stride);
+    int out_offset = pass + target_buffer.denoising_output_offset;
+    functions.write_feature(out_offset, *from_pass, target_buffer.ptr);
+  }
+}
+
+void DenoisingTask::construct_transform()
+{
+  storage.w = filter_area.z;
+  storage.h = filter_area.w;
+
+  storage.transform.alloc_to_device(storage.w * storage.h * TRANSFORM_SIZE, false);
+  storage.rank.alloc_to_device(storage.w * storage.h, false);
+
+  functions.construct_transform();
+}
+
+void DenoisingTask::reconstruct()
+{
+  storage.XtWX.alloc_to_device(storage.w * storage.h * XTWX_SIZE, false);
+  storage.XtWY.alloc_to_device(storage.w * storage.h * XTWY_SIZE, false);
+  storage.XtWX.zero_to_device();
+  storage.XtWY.zero_to_device();
+
+  reconstruction_state.filter_window = rect_from_shape(
+      filter_area.x - rect.x, filter_area.y - rect.y, storage.w, storage.h);
+  int tile_coordinate_offset = filter_area.y * target_buffer.stride + filter_area.x;
+  reconstruction_state.buffer_params = make_int4(target_buffer.offset + tile_coordinate_offset,
+                                                 target_buffer.stride,
+                                                 target_buffer.pass_stride,
+                                                 target_buffer.denoising_clean_offset);
+  reconstruction_state.source_w = rect.z - rect.x;
+  reconstruction_state.source_h = rect.w - rect.y;
+
+  device_sub_ptr color_ptr(buffer.mem, 8 * buffer.pass_stride, 3 * buffer.pass_stride);
+  device_sub_ptr color_var_ptr(buffer.mem, 11 * buffer.pass_stride, 3 * buffer.pass_stride);
+  for (int f = 0; f < tile_info->num_frames; f++) {
+    device_ptr scale_ptr = 0;
+    device_sub_ptr *scale_sub_ptr = NULL;
+    if (tile_info->frames[f] != 0 && (tile_info->num_frames > 1)) {
+      scale_sub_ptr = new device_sub_ptr(buffer.mem, 14 * buffer.pass_stride, buffer.pass_stride);
+      scale_ptr = **scale_sub_ptr;
+    }
+
+    functions.accumulate(*color_ptr, *color_var_ptr, scale_ptr, f);
+    delete scale_sub_ptr;
+  }
+  functions.solve(target_buffer.ptr);
+}
+
+void DenoisingTask::run_denoising(RenderTile &tile)
+{
+  RenderTileNeighbors neighbors(tile);
+  functions.map_neighbor_tiles(neighbors);
+  set_render_buffer(neighbors);
+  /* Compute rect from filter_area and allocate buffer.mem / buffer.temporary_mem. */
+  setup_denoising_buffer();
+  /* Fill the buffer: prefilter data coming from a render, otherwise load the passes as-is. */
+  if (tile_info->from_render) {
+    prefilter_shadowing();
+    prefilter_features();
+    prefilter_color();
+  }
+  else {
+    load_buffer();
+  }
+  /* do_filter is set in the constructor: denoising in use and type is DENOISER_NLM. */
+  if (do_filter) {
+    construct_transform();
+    reconstruct();
+  }
+  /* do_prefilter is set in the constructor: store_passes requested and type is DENOISER_NLM. */
+  if (do_prefilter) {
+    write_buffer();
+  }
+  /* Counterpart of the map_neighbor_tiles() call at the top. */
+  functions.unmap_neighbor_tiles(neighbors);
+}
+
+CCL_NAMESPACE_END
--- a/intern/cycles/device/device_denoising.h
+++ b/intern/cycles/device/device_denoising.h
@@ -0,0 +1,197 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DEVICE_DENOISING_H__
+#define __DEVICE_DENOISING_H__
+
+#include "device/device.h"
+
+#include "render/buffers.h"
+
+#include "kernel/filter/filter_defines.h"
+
+#include "util/util_profiling.h"
+
+CCL_NAMESPACE_BEGIN
+
+class DenoisingTask {
+ public:
+  /* Parameters of the denoising algorithm. */
+  int radius;
+  float nlm_k_2;
+  float pca_threshold;
+
+  /* Parameters of the RenderBuffers. */
+  struct RenderBuffers {
+    int offset;
+    int pass_stride;
+    int frame_stride;
+    int samples;
+  } render_buffer;
+
+  /* Pointer and parameters of the target buffer. */
+  struct TargetBuffer {
+    int offset;
+    int stride;
+    int pass_stride;
+    int denoising_clean_offset;
+    int denoising_output_offset;
+    device_ptr ptr;
+  } target_buffer;
+
+  TileInfo *tile_info;
+  device_vector<int> tile_info_mem;
+
+  ProfilingState *profiler;
+
+  int4 rect;
+  int4 filter_area;
+
+  bool do_prefilter;
+  bool do_filter;
+
+  struct DeviceFunctions { /* Callbacks invoked by the prefilter, filter and write-back steps. */
+    function<bool(
+        device_ptr image_ptr,    /* Contains the values that are smoothed. */
+        device_ptr guide_ptr,    /* Contains the values that are used to calculate weights. */
+        device_ptr variance_ptr, /* Contains the variance of the guide image. */
+        device_ptr out_ptr       /* The filtered output is written into this image. */
+        )>
+        non_local_means;
+    function<bool(
+        device_ptr color_ptr, device_ptr color_variance_ptr, device_ptr scale_ptr, int frame)>
+        accumulate;
+    function<bool(device_ptr output_ptr)> solve;
+    function<bool()> construct_transform;
+
+    function<bool(device_ptr a_ptr,
+                  device_ptr b_ptr,
+                  device_ptr mean_ptr,
+                  device_ptr variance_ptr,
+                  int r,
+                  int4 rect)>
+        combine_halves;
+    function<bool(device_ptr a_ptr,
+                  device_ptr b_ptr,
+                  device_ptr sample_variance_ptr,
+                  device_ptr sv_variance_ptr,
+                  device_ptr buffer_variance_ptr)>
+        divide_shadow;
+    function<bool(int mean_offset,
+                  int variance_offset,
+                  device_ptr mean_ptr,
+                  device_ptr variance_ptr,
+                  float scale)>
+        get_feature;
+    function<bool(device_ptr image_ptr,
+                  device_ptr variance_ptr,
+                  device_ptr depth_ptr,
+                  device_ptr output_ptr)>
+        detect_outliers;
+    function<bool(int out_offset, device_ptr from_ptr, device_ptr buffer_ptr)> write_feature;
+    function<void(RenderTileNeighbors &neighbors)> map_neighbor_tiles;
+    function<void(RenderTileNeighbors &neighbors)> unmap_neighbor_tiles;
+  } functions;
+
+  /* Stores state of the current Reconstruction operation,
+   * which is accessed by the device in order to perform the operation. */
+  struct ReconstructionState {
+    int4 filter_window;
+    int4 buffer_params;
+
+    int source_w;
+    int source_h;
+  } reconstruction_state;
+
+  /* Stores state of the current NLM operation,
+   * which is accessed by the device in order to perform the operation. */
+  struct NLMState {
+    int r;     /* Search radius of the filter. */
+    int f;     /* Patch size of the filter. */
+    float a;   /* Variance compensation factor in the MSE estimation. */
+    float k_2; /* Squared value of the k parameter of the filter. */
+    bool is_color;
+
+    void set_parameters(int r_, int f_, float a_, float k_2_, bool is_color_)
+    {
+      r = r_;
+      f = f_;
+      a = a_, k_2 = k_2_;
+      is_color = is_color_;
+    }
+  } nlm_state;
+
+  struct Storage {
+    device_only_memory<float> transform;
+    device_only_memory<int> rank;
+    device_only_memory<float> XtWX;
+    device_only_memory<float3> XtWY;
+    int w;
+    int h;
+
+    Storage(Device *device)
+        : transform(device, "denoising transform"),
+          rank(device, "denoising rank"),
+          XtWX(device, "denoising XtWX"),
+          XtWY(device, "denoising XtWY")
+    {
+    }
+  } storage;
+
+  DenoisingTask(Device *device, const DeviceTask &task);
+  ~DenoisingTask();
+
+  void run_denoising(RenderTile &tile);
+
+  struct DenoiseBuffers {
+    int pass_stride;
+    int passes;
+    int stride;
+    int h;
+    int width;
+    int frame_stride;
+    device_only_memory<float> mem;
+    device_only_memory<float> temporary_mem;
+    bool use_time;
+    bool use_intensity;
+
+    bool gpu_temporary_mem;
+
+    DenoiseBuffers(Device *device)
+        : mem(device, "denoising pixel buffer"),
+          temporary_mem(device, "denoising temporary mem", true)
+    {
+    }
+  } buffer;
+
+ protected:
+  Device *device;
+
+  void set_render_buffer(RenderTileNeighbors &neighbors);
+  void setup_denoising_buffer();
+  void prefilter_shadowing();
+  void prefilter_features();
+  void prefilter_color();
+  void construct_transform();
+  void reconstruct();
+
+  void load_buffer();
+  void write_buffer();
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __DEVICE_DENOISING_H__ */
--- a/intern/cycles/device/device_dummy.cpp
+++ b/intern/cycles/device/device_dummy.cpp
@@ -14,10 +14,8 @@
 * limitations under the License.
 */

-#include "device/dummy/device.h"
-
 #include "device/device.h"
-#include "device/device_queue.h"
+#include "device/device_intern.h"

 CCL_NAMESPACE_BEGIN

@@ -25,8 +23,8 @@ CCL_NAMESPACE_BEGIN

 class DummyDevice : public Device {
 public:
-  DummyDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_)
-      : Device(info_, stats_, profiler_)
+  DummyDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_)
+      : Device(info_, stats_, profiler_, background_)
  {
    error_msg = info.error_msg;
  }
@@ -48,7 +46,7 @@ class DummyDevice : public Device {
  {
  }

-  virtual void mem_copy_from(device_memory &, size_t, size_t, size_t, size_t) override
+  virtual void mem_copy_from(device_memory &, int, int, int, int) override
  {
  }

@@ -63,11 +61,23 @@ class DummyDevice : public Device {
  virtual void const_copy_to(const char *, void *, size_t) override
  {
  }
+
+  virtual void task_add(DeviceTask &) override
+  {
+  }
+
+  virtual void task_wait() override
+  {
+  }
+
+  virtual void task_cancel() override
+  {
+  }
 };

-Device *device_dummy_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
+Device *device_dummy_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
 {
-  return new DummyDevice(info, stats, profiler);
+  return new DummyDevice(info, stats, profiler, background);
 }

 CCL_NAMESPACE_END
--- a/intern/cycles/device/device_graphics_interop.h
+++ b/intern/cycles/device/device_graphics_interop.h
@@ -1,42 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "render/display_driver.h"
-
-#include "util/util_types.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* Device-side graphics interoperability support.
- *
- * Takes care of holding all the handlers needed by the device to implement interoperability with
- * the graphics library. */
-class DeviceGraphicsInterop {
- public:
-  DeviceGraphicsInterop() = default;
-  virtual ~DeviceGraphicsInterop() = default;
-
-  /* Update this device-side graphics interoperability object with the given destination resource
-   * information. */
-  virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) = 0;
-
-  virtual device_ptr map() = 0;
-  virtual void unmap() = 0;
-};
-
-CCL_NAMESPACE_END
--- a/intern/cycles/device/device_intern.h
+++ b/intern/cycles/device/device_intern.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DEVICE_INTERN_H__
+#define __DEVICE_INTERN_H__
+
+#include "util/util_string.h"
+#include "util/util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+class Device;
+class DeviceInfo;
+class Profiler;
+class Stats;
+
+Device *device_cpu_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
+bool device_opencl_init();
+Device *device_opencl_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
+bool device_opencl_compile_kernel(const vector<string> &parameters);
+bool device_cuda_init();
+Device *device_cuda_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
+bool device_optix_init();
+Device *device_optix_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
+Device *device_dummy_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
+
+Device *device_network_create(DeviceInfo &info,
+                              Stats &stats,
+                              Profiler &profiler,
+                              const char *address);
+Device *device_multi_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
+
+void device_cpu_info(vector<DeviceInfo> &devices);
+void device_opencl_info(vector<DeviceInfo> &devices);
+void device_cuda_info(vector<DeviceInfo> &devices);
+void device_optix_info(const vector<DeviceInfo> &cuda_devices, vector<DeviceInfo> &devices);
+void device_network_info(vector<DeviceInfo> &devices);
+
+string device_cpu_capabilities();
+string device_opencl_capabilities();
+string device_cuda_capabilities();
+
+CCL_NAMESPACE_END
+
+#endif /* __DEVICE_INTERN_H__ */
--- a/intern/cycles/device/device_kernel.cpp
+++ b/intern/cycles/device/device_kernel.cpp
@@ -1,157 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "device/device_kernel.h"
-
-#include "util/util_logging.h"
-
-CCL_NAMESPACE_BEGIN
-
-const char *device_kernel_as_string(DeviceKernel kernel)
-{
-  switch (kernel) {
-    /* Integrator. */
-    case DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA:
-      return "integrator_init_from_camera";
-    case DEVICE_KERNEL_INTEGRATOR_INIT_FROM_BAKE:
-      return "integrator_init_from_bake";
-    case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
-      return "integrator_intersect_closest";
-    case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW:
-      return "integrator_intersect_shadow";
-    case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
-      return "integrator_intersect_subsurface";
-    case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK:
-      return "integrator_intersect_volume_stack";
-    case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND:
-      return "integrator_shade_background";
-    case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT:
-      return "integrator_shade_light";
-    case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW:
-      return "integrator_shade_shadow";
-    case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE:
-      return "integrator_shade_surface";
-    case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
-      return "integrator_shade_surface_raytrace";
-    case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME:
-      return "integrator_shade_volume";
-    case DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL:
-      return "integrator_megakernel";
-    case DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY:
-      return "integrator_queued_paths_array";
-    case DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY:
-      return "integrator_queued_shadow_paths_array";
-    case DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY:
-      return "integrator_active_paths_array";
-    case DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY:
-      return "integrator_terminated_paths_array";
-    case DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY:
-      return "integrator_sorted_paths_array";
-    case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY:
-      return "integrator_compact_paths_array";
-    case DEVICE_KERNEL_INTEGRATOR_COMPACT_STATES:
-      return "integrator_compact_states";
-    case DEVICE_KERNEL_INTEGRATOR_RESET:
-      return "integrator_reset";
-    case DEVICE_KERNEL_INTEGRATOR_SHADOW_CATCHER_COUNT_POSSIBLE_SPLITS:
-      return "integrator_shadow_catcher_count_possible_splits";
-
-    /* Shader evaluation. */
-    case DEVICE_KERNEL_SHADER_EVAL_DISPLACE:
-      return "shader_eval_displace";
-    case DEVICE_KERNEL_SHADER_EVAL_BACKGROUND:
-      return "shader_eval_background";
-
-      /* Film. */
-
-#define FILM_CONVERT_KERNEL_AS_STRING(variant, variant_lowercase) \
-  case DEVICE_KERNEL_FILM_CONVERT_##variant: \
-    return "film_convert_" #variant_lowercase; \
-  case DEVICE_KERNEL_FILM_CONVERT_##variant##_HALF_RGBA: \
-    return "film_convert_" #variant_lowercase "_half_rgba";
-
-      FILM_CONVERT_KERNEL_AS_STRING(DEPTH, depth)
-      FILM_CONVERT_KERNEL_AS_STRING(MIST, mist)
-      FILM_CONVERT_KERNEL_AS_STRING(SAMPLE_COUNT, sample_count)
-      FILM_CONVERT_KERNEL_AS_STRING(FLOAT, float)
-      FILM_CONVERT_KERNEL_AS_STRING(LIGHT_PATH, light_path)
-      FILM_CONVERT_KERNEL_AS_STRING(FLOAT3, float3)
-      FILM_CONVERT_KERNEL_AS_STRING(MOTION, motion)
-      FILM_CONVERT_KERNEL_AS_STRING(CRYPTOMATTE, cryptomatte)
-      FILM_CONVERT_KERNEL_AS_STRING(SHADOW_CATCHER, shadow_catcher)
-      FILM_CONVERT_KERNEL_AS_STRING(SHADOW_CATCHER_MATTE_WITH_SHADOW,
-                                    shadow_catcher_matte_with_shadow)
-      FILM_CONVERT_KERNEL_AS_STRING(COMBINED, combined)
-      FILM_CONVERT_KERNEL_AS_STRING(FLOAT4, float4)
-
-#undef FILM_CONVERT_KERNEL_AS_STRING
-
-    /* Adaptive sampling. */
-    case DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_CHECK:
-      return "adaptive_sampling_convergence_check";
-    case DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_X:
-      return "adaptive_sampling_filter_x";
-    case DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_Y:
-      return "adaptive_sampling_filter_y";
-
-    /* Denoising. */
-    case DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS:
-      return "filter_guiding_preprocess";
-    case DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO:
-      return "filter_guiding_set_fake_albedo";
-    case DEVICE_KERNEL_FILTER_COLOR_PREPROCESS:
-      return "filter_color_preprocess";
-    case DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS:
-      return "filter_color_postprocess";
-
-    /* Cryptomatte. */
-    case DEVICE_KERNEL_CRYPTOMATTE_POSTPROCESS:
-      return "cryptomatte_postprocess";
-
-    /* Generic */
-    case DEVICE_KERNEL_PREFIX_SUM:
-      return "prefix_sum";
-
-    case DEVICE_KERNEL_NUM:
-      break;
-  };
-  LOG(FATAL) << "Unhandled kernel " << static_cast<int>(kernel) << ", should never happen.";
-  return "UNKNOWN";
-}
-
-std::ostream &operator<<(std::ostream &os, DeviceKernel kernel)
-{
-  os << device_kernel_as_string(kernel);
-  return os;
-}
-
-string device_kernel_mask_as_string(DeviceKernelMask mask)
-{
-  string str;
-
-  for (uint64_t i = 0; i < sizeof(DeviceKernelMask) * 8; i++) {
-    if (mask & (uint64_t(1) << i)) {
-      if (!str.empty()) {
-        str += " ";
-      }
-      str += device_kernel_as_string((DeviceKernel)i);
-    }
-  }
-
-  return str;
-}
-
-CCL_NAMESPACE_END
--- a/intern/cycles/device/device_kernel.h
+++ b/intern/cycles/device/device_kernel.h
@@ -1,33 +0,0 @@
-/*
- * Copyright 2011-2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "kernel/kernel_types.h"
-
-#include "util/util_string.h"
-
-#include <ostream>  // NOLINT
-
-CCL_NAMESPACE_BEGIN
-
-const char *device_kernel_as_string(DeviceKernel kernel);
-std::ostream &operator<<(std::ostream &os, DeviceKernel kernel);
-
-typedef uint64_t DeviceKernelMask;
-string device_kernel_mask_as_string(DeviceKernelMask mask);
-
-CCL_NAMESPACE_END
--- a/intern/cycles/device/device_memory.cpp
+++ b/intern/cycles/device/device_memory.cpp
@@ -23,7 +23,7 @@ CCL_NAMESPACE_BEGIN

 device_memory::device_memory(Device *device, const char *name, MemoryType type)
    : data_type(device_type_traits<uchar>::data_type),
-      data_elements(device_type_traits<uchar>::num_elements_cpu),
+      data_elements(device_type_traits<uchar>::num_elements),
      data_size(0),
      device_size(0),
      data_width(0),
@@ -136,7 +136,7 @@ void device_memory::device_copy_to()
  }
 }

-void device_memory::device_copy_from(size_t y, size_t w, size_t h, size_t elem)
+void device_memory::device_copy_from(int y, int w, int h, int elem)
 {
  assert(type != MEM_TEXTURE && type != MEM_READ_ONLY && type != MEM_GLOBAL);
  device->mem_copy_from(*this, y, w, h, elem);
@@ -149,11 +149,6 @@ void device_memory::device_zero()
  }
 }

-bool device_memory::device_is_cpu()
-{
-  return (device->info.type == DEVICE_CPU);
-}
-
 void device_memory::swap_device(Device *new_device,
                                size_t new_device_size,
                                device_ptr new_device_ptr)
@@ -181,7 +176,7 @@ bool device_memory::is_resident(Device *sub_device) const

 /* Device Sub Ptr */

-device_sub_ptr::device_sub_ptr(device_memory &mem, size_t offset, size_t size) : device(mem.device)
+device_sub_ptr::device_sub_ptr(device_memory &mem, int offset, int size) : device(mem.device)
 {
  ptr = device->mem_alloc_sub_ptr(mem, offset, size);
 }
--- a/intern/cycles/device/device_memory.h
+++ b/intern/cycles/device/device_memory.h
@@ -38,6 +38,7 @@ enum MemoryType {
  MEM_DEVICE_ONLY,
  MEM_GLOBAL,
  MEM_TEXTURE,
+  MEM_PIXELS
 };

 /* Supported Data Types */
@@ -53,7 +54,7 @@ enum DataType {
  TYPE_UINT64,
 };

-static constexpr size_t datatype_size(DataType datatype)
+static inline size_t datatype_size(DataType datatype)
 {
  switch (datatype) {
    case TYPE_UNKNOWN:
@@ -81,155 +82,112 @@ static constexpr size_t datatype_size(DataType datatype)

 template<typename T> struct device_type_traits {
  static const DataType data_type = TYPE_UNKNOWN;
-  static const size_t num_elements_cpu = sizeof(T);
-  static const size_t num_elements_gpu = sizeof(T);
+  static const int num_elements = sizeof(T);
 };

 template<> struct device_type_traits<uchar> {
  static const DataType data_type = TYPE_UCHAR;
-  static const size_t num_elements_cpu = 1;
-  static const size_t num_elements_gpu = 1;
-  static_assert(sizeof(uchar) == num_elements_cpu * datatype_size(data_type));
+  static const int num_elements = 1;
 };

 template<> struct device_type_traits<uchar2> {
  static const DataType data_type = TYPE_UCHAR;
-  static const size_t num_elements_cpu = 2;
-  static const size_t num_elements_gpu = 2;
-  static_assert(sizeof(uchar2) == num_elements_cpu * datatype_size(data_type));
+  static const int num_elements = 2;
 };

 template<> struct device_type_traits<uchar3> {
  static const DataType data_type = TYPE_UCHAR;
-  static const size_t num_elements_cpu = 3;
-  static const size_t num_elements_gpu = 3;
-  static_assert(sizeof(uchar3) == num_elements_cpu * datatype_size(data_type));
+  static const int num_elements = 3;
 };

 template<> struct device_type_traits<uchar4> {
  static const DataType data_type = TYPE_UCHAR;
-  static const size_t num_elements_cpu = 4;
-  static const size_t num_elements_gpu = 4;
-  static_assert(sizeof(uchar4) == num_elements_cpu * datatype_size(data_type));
+  static const int num_elements = 4;
 };

 template<> struct device_type_traits<uint> {
  static const DataType data_type = TYPE_UINT;
-  static const size_t num_elements_cpu = 1;
-  static const size_t num_elements_gpu = 1;
-  static_assert(sizeof(uint) == num_elements_cpu * datatype_size(data_type));
+  static const int num_elements = 1;
 };

 template<> struct device_type_traits<uint2> {
  static const DataType data_type = TYPE_UINT;
-  static const size_t num_elements_cpu = 2;
-  static const size_t num_elements_gpu = 2;
-  static_assert(sizeof(uint2) == num_elements_cpu * datatype_size(data_type));
+  static const int num_elements = 2;
 };

 template<> struct device_type_traits<uint3> {
  static const DataType data_type = TYPE_UINT;
-  static const size_t num_elements_cpu = 3;
-  static const size_t num_elements_gpu = 3;
-  static_assert(sizeof(uint3) == num_elements_cpu * datatype_size(data_type));
+  static const int num_elements = 3;
 };

 template<> struct device_type_traits<uint4> {
  static const DataType data_type = TYPE_UINT;
-  static const size_t num_elements_cpu = 4;
-  static const size_t num_elements_gpu = 4;
-  static_assert(sizeof(uint4) == num_elements_cpu * datatype_size(data_type));
+  static const int num_elements = 4;
 };

 template<> struct device_type_traits<int> {
  static const DataType data_type = TYPE_INT;
-  static const size_t num_elements_cpu = 1;
-  static const size_t num_elements_gpu = 1;
-  static_assert(sizeof(int) == num_elements_cpu * datatype_size(data_type));
+  static const int num_elements = 1;
 };

 template<> struct device_type_traits<int2> {
  static const DataType data_type = TYPE_INT;
-  static const size_t num_elements_cpu = 2;
-  static const size_t num_elements_gpu = 2;
-  static_assert(sizeof(int2) == num_elements_cpu * datatype_size(data_type));
+  static const int num_elements = 2;
 };

 template<> struct device_type_traits<int3> {
  static const DataType data_type = TYPE_INT;
-  static const size_t num_elements_cpu = 4;
-  static const size_t num_elements_gpu = 3;
-  static_assert(sizeof(int3) == num_elements_cpu * datatype_size(data_type));
+  static const int num_elements = 3;
 };

 template<> struct device_type_traits<int4> {
  static const DataType data_type = TYPE_INT;
-  static const size_t num_elements_cpu = 4;
-  static const size_t num_elements_gpu = 4;
-  static_assert(sizeof(int4) == num_elements_cpu * datatype_size(data_type));
+  static const int num_elements = 4;
 };

 template<> struct device_type_traits<float> {
  static const DataType data_type = TYPE_FLOAT;
-  static const size_t num_elements_cpu = 1;
-  static const size_t num_elements_gpu = 1;
-  static_assert(sizeof(float) == num_elements_cpu * datatype_size(data_type));
+  static const int num_elements = 1;
 };

 template<> struct device_type_traits<float2> {
  static const DataType data_type = TYPE_FLOAT;
-  static const size_t num_elements_cpu = 2;
-  static const size_t num_elements_gpu = 2;
-  static_assert(sizeof(float2) == num_elements_cpu * datatype_size(data_type));
+  static const int num_elements = 2;
 };

 template<> struct device_type_traits<float3> {
  static const DataType data_type = TYPE_FLOAT;
-  static const size_t num_elements_cpu = 4;
-  static const size_t num_elements_gpu = 3;
-  static_assert(sizeof(float3) == num_elements_cpu * datatype_size(data_type));
+  static const int num_elements = 4;
 };

 template<> struct device_type_traits<float4> {
  static const DataType data_type = TYPE_FLOAT;
-  static const size_t num_elements_cpu = 4;
-  static const size_t num_elements_gpu = 4;
-  static_assert(sizeof(float4) == num_elements_cpu * datatype_size(data_type));
+  static const int num_elements = 4;
 };

 template<> struct device_type_traits<half> {
  static const DataType data_type = TYPE_HALF;
-  static const size_t num_elements_cpu = 1;
-  static const size_t num_elements_gpu = 1;
-  static_assert(sizeof(half) == num_elements_cpu * datatype_size(data_type));
+  static const int num_elements = 1;
 };

 template<> struct device_type_traits<ushort4> {
  static const DataType data_type = TYPE_UINT16;
-  static const size_t num_elements_cpu = 4;
-  static const size_t num_elements_gpu = 4;
-  static_assert(sizeof(ushort4) == num_elements_cpu * datatype_size(data_type));
+  static const int num_elements = 4;
 };

 template<> struct device_type_traits<uint16_t> {
  static const DataType data_type = TYPE_UINT16;
-  static const size_t num_elements_cpu = 1;
-  static const size_t num_elements_gpu = 1;
-  static_assert(sizeof(uint16_t) == num_elements_cpu * datatype_size(data_type));
+  static const int num_elements = 1;
 };

 template<> struct device_type_traits<half4> {
  static const DataType data_type = TYPE_HALF;
-  static const size_t num_elements_cpu = 4;
-  static const size_t num_elements_gpu = 4;
-  static_assert(sizeof(half4) == num_elements_cpu * datatype_size(data_type));
+  static const int num_elements = 4;
 };

 template<> struct device_type_traits<uint64_t> {
  static const DataType data_type = TYPE_UINT64;
-  static const size_t num_elements_cpu = 1;
-  static const size_t num_elements_gpu = 1;
-  static_assert(sizeof(uint64_t) == num_elements_cpu * datatype_size(data_type));
+  static const int num_elements = 1;
 };

 /* Device Memory
@@ -277,7 +235,6 @@ class device_memory {
 protected:
  friend class CUDADevice;
  friend class OptiXDevice;
-  friend class HIPDevice;

  /* Only create through subclasses. */
  device_memory(Device *device, const char *name, MemoryType type);
@@ -297,11 +254,9 @@ class device_memory {
  void device_alloc();
  void device_free();
  void device_copy_to();
-  void device_copy_from(size_t y, size_t w, size_t h, size_t elem);
+  void device_copy_from(int y, int w, int h, int elem);
  void device_zero();

-  bool device_is_cpu();
-
  device_ptr original_device_ptr;
  size_t original_device_size;
  Device *original_device;
@@ -320,9 +275,7 @@ template<typename T> class device_only_memory : public device_memory {
      : device_memory(device, name, allow_host_memory_fallback ? MEM_READ_WRITE : MEM_DEVICE_ONLY)
  {
    data_type = device_type_traits<T>::data_type;
-    data_elements = max(device_is_cpu() ? device_type_traits<T>::num_elements_cpu :
-                                          device_type_traits<T>::num_elements_gpu,
-                        1);
+    data_elements = max(device_type_traits<T>::num_elements, 1);
  }

  device_only_memory(device_only_memory &&other) noexcept : device_memory(std::move(other))
@@ -378,15 +331,11 @@ template<typename T> class device_only_memory : public device_memory {

 template<typename T> class device_vector : public device_memory {
 public:
-  /* Can only use this for types that have the same size on CPU and GPU. */
-  static_assert(device_type_traits<T>::num_elements_cpu ==
-                device_type_traits<T>::num_elements_gpu);
-
  device_vector(Device *device, const char *name, MemoryType type)
      : device_memory(device, name, type)
  {
    data_type = device_type_traits<T>::data_type;
-    data_elements = device_type_traits<T>::num_elements_cpu;
+    data_elements = device_type_traits<T>::num_elements;
    modified = true;
    need_realloc_ = true;

@@ -528,11 +477,6 @@ template<typename T> class device_vector : public device_memory {
    return (T *)host_pointer;
  }

-  const T *data() const
-  {
-    return (T *)host_pointer;
-  }
-
  T &operator[](size_t i)
  {
    assert(i < data_size);
@@ -563,10 +507,10 @@ template<typename T> class device_vector : public device_memory {

  void copy_from_device()
  {
-    device_copy_from(0, data_width, (data_height == 0) ? 1 : data_height, sizeof(T));
+    device_copy_from(0, data_width, data_height, sizeof(T));
  }

-  void copy_from_device(size_t y, size_t w, size_t h)
+  void copy_from_device(int y, int w, int h)
  {
    device_copy_from(y, w, h, sizeof(T));
  }
@@ -591,6 +535,33 @@ template<typename T> class device_vector : public device_memory {
  }
 };

+/* Pixel Memory
+ *
+ * Device memory to efficiently draw as pixels to the screen in interactive
+ * rendering. Only copying pixels from the device is supported, not copying to. */
+
+template<typename T> class device_pixels : public device_vector<T> {
+ public:
+  /* Forwards to the device_vector constructor with the memory type fixed to MEM_PIXELS. */
+  device_pixels(Device *device, const char *name) : device_vector<T>(device, name, MEM_PIXELS)
+  {
+  }
+
+  /* Call device_vector<T>::alloc() with the given dimensions, then request a device
+   * allocation unless a device pointer is already set. */
+  void alloc_to_device(size_t width, size_t height, size_t depth = 0)
+  {
+    device_vector<T>::alloc(width, height, depth);
+
+    if (device_memory::device_pointer) {
+      return;
+    }
+
+    device_memory::device_alloc();
+  }
+
+  /* Copy the requested region back from the device (element size is sizeof(T)) and return
+   * the pointer obtained from device_vector<T>::data(). */
+  T *copy_from_device(int y, int w, int h)
+  {
+    device_memory::device_copy_from(y, w, h, sizeof(T));
+
+    T *pixels = device_vector<T>::data();
+    return pixels;
+  }
+};
+
 /* Device Sub Memory
 *
 * Pointer into existing memory. It is not allocated separately, but created
@@ -602,7 +573,7 @@ template<typename T> class device_vector : public device_memory {

 class device_sub_ptr {
 public:
-  device_sub_ptr(device_memory &mem, size_t offset, size_t size);
+  device_sub_ptr(device_memory &mem, int offset, int size);
  ~device_sub_ptr();

  device_ptr operator*() const
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -0,0 +1,826 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <sstream>
+#include <stdlib.h>
+
+#include "bvh/bvh_multi.h"
+
+#include "device/device.h"
+#include "device/device_intern.h"
+#include "device/device_network.h"
+
+#include "render/buffers.h"
+#include "render/geometry.h"
+
+#include "util/util_foreach.h"
+#include "util/util_list.h"
+#include "util/util_logging.h"
+#include "util/util_map.h"
+#include "util/util_time.h"
+
+CCL_NAMESPACE_BEGIN
+
+class MultiDevice : public Device {
+ public:
+  struct SubDevice {
+    Stats stats;
+    Device *device;
+    map<device_ptr, device_ptr> ptr_map;
+    int peer_island_index = -1;
+  };
+
+  list<SubDevice> devices, denoising_devices;
+  device_ptr unique_key;
+  vector<vector<SubDevice *>> peer_islands;
+  bool use_denoising;
+  bool matching_rendering_and_denoising_devices;
+
+  /* Construct the multi device described by 'info'.
+   *
+   * Creates one sub-device per entry of 'info.multi_devices' (render) and
+   * 'info.denoising_devices' (denoising), groups the render sub-devices into peer islands,
+   * and determines whether render and denoising sub-devices line up one-to-one.
+   *
+   * 'stats' and 'profiler' are forwarded to the Device base class; every sub-device gets its
+   * own 'Stats' instance stored inside its 'SubDevice'. */
+  MultiDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_)
+      : Device(info, stats, profiler, background_),
+        unique_key(1),
+        use_denoising(!info.denoising_devices.empty())
+  {
+    foreach (DeviceInfo &subinfo, info.multi_devices) {
+      /* Always add CPU devices at the back since GPU devices can change
+       * host memory pointers, which CPU uses as device pointer. */
+      SubDevice *sub;
+      if (subinfo.type == DEVICE_CPU) {
+        devices.emplace_back();
+        sub = &devices.back();
+      }
+      else {
+        devices.emplace_front();
+        sub = &devices.front();
+      }
+
+      /* The pointer to 'sub->stats' will stay valid even after new devices
+       * are added, since 'devices' is a linked list. */
+      sub->device = Device::create(subinfo, sub->stats, profiler, background);
+    }
+
+    foreach (DeviceInfo &subinfo, info.denoising_devices) {
+      denoising_devices.emplace_front();
+      SubDevice *sub = &denoising_devices.front();
+
+      sub->device = Device::create(subinfo, sub->stats, profiler, background);
+    }
+
+    /* Build a list of peer islands for the available render devices */
+    foreach (SubDevice &sub, devices) {
+      /* First ensure that every device is in at least one peer island */
+      if (sub.peer_island_index < 0) {
+        peer_islands.emplace_back();
+        sub.peer_island_index = (int)peer_islands.size() - 1;
+        peer_islands[sub.peer_island_index].push_back(&sub);
+      }
+
+      if (!info.has_peer_memory) {
+        continue;
+      }
+
+      /* Second check peer access between devices and fill up the islands accordingly */
+      foreach (SubDevice &peer_sub, devices) {
+        if (peer_sub.peer_island_index < 0 &&
+            peer_sub.device->info.type == sub.device->info.type &&
+            peer_sub.device->check_peer_access(sub.device)) {
+          peer_sub.peer_island_index = sub.peer_island_index;
+          peer_islands[sub.peer_island_index].push_back(&peer_sub);
+        }
+      }
+    }
+
+    /* Try to re-use memory when denoising and render devices use the same physical devices
+     * (e.g. OptiX denoising and CUDA rendering device pointing to the same GPU).
+     * Ordering has to match as well, so that 'DeviceTask::split' behaves consistent. */
+    matching_rendering_and_denoising_devices = denoising_devices.empty() ||
+                                               (devices.size() == denoising_devices.size());
+    if (matching_rendering_and_denoising_devices) {
+      for (list<SubDevice>::iterator device_it = devices.begin(),
+                                     denoising_device_it = denoising_devices.begin();
+           device_it != devices.end() && denoising_device_it != denoising_devices.end();
+           ++device_it, ++denoising_device_it) {
+        /* Named 'render_info' so that the constructor parameter 'info' is not shadowed. */
+        const DeviceInfo &render_info = device_it->device->info;
+        const DeviceInfo &denoising_info = denoising_device_it->device->info;
+        if ((render_info.type != DEVICE_CUDA && render_info.type != DEVICE_OPTIX) ||
+            (denoising_info.type != DEVICE_CUDA && denoising_info.type != DEVICE_OPTIX) ||
+            render_info.num != denoising_info.num) {
+          matching_rendering_and_denoising_devices = false;
+          break;
+        }
+      }
+    }
+
+#ifdef WITH_NETWORK
+    /* try to add network devices */
+    ServerDiscovery discovery(true);
+    time_sleep(1.0);
+
+    vector<string> servers = discovery.get_server_list();
+
+    foreach (string &server, servers) {
+      Device *device = device_network_create(info, stats, profiler, server.c_str());
+      if (device) {
+        /* 'SubDevice' has no constructor taking a device, so create the entry in place and
+         * fill it in afterwards. */
+        devices.emplace_back();
+        SubDevice &network_sub = devices.back();
+        network_sub.device = device;
+
+        /* The peer islands were already built above. Give the network device an island of
+         * its own so that memory lookups never index 'peer_islands' with -1. */
+        peer_islands.emplace_back();
+        network_sub.peer_island_index = (int)peer_islands.size() - 1;
+        peer_islands[network_sub.peer_island_index].push_back(&network_sub);
+      }
+    }
+#endif
+  }
+
+  /* Delete the devices of all render sub-devices, then those of all denoising sub-devices. */
+  ~MultiDevice()
+  {
+    foreach (SubDevice &render_sub, devices) {
+      delete render_sub.device;
+    }
+
+    foreach (SubDevice &denoise_sub, denoising_devices) {
+      delete denoise_sub.device;
+    }
+  }
+
+  /* Rebuild 'error_msg' as the concatenation of the error messages of all render sub-devices
+   * followed by those of all denoising sub-devices, and return it. */
+  const string &error_message() override
+  {
+    error_msg.clear();
+
+    foreach (SubDevice &render_sub, devices) {
+      error_msg += render_sub.device->error_message();
+    }
+
+    foreach (SubDevice &denoise_sub, denoising_devices) {
+      error_msg += denoise_sub.device->error_message();
+    }
+
+    return error_msg;
+  }
+
+  /* Returns false when there is more than one render sub-device; otherwise the answer of
+   * the first render sub-device is returned. */
+  virtual bool show_samples() const override
+  {
+    const bool has_multiple_devices = devices.size() > 1;
+    return has_multiple_devices ? false : devices.front().device->show_samples();
+  }
+
+  /* Combine the BVH layout masks reported by the render sub-devices.
+   *
+   * Returns BVH_LAYOUT_MULTI_OPTIX when OptiX is exactly the set of layouts common to all
+   * sub-devices, BVH_LAYOUT_MULTI_OPTIX_EMBREE when both OptiX and Embree are each supported
+   * by at least one sub-device, and the common mask otherwise. */
+  virtual BVHLayoutMask get_bvh_layout_mask() const override
+  {
+    /* Intersection and union of the per-device masks. */
+    BVHLayoutMask common_layouts = BVH_LAYOUT_ALL;
+    BVHLayoutMask any_layouts = BVH_LAYOUT_NONE;
+    foreach (const SubDevice &sub_device, devices) {
+      const BVHLayoutMask sub_layouts = sub_device.device->get_bvh_layout_mask();
+      common_layouts &= sub_layouts;
+      any_layouts |= sub_layouts;
+    }
+
+    /* Every OptiX device needs its own acceleration structure. */
+    if (common_layouts == BVH_LAYOUT_OPTIX) {
+      return BVH_LAYOUT_MULTI_OPTIX;
+    }
+
+    /* Without a shared layout, fall back to one acceleration structure per device. */
+    const BVHLayoutMask optix_and_embree = (BVH_LAYOUT_OPTIX | BVH_LAYOUT_EMBREE);
+    const bool mixes_optix_and_embree = (any_layouts & optix_and_embree) == optix_and_embree;
+    if (mixes_optix_and_embree) {
+      return BVH_LAYOUT_MULTI_OPTIX_EMBREE;
+    }
+
+    return common_layouts;
+  }
+
+  /* Load kernels on every render sub-device and, when denoising is requested, on every
+   * denoising sub-device. Returns false as soon as one sub-device fails.
+   * Also updates 'use_denoising' from the requested features. */
+  bool load_kernels(const DeviceRequestedFeatures &requested_features) override
+  {
+    foreach (SubDevice &render_sub, devices) {
+      if (!render_sub.device->load_kernels(requested_features)) {
+        return false;
+      }
+    }
+
+    use_denoising = requested_features.use_denoising;
+    if (!requested_features.use_denoising) {
+      return true;
+    }
+
+    /* Denoising devices only get the denoising feature, everything else stays at default. */
+    DeviceRequestedFeatures denoising_features;
+    denoising_features.use_denoising = true;
+    foreach (SubDevice &denoise_sub, denoising_devices) {
+      if (!denoise_sub.device->load_kernels(denoising_features)) {
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+  /* Wait on every render sub-device and, when denoising is requested, on every denoising
+   * sub-device. Returns false as soon as one sub-device reports failure. */
+  bool wait_for_availability(const DeviceRequestedFeatures &requested_features) override
+  {
+    foreach (SubDevice &render_sub, devices) {
+      if (!render_sub.device->wait_for_availability(requested_features)) {
+        return false;
+      }
+    }
+
+    if (!requested_features.use_denoising) {
+      return true;
+    }
+
+    foreach (SubDevice &denoise_sub, denoising_devices) {
+      if (!denoise_sub.device->wait_for_availability(requested_features)) {
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+  /* Return the first DEVICE_KERNEL_FEATURE_KERNEL_INVALID or
+   * DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE state reported by a render sub-device, or
+   * DEVICE_KERNEL_USING_FEATURE_KERNEL when no sub-device reports either. */
+  DeviceKernelStatus get_active_kernel_switch_state() override
+  {
+    foreach (SubDevice &sub, devices) {
+      const DeviceKernelStatus sub_state = sub.device->get_active_kernel_switch_state();
+      const bool needs_attention = (sub_state == DEVICE_KERNEL_FEATURE_KERNEL_INVALID) ||
+                                   (sub_state == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE);
+      if (needs_attention) {
+        return sub_state;
+      }
+    }
+
+    return DEVICE_KERNEL_USING_FEATURE_KERNEL;
+  }
+
+  /* Build (or refit) 'bvh' for the render sub-devices.
+   *
+   * For the BVH2 and Embree layouts a single build is delegated to the last render
+   * sub-device. For the multi layouts 'bvh' is treated as a BVHMulti: one sub-BVH per render
+   * sub-device is created on demand and built on that sub-device.
+   *
+   * While the sub-BVHs are being built, the 'bvh' pointers of the geometry referenced by
+   * 'bvh' are temporarily redirected to the matching sub-BVH and restored at the end. */
+  void build_bvh(BVH *bvh, Progress &progress, bool refit) override
+  {
+    /* Try to build and share a single acceleration structure, if possible */
+    if (bvh->params.bvh_layout == BVH_LAYOUT_BVH2 || bvh->params.bvh_layout == BVH_LAYOUT_EMBREE) {
+      devices.back().device->build_bvh(bvh, progress, refit);
+      return;
+    }
+
+    assert(bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX ||
+           bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE);
+
+    /* One slot per render sub-device, indexed in iteration order of 'devices'. */
+    BVHMulti *const bvh_multi = static_cast<BVHMulti *>(bvh);
+    bvh_multi->sub_bvhs.resize(devices.size());
+
+    /* Remember the multi BVH of every geometry so the pointers can be restored below. */
+    vector<BVHMulti *> geom_bvhs;
+    geom_bvhs.reserve(bvh->geometry.size());
+    foreach (Geometry *geom, bvh->geometry) {
+      geom_bvhs.push_back(static_cast<BVHMulti *>(geom->bvh));
+    }
+
+    /* Broadcast acceleration structure build to all render devices */
+    size_t i = 0;
+    foreach (SubDevice &sub, devices) {
+      /* Change geometry BVH pointers to the sub BVH */
+      for (size_t k = 0; k < bvh->geometry.size(); ++k) {
+        bvh->geometry[k]->bvh = geom_bvhs[k]->sub_bvhs[i];
+      }
+
+      /* Create the sub-BVH for this device the first time it is needed. */
+      if (!bvh_multi->sub_bvhs[i]) {
+        BVHParams params = bvh->params;
+        if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX)
+          params.bvh_layout = BVH_LAYOUT_OPTIX;
+        else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE)
+          params.bvh_layout = sub.device->info.type == DEVICE_OPTIX ? BVH_LAYOUT_OPTIX :
+                                                                      BVH_LAYOUT_EMBREE;
+
+        /* Skip building a bottom level acceleration structure for non-instanced geometry on Embree
+         * (since they are put into the top level directly, see bvh_embree.cpp) */
+        if (!params.top_level && params.bvh_layout == BVH_LAYOUT_EMBREE &&
+            !bvh->geometry[0]->is_instanced()) {
+          /* Keep the slot index in step with the device iteration when skipping. */
+          i++;
+          continue;
+        }
+
+        bvh_multi->sub_bvhs[i] = BVH::create(params, bvh->geometry, bvh->objects, sub.device);
+      }
+
+      sub.device->build_bvh(bvh_multi->sub_bvhs[i], progress, refit);
+      i++;
+    }
+
+    /* Change geometry BVH pointers back to the multi BVH. */
+    for (size_t k = 0; k < bvh->geometry.size(); ++k) {
+      bvh->geometry[k]->bvh = geom_bvhs[k];
+    }
+  }
+
+  /* Returns NULL when there is more than one render sub-device; otherwise forwards to the
+   * first render sub-device. */
+  virtual void *osl_memory() override
+  {
+    if (devices.size() <= 1) {
+      return devices.front().device->osl_memory();
+    }
+
+    return NULL;
+  }
+
+  /* Report whether the memory registered under 'key' is owned by 'sub_device' itself rather
+   * than by another member of its peer island. Returns false when 'sub_device' is not one of
+   * the render sub-devices. */
+  bool is_resident(device_ptr key, Device *sub_device) override
+  {
+    foreach (SubDevice &candidate, devices) {
+      if (candidate.device != sub_device) {
+        continue;
+      }
+
+      SubDevice *owner = find_matching_mem_device(key, candidate);
+      return owner->device == sub_device;
+    }
+
+    return false;
+  }
+
+  /* Find the sub-device whose 'ptr_map' contains 'key': 'sub' itself if it has the key,
+   * otherwise a member of the peer island of 'sub'. When no member has the key, 'sub' is
+   * returned. */
+  SubDevice *find_matching_mem_device(device_ptr key, SubDevice &sub)
+  {
+    assert(key != 0 && (sub.peer_island_index >= 0 || sub.ptr_map.find(key) != sub.ptr_map.end()));
+
+    /* The current device takes precedence over its peers. */
+    if (sub.ptr_map.find(key) != sub.ptr_map.end()) {
+      return &sub;
+    }
+
+    /* Otherwise search the peer island; the last peer holding the key wins. */
+    SubDevice *owner_sub = &sub;
+    foreach (SubDevice *island_sub, peer_islands[sub.peer_island_index]) {
+      if (island_sub == owner_sub) {
+        continue;
+      }
+      if (island_sub->ptr_map.find(key) != island_sub->ptr_map.end()) {
+        owner_sub = island_sub;
+      }
+    }
+
+    return owner_sub;
+  }
+
+  /* Pick a sub-device of 'island' for the memory identified by 'key'.
+   *
+   * With a non-zero key, the island member whose 'ptr_map' already contains the key is
+   * chosen; with a zero key, the member with the lowest 'stats.mem_used'. Falls back to the
+   * first member of the island. */
+  SubDevice *find_suitable_mem_device(device_ptr key, const vector<SubDevice *> &island)
+  {
+    assert(!island.empty());
+
+    SubDevice *best_sub = island.front();
+    foreach (SubDevice *candidate, island) {
+      bool is_better;
+      if (key) {
+        /* Existing allocation: the owner is whoever already maps this key. */
+        is_better = candidate->ptr_map.find(key) != candidate->ptr_map.end();
+      }
+      else {
+        /* New allocation: prefer the device with the least memory in use. */
+        is_better = candidate->device->stats.mem_used < best_sub->device->stats.mem_used;
+      }
+
+      if (is_better) {
+        best_sub = candidate;
+      }
+    }
+
+    return best_sub;
+  }
+
+  /* Translate a multi-device `key` into the pointer stored by the device that owns it,
+   * as determined by find_matching_mem_device(). */
+  inline device_ptr find_matching_mem(device_ptr key, SubDevice &sub)
+  {
+    SubDevice *owner = find_matching_mem_device(key, sub);
+    return owner->ptr_map[key];
+  }
+
+  /* Allocate `mem` on the sub-devices and register the allocation under a fresh key.
+   *
+   * MEM_PIXELS memory is allocated on every sub-device; any other type is allocated once
+   * per peer island, on the device picked by find_suitable_mem_device(). The per-device
+   * pointers are stored in each SubDevice's ptr_map. On return `mem` refers to this
+   * device and carries the key as its device pointer. */
+  void mem_alloc(device_memory &mem) override
+  {
+    device_ptr key = unique_key++;
+
+    if (mem.type == MEM_PIXELS) {
+      /* Always allocate pixels memory on all devices
+       * This is necessary to ensure PBOs are registered everywhere, which FILM_CONVERT uses */
+      foreach (SubDevice &sub, devices) {
+        mem.device = sub.device;
+        mem.device_pointer = 0;
+        mem.device_size = 0;
+
+        sub.device->mem_alloc(mem);
+        sub.ptr_map[key] = mem.device_pointer;
+      }
+    }
+    else {
+      assert(mem.type == MEM_READ_ONLY || mem.type == MEM_READ_WRITE ||
+             mem.type == MEM_DEVICE_ONLY);
+      /* The remaining memory types can be distributed across devices */
+      foreach (const vector<SubDevice *> &island, peer_islands) {
+        /* NOTE(review): the freshly generated key is passed here, whereas mem_copy_to() and
+         * mem_zero() pass the existing key (0 for new memory). If unique_key never yields 0,
+         * find_suitable_mem_device() takes its key-lookup branch and presumably falls back
+         * to island.front() instead of the least-used device -- confirm this is intended. */
+        SubDevice *owner_sub = find_suitable_mem_device(key, island);
+        mem.device = owner_sub->device;
+        mem.device_pointer = 0;
+        mem.device_size = 0;
+
+        owner_sub->device->mem_alloc(mem);
+        owner_sub->ptr_map[key] = mem.device_pointer;
+      }
+    }
+
+    /* Hand the memory back to the caller as owned by the multi device. */
+    mem.device = this;
+    mem.device_pointer = key;
+    stats.mem_alloc(mem.device_size);
+  }
+
+  /* Copy `mem` to the sub-devices, registering it under a new key if `mem` has no device
+   * pointer yet.
+   *
+   * "RenderBuffers" memory is copied to every sub-device when denoising is in use. Other
+   * memory is copied once per peer island, to the device picked by
+   * find_suitable_mem_device(); for MEM_GLOBAL and MEM_TEXTURE the remaining devices of
+   * the island are called as well. The change in device size is added to the stats. */
+  void mem_copy_to(device_memory &mem) override
+  {
+    device_ptr existing_key = mem.device_pointer;
+    device_ptr key = (existing_key) ? existing_key : unique_key++;
+    size_t existing_size = mem.device_size;
+
+    /* The tile buffers are allocated on each device (see below), so copy to all of them */
+    if (strcmp(mem.name, "RenderBuffers") == 0 && use_denoising) {
+      foreach (SubDevice &sub, devices) {
+        /* Present the memory to the sub-device with its own pointer (0 when new). */
+        mem.device = sub.device;
+        mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
+        mem.device_size = existing_size;
+
+        sub.device->mem_copy_to(mem);
+        sub.ptr_map[key] = mem.device_pointer;
+      }
+    }
+    else {
+      foreach (const vector<SubDevice *> &island, peer_islands) {
+        SubDevice *owner_sub = find_suitable_mem_device(existing_key, island);
+        mem.device = owner_sub->device;
+        mem.device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0;
+        mem.device_size = existing_size;
+
+        owner_sub->device->mem_copy_to(mem);
+        owner_sub->ptr_map[key] = mem.device_pointer;
+
+        if (mem.type == MEM_GLOBAL || mem.type == MEM_TEXTURE) {
+          /* Need to create texture objects and update pointer in kernel globals on all devices */
+          foreach (SubDevice *island_sub, island) {
+            if (island_sub != owner_sub) {
+              /* `mem.device` is not reassigned here, so the call is made with the
+               * owner's device still set on `mem`. */
+              island_sub->device->mem_copy_to(mem);
+            }
+          }
+        }
+      }
+    }
+
+    /* Hand the memory back to the caller as owned by the multi device. */
+    mem.device = this;
+    mem.device_pointer = key;
+    stats.mem_alloc(mem.device_size - existing_size);
+  }
+
+  /* Copy `mem` back from the devices, one horizontal slice per rendering device.
+   *
+   * The `h` rows starting at `y` are divided evenly between the devices; the last device
+   * also takes the rows left over by the integer division. `mem` is restored to refer to
+   * the multi device and its key before returning. */
+  void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override
+  {
+    const device_ptr key = mem.device_pointer;
+    const int last = (int)devices.size() - 1;
+    const int sub_h = h / devices.size();
+
+    int index = 0;
+    foreach (SubDevice &sub, devices) {
+      const int slice_y = y + index * sub_h;
+      const int slice_h = (index == last) ? h - sub_h * index : sub_h;
+
+      /* Read the slice from whichever device actually owns the memory. */
+      SubDevice *owner = find_matching_mem_device(key, sub);
+      mem.device = owner->device;
+      mem.device_pointer = owner->ptr_map[key];
+      owner->device->mem_copy_from(mem, slice_y, w, slice_h, elem);
+
+      index++;
+    }
+
+    mem.device = this;
+    mem.device_pointer = key;
+  }
+
+  /* Zero `mem` on the sub-devices, registering it under a new key if `mem` has no device
+   * pointer yet.
+   *
+   * "RenderBuffers" memory is zeroed on every rendering device when denoising is in use.
+   * The denoising devices then either reuse the rendering devices' pointers in order
+   * (when rendering and denoising devices match) or zero their own copy. Other memory is
+   * zeroed once per peer island on the device picked by find_suitable_mem_device().
+   * The change in device size is added to the stats. */
+  void mem_zero(device_memory &mem) override
+  {
+    device_ptr existing_key = mem.device_pointer;
+    device_ptr key = (existing_key) ? existing_key : unique_key++;
+    size_t existing_size = mem.device_size;
+
+    /* This is a hack to only allocate the tile buffers on denoising devices
+     * Similarly the tile buffers also need to be allocated separately on all devices so any
+     * overlap rendered for denoising does not interfere with each other */
+    if (strcmp(mem.name, "RenderBuffers") == 0 && use_denoising) {
+      vector<device_ptr> device_pointers;
+      device_pointers.reserve(devices.size());
+
+      foreach (SubDevice &sub, devices) {
+        mem.device = sub.device;
+        mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
+        mem.device_size = existing_size;
+
+        sub.device->mem_zero(mem);
+        sub.ptr_map[key] = mem.device_pointer;
+
+        device_pointers.push_back(mem.device_pointer);
+      }
+
+      /* Index of the next rendering-device pointer to hand to a denoising device. Walking
+       * an index avoids repeatedly erasing the front of the vector and never reads past
+       * the end if there are more denoising devices than collected pointers. */
+      size_t next_pointer = 0;
+      foreach (SubDevice &sub, denoising_devices) {
+        if (matching_rendering_and_denoising_devices) {
+          assert(next_pointer < device_pointers.size());
+          if (next_pointer < device_pointers.size()) {
+            sub.ptr_map[key] = device_pointers[next_pointer++];
+          }
+        }
+        else {
+          mem.device = sub.device;
+          mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
+          mem.device_size = existing_size;
+
+          sub.device->mem_zero(mem);
+          sub.ptr_map[key] = mem.device_pointer;
+        }
+      }
+    }
+    else {
+      foreach (const vector<SubDevice *> &island, peer_islands) {
+        SubDevice *owner_sub = find_suitable_mem_device(existing_key, island);
+        mem.device = owner_sub->device;
+        mem.device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0;
+        mem.device_size = existing_size;
+
+        owner_sub->device->mem_zero(mem);
+        owner_sub->ptr_map[key] = mem.device_pointer;
+      }
+    }
+
+    /* Hand the memory back to the caller as owned by the multi device. */
+    mem.device = this;
+    mem.device_pointer = key;
+    stats.mem_alloc(mem.device_size - existing_size);
+  }
+
+  /* Free `mem` on the sub-devices and drop its key from their pointer maps.
+   *
+   * MEM_PIXELS memory, and "RenderBuffers" memory when denoising is in use, is freed on
+   * every rendering device; denoising devices either only forget the key (when they
+   * share pointers with the rendering devices) or free their own copy. Other memory is
+   * freed once per peer island on the owning device; for MEM_TEXTURE the remaining
+   * devices of the island are called as well. On return `mem` refers to this device with
+   * a zero pointer and size, and the previous size is subtracted from the stats.
+   *
+   * Map entries are erased by key throughout, which is a no-op for a missing key. */
+  void mem_free(device_memory &mem) override
+  {
+    device_ptr key = mem.device_pointer;
+    size_t existing_size = mem.device_size;
+
+    /* Free memory that was allocated for all devices (see above) on each device */
+    if (mem.type == MEM_PIXELS || (strcmp(mem.name, "RenderBuffers") == 0 && use_denoising)) {
+      foreach (SubDevice &sub, devices) {
+        mem.device = sub.device;
+        mem.device_pointer = sub.ptr_map[key];
+        mem.device_size = existing_size;
+
+        sub.device->mem_free(mem);
+        sub.ptr_map.erase(key);
+      }
+      foreach (SubDevice &sub, denoising_devices) {
+        if (matching_rendering_and_denoising_devices) {
+          /* The pointer is shared with a rendering device and was freed above. */
+          sub.ptr_map.erase(key);
+        }
+        else {
+          mem.device = sub.device;
+          mem.device_pointer = sub.ptr_map[key];
+          mem.device_size = existing_size;
+
+          sub.device->mem_free(mem);
+          sub.ptr_map.erase(key);
+        }
+      }
+    }
+    else {
+      foreach (const vector<SubDevice *> &island, peer_islands) {
+        SubDevice *owner_sub = find_matching_mem_device(key, *island.front());
+        mem.device = owner_sub->device;
+        mem.device_pointer = owner_sub->ptr_map[key];
+        mem.device_size = existing_size;
+
+        owner_sub->device->mem_free(mem);
+        owner_sub->ptr_map.erase(key);
+
+        if (mem.type == MEM_TEXTURE) {
+          /* Free texture objects on all devices */
+          foreach (SubDevice *island_sub, island) {
+            if (island_sub != owner_sub) {
+              island_sub->device->mem_free(mem);
+            }
+          }
+        }
+      }
+    }
+
+    mem.device = this;
+    mem.device_pointer = 0;
+    mem.device_size = 0;
+    stats.mem_free(existing_size);
+  }
+
+  /* Forward a constant-memory upload to every rendering device. */
+  void const_copy_to(const char *name, void *host, size_t size) override
+  {
+    foreach (SubDevice &target, devices) {
+      target.device->const_copy_to(name, host, size);
+    }
+  }
+
+  /* Draw `rgba` by letting every rendering device draw its own horizontal slice.
+   *
+   * Both `h` and `height` are divided evenly between the devices, with the last device
+   * taking the remainder of each. `w`, `width`, `dx`, `dw` and `dh` are passed through
+   * unchanged (the original author left a reminder to adjust the math for w/width).
+   * The device pointer of `rgba` is restored to the multi-device key before returning. */
+  void draw_pixels(device_memory &rgba,
+                   int y,
+                   int w,
+                   int h,
+                   int width,
+                   int height,
+                   int dx,
+                   int dy,
+                   int dw,
+                   int dh,
+                   bool transparent,
+                   const DeviceDrawParams &draw_params) override
+  {
+    assert(rgba.type == MEM_PIXELS);
+
+    const device_ptr key = rgba.device_pointer;
+    const int last = (int)devices.size() - 1;
+    const int sub_h = h / devices.size();
+    const int sub_height = height / devices.size();
+
+    int index = 0;
+    foreach (SubDevice &sub, devices) {
+      const bool is_last = (index == last);
+
+      /* Vertical placement and extent of this device's slice. */
+      const int slice_y = y + index * sub_h;
+      const int slice_dy = dy + index * sub_height;
+      const int slice_h = is_last ? h - sub_h * index : sub_h;
+      const int slice_height = is_last ? height - sub_height * index : sub_height;
+
+      rgba.device_pointer = sub.ptr_map[key];
+      sub.device->draw_pixels(rgba,
+                              slice_y,
+                              w,
+                              slice_h,
+                              width,
+                              slice_height,
+                              dx,
+                              slice_dy,
+                              dw,
+                              dh,
+                              transparent,
+                              draw_params);
+      index++;
+    }
+
+    rgba.device_pointer = key;
+  }
+
+  /* Replace the multi-device key in `tile.buffer` with the pointer valid on `sub_device`.
+   *
+   * Rendering devices are checked first and resolve the key through the memory owner;
+   * denoising devices use their own pointer map directly. Tiles without a buffer, and
+   * devices that are not part of this multi device, leave the tile untouched. */
+  void map_tile(Device *sub_device, RenderTile &tile) override
+  {
+    if (tile.buffer) {
+      foreach (SubDevice &render_sub, devices) {
+        if (render_sub.device == sub_device) {
+          tile.buffer = find_matching_mem(tile.buffer, render_sub);
+          return;
+        }
+      }
+
+      foreach (SubDevice &denoise_sub, denoising_devices) {
+        if (denoise_sub.device == sub_device) {
+          tile.buffer = denoise_sub.ptr_map[tile.buffer];
+          return;
+        }
+      }
+    }
+  }
+
+  /* Return the index of `sub_device` among the rendering devices followed by the
+   * denoising devices (numbering continues across both lists), or -1 if it is unknown. */
+  int device_number(Device *sub_device) override
+  {
+    int index = 0;
+
+    foreach (SubDevice &render_sub, devices) {
+      if (render_sub.device == sub_device) {
+        return index;
+      }
+      ++index;
+    }
+
+    foreach (SubDevice &denoise_sub, denoising_devices) {
+      if (denoise_sub.device == sub_device) {
+        return index;
+      }
+      ++index;
+    }
+
+    return -1;
+  }
+
+  /* Prepare the neighbor tiles of a denoising task for use on `sub_device`.
+   *
+   * For every neighbor that has buffers, `tile.buffer` is set to a pointer usable on
+   * `sub_device`. Tiles whose buffer belongs to another device are copied from their
+   * device and then to `sub_device`; `tile.device_size` records the size of that copy.
+   * Neighbors without buffers are skipped. */
+  void map_neighbor_tiles(Device *sub_device, RenderTileNeighbors &neighbors) override
+  {
+    for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
+      RenderTile &tile = neighbors.tiles[i];
+
+      if (!tile.buffers) {
+        continue;
+      }
+
+      device_vector<float> &mem = tile.buffers->buffer;
+      tile.buffer = mem.device_pointer;
+
+      if (mem.device == this && matching_rendering_and_denoising_devices) {
+        /* Skip unnecessary copies in viewport mode (buffer covers the
+         * whole image), but still need to fix up the tile device pointer. */
+        map_tile(sub_device, tile);
+        continue;
+      }
+
+      /* If the tile was rendered on another device, copy its memory to
+       * the current device now, for the duration of the denoising task.
+       * Note that this temporarily modifies the RenderBuffers and calls
+       * the device, so this function is not thread safe. */
+      if (mem.device != sub_device) {
+        /* Only copy from device to host once. This is faster, but
+         * also required for the case where a CPU thread is denoising
+         * a tile rendered on the GPU. In that case we have to avoid
+         * overwriting the buffer being de-noised by the CPU thread. */
+        if (!tile.buffers->map_neighbor_copied) {
+          tile.buffers->map_neighbor_copied = true;
+          mem.copy_from_device();
+        }
+
+        if (mem.device == this) {
+          /* Can re-use memory if tile is already allocated on the sub device. */
+          map_tile(sub_device, tile);
+          mem.swap_device(sub_device, mem.device_size, tile.buffer);
+        }
+        else {
+          /* No existing allocation on the sub device: start from a zero size and pointer. */
+          mem.swap_device(sub_device, 0, 0);
+        }
+
+        mem.copy_to_device();
+
+        /* Remember the sub-device pointer and size on the tile before `mem` is restored. */
+        tile.buffer = mem.device_pointer;
+        tile.device_size = mem.device_size;
+
+        mem.restore_device();
+      }
+    }
+  }
+
+  /* Finish a denoising task on `sub_device`: bring the denoised target tile back to the
+   * device that owns its buffer and release the temporary neighbor copies.
+   *
+   * Nothing is done when the target buffer belongs to the multi device and rendering and
+   * denoising devices match. */
+  void unmap_neighbor_tiles(Device *sub_device, RenderTileNeighbors &neighbors) override
+  {
+    RenderTile &target = neighbors.target;
+    device_vector<float> &target_mem = target.buffers->buffer;
+
+    const bool shared_buffer = (target_mem.device == this) &&
+                               matching_rendering_and_denoising_devices;
+    if (shared_buffer) {
+      return;
+    }
+
+    /* Fetch the denoised result from the denoising device ... */
+    target_mem.swap_device(sub_device, target.device_size, target.buffer);
+    target_mem.copy_from_device();
+    target_mem.restore_device();
+
+    /* ... and upload it to the device the buffer originally belongs to. */
+    target_mem.copy_to_device();
+
+    for (int n = 0; n < RenderTileNeighbors::SIZE; n++) {
+      RenderTile &neighbor = neighbors.tiles[n];
+      if (!neighbor.buffers) {
+        continue;
+      }
+
+      device_vector<float> &neighbor_mem = neighbor.buffers->buffer;
+
+      /* Only buffers owned by a different device got a temporary copy on `sub_device`. */
+      const bool has_temporary_copy = (neighbor_mem.device != sub_device) &&
+                                      (neighbor_mem.device != this);
+      if (!has_temporary_copy) {
+        continue;
+      }
+
+      neighbor_mem.swap_device(sub_device, neighbor.device_size, neighbor.buffer);
+      sub_device->mem_free(neighbor_mem);
+      neighbor_mem.restore_device();
+    }
+  }
+
+  /* Sum the split task counts the rendering devices report for their share of `task`.
+   *
+   * The task is split into as many parts as there are rendering devices; each device is
+   * asked about one part for as long as parts remain. */
+  int get_split_task_count(DeviceTask &task) override
+  {
+    list<DeviceTask> pending;
+    task.split(pending, devices.size());
+
+    int count = 0;
+    foreach (SubDevice &sub, devices) {
+      /* Nothing refills the list, so once it is empty no further device gets a part. */
+      if (pending.empty()) {
+        break;
+      }
+
+      DeviceTask part = pending.front();
+      pending.pop_front();
+      count += sub.device->get_split_task_count(part);
+    }
+
+    return count;
+  }
+
+  /* Split `task` and hand one sub-task to each device of the selected device list.
+   *
+   * Rendering devices are used by default. When denoising devices exist, DENOISE_BUFFER
+   * tasks go to them entirely. For RENDER tasks that include the DENOISE tile type, the
+   * task is added unsplit to every denoising device with only DENOISE set, and the
+   * remaining tile types are then split across the rendering devices. Device pointers in
+   * each sub-task are translated from multi-device keys to per-device pointers. */
+  void task_add(DeviceTask &task) override
+  {
+    /* NOTE(review): this copies the device list by value, so the `sub` references in the
+     * loop below refer to copies; operator[] lookups on their ptr_map do not modify the
+     * originals. Confirm whether the copy is intended before changing it. */
+    list<SubDevice> task_devices = devices;
+    if (!denoising_devices.empty()) {
+      if (task.type == DeviceTask::DENOISE_BUFFER) {
+        /* Denoising tasks should be redirected to the denoising devices entirely. */
+        task_devices = denoising_devices;
+      }
+      else if (task.type == DeviceTask::RENDER && (task.tile_types & RenderTile::DENOISE)) {
+        const uint tile_types = task.tile_types;
+        /* For normal rendering tasks only redirect the denoising part to the denoising devices.
+         * Do not need to split the task here, since they all run through 'acquire_tile'. */
+        task.tile_types = RenderTile::DENOISE;
+        foreach (SubDevice &sub, denoising_devices) {
+          sub.device->task_add(task);
+        }
+        /* Rendering itself should still be executed on the rendering devices. */
+        /* The DENOISE bit is known to be set in this branch, so XOR clears it. */
+        task.tile_types = tile_types ^ RenderTile::DENOISE;
+      }
+    }
+
+    list<DeviceTask> tasks;
+    task.split(tasks, task_devices.size());
+
+    foreach (SubDevice &sub, task_devices) {
+      if (!tasks.empty()) {
+        DeviceTask subtask = tasks.front();
+        tasks.pop_front();
+
+        /* Buffer and shader pointers are resolved through the memory owner, while the
+         * rgba pointers are looked up in this sub-device's own pointer map. */
+        if (task.buffer)
+          subtask.buffer = find_matching_mem(task.buffer, sub);
+        if (task.rgba_byte)
+          subtask.rgba_byte = sub.ptr_map[task.rgba_byte];
+        if (task.rgba_half)
+          subtask.rgba_half = sub.ptr_map[task.rgba_half];
+        if (task.shader_input)
+          subtask.shader_input = find_matching_mem(task.shader_input, sub);
+        if (task.shader_output)
+          subtask.shader_output = find_matching_mem(task.shader_output, sub);
+
+        sub.device->task_add(subtask);
+
+        if (task.buffers && task.buffers->buffer.device == this) {
+          /* Synchronize access to RenderBuffers, since 'map_neighbor_tiles' is not thread-safe. */
+          sub.device->task_wait();
+        }
+      }
+    }
+  }
+
+  /* Wait for the tasks of all rendering devices, then of all denoising devices. */
+  void task_wait() override
+  {
+    foreach (SubDevice &render_sub, devices) {
+      render_sub.device->task_wait();
+    }
+    foreach (SubDevice &denoise_sub, denoising_devices) {
+      denoise_sub.device->task_wait();
+    }
+  }
+
+  /* Cancel the tasks of all rendering devices, then of all denoising devices. */
+  void task_cancel() override
+  {
+    foreach (SubDevice &render_sub, devices) {
+      render_sub.device->task_cancel();
+    }
+    foreach (SubDevice &denoise_sub, denoising_devices) {
+      denoise_sub.device->task_cancel();
+    }
+  }
+};
+
+/* Factory for the multi device: builds a MultiDevice from the given device info and
+ * returns it through the generic Device interface. The result is allocated with `new`. */
+Device *device_multi_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
+{
+  Device *multi_device = new MultiDevice(info, stats, profiler, background);
+  return multi_device;
+}
+
+CCL_NAMESPACE_END
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Jacques Lucke	d26165747e	initial support for node groups	2021-09-20 18:08:47 +02:00
Jacques Lucke	6dc2045054	cleanup	2021-09-20 17:18:39 +02:00
Jacques Lucke	362bd7889b	improve visualization	2021-09-20 16:58:15 +02:00
Jacques Lucke	620da869f1	Merge branch 'master' into temp-field-visualization	2021-09-20 13:30:34 +02:00
Jacques Lucke	50df35e4a4	fix after merge	2021-09-17 13:37:21 +02:00
Jacques Lucke	6a72188b3e	Merge branch 'master' into temp-field-visualization	2021-09-17 13:35:54 +02:00
Jacques Lucke	5d183c5af3	improvements	2021-09-17 12:18:45 +02:00
Jacques Lucke	dcf72a30e1	Merge branch 'master' into temp-field-visualization	2021-09-17 10:56:46 +02:00
Jacques Lucke	0bec1f5dad	progress	2021-09-16 10:22:08 +02:00
Jacques Lucke	a5fbd81510	Merge branch 'master' into temp-field-visualization	2021-09-15 16:36:03 +02:00
Jacques Lucke	234de0bf71	cleanup	2021-09-15 16:30:29 +02:00
Jacques Lucke	a5b9323fd5	Merge branch 'master' into temp-field-visualization	2021-09-15 16:18:20 +02:00
Jacques Lucke	52de232811	continue	2021-09-15 11:45:20 +02:00
Jacques Lucke	1931878f57	initial changes	2021-09-15 11:33:00 +02:00