Fix compilation error after rebase.

Merge branch 'master' into temp-xr-virtual-camera-experiment
Add support for XrSession (untested).
2022-11-15 14:20:06 +01:00 · 2022-11-15 14:06:05 +01:00 · 2022-11-11 16:06:09 +01:00 · 2022-11-11 15:07:23 +01:00 · 2022-11-11 14:53:21 +01:00 · 2022-11-11 14:09:19 +01:00
983 changed files with 22466 additions and 50556 deletions
--- a/.clang-format
+++ b/.clang-format
@@ -162,7 +162,6 @@ PenaltyBreakString: 1000000
 ForEachMacros:
  - BEGIN_ANIMFILTER_SUBCHANNELS
  - BKE_pbvh_vertex_iter_begin
-  - BKE_pbvh_face_iter_begin
  - BLI_FOREACH_SPARSE_RANGE
  - BLI_SMALLSTACK_ITER_BEGIN
  - BMO_ITER
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -257,12 +257,6 @@ if(UNIX AND NOT (APPLE OR HAIKU))

    option(WITH_GHOST_WAYLAND_DYNLOAD  "Enable runtime dynamic WAYLAND libraries loading" ON)
    mark_as_advanced(WITH_GHOST_WAYLAND_DYNLOAD)
-
-    set(WITH_GHOST_WAYLAND_APP_ID "" CACHE STRING "\
-The application ID used for Blender (use default when an empty string), \
-this can be used to differentiate Blender instances by version or branch for example."
-    )
-    mark_as_advanced(WITH_GHOST_WAYLAND_APP_ID)
  endif()
 endif()

@@ -345,12 +339,8 @@ if(APPLE)
 else()
  set(WITH_COREAUDIO OFF)
 endif()
-if(NOT WIN32)
-  if(APPLE)
-    option(WITH_JACK          "Enable JACK Support (http://www.jackaudio.org)" OFF)
-  else()
-    option(WITH_JACK          "Enable JACK Support (http://www.jackaudio.org)" ON)
-  endif()
+if(UNIX AND NOT APPLE)
+  option(WITH_JACK          "Enable JACK Support (http://www.jackaudio.org)" ON)
  option(WITH_JACK_DYNLOAD  "Enable runtime dynamic JACK libraries loading" OFF)
 else()
  set(WITH_JACK OFF)
@@ -467,6 +457,7 @@ if(NOT APPLE)

  option(WITH_CYCLES_CUDA_BINARIES     "Build Cycles NVIDIA CUDA binaries" OFF)
  set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 compute_75 CACHE STRING "CUDA architectures to build binaries for")
+  option(WITH_CYCLES_CUBIN_COMPILER    "Build cubins with nvrtc based compiler instead of nvcc" OFF)
  option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful on machines with limited RAM)" OFF)
  option(WITH_CUDA_DYNLOAD             "Dynamically load CUDA libraries at runtime (for developers, makes cuda-gdb work)" ON)

@@ -474,6 +465,7 @@ if(NOT APPLE)
  set(CYCLES_RUNTIME_OPTIX_ROOT_DIR    "" CACHE PATH "Path to the OptiX SDK root directory. When set, this path will be used at runtime to compile OptiX kernels.")

  mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
+  mark_as_advanced(WITH_CYCLES_CUBIN_COMPILER)
  mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
  mark_as_advanced(WITH_CUDA_DYNLOAD)
  mark_as_advanced(OPTIX_ROOT_DIR)
@@ -484,7 +476,7 @@ endif()
 if(NOT APPLE)
  option(WITH_CYCLES_DEVICE_HIP        "Enable Cycles AMD HIP support" ON)
  option(WITH_CYCLES_HIP_BINARIES      "Build Cycles AMD HIP binaries" OFF)
-  set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx906 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
+  set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx906 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 CACHE STRING "AMD HIP architectures to build binaries for")
  mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
  mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
 endif()
@@ -1239,11 +1231,12 @@ if(WITH_OPENGL)
  add_definitions(-DWITH_OPENGL)
 endif()

-#-----------------------------------------------------------------------------
+
+# -----------------------------------------------------------------------------
 # Configure Vulkan.

 if(WITH_VULKAN_BACKEND)
-  list(APPEND BLENDER_GL_LIBRARIES ${VULKAN_LIBRARIES})
+  add_definitions(-DWITH_VULKAN_BACKEND)
 endif()

 # -----------------------------------------------------------------------------
--- a/build_files/build_environment/install_deps.sh
+++ b/build_files/build_environment/install_deps.sh
@@ -40,15 +40,15 @@ ver-ocio:,ver-oiio:,ver-llvm:,ver-osl:,ver-osd:,ver-openvdb:,ver-xr-openxr:,ver-
 force-all,force-python,force-boost,force-tbb,\
 force-ocio,force-imath,force-openexr,force-oiio,force-llvm,force-osl,force-osd,force-openvdb,\
 force-ffmpeg,force-opencollada,force-alembic,force-embree,force-oidn,force-usd,\
-force-xr-openxr,force-level-zero,force-openpgl,\
+force-xr-openxr,force-level-zero, force-openpgl,\
 build-all,build-python,build-boost,build-tbb,\
 build-ocio,build-imath,build-openexr,build-oiio,build-llvm,build-osl,build-osd,build-openvdb,\
 build-ffmpeg,build-opencollada,build-alembic,build-embree,build-oidn,build-usd,\
-build-xr-openxr,build-level-zero,build-openpgl,\
+build-xr-openxr,build-level-zero, build-openpgl,\
 skip-python,skip-boost,skip-tbb,\
 skip-ocio,skip-imath,skip-openexr,skip-oiio,skip-llvm,skip-osl,skip-osd,skip-openvdb,\
 skip-ffmpeg,skip-opencollada,skip-alembic,skip-embree,skip-oidn,skip-usd,\
-skip-xr-openxr,skip-level-zero,skip-openpgl \
+skip-xr-openxr,skip-level-zero, skip-openpgl \
 -- "$@" \
 )

--- a/build_files/cmake/Modules/FindMoltenVK.cmake
+++ b/build_files/cmake/Modules/FindMoltenVK.cmake
@@ -1,59 +0,0 @@
-# SPDX-License-Identifier: BSD-3-Clause
-# Copyright 2022 Blender Foundation.
-
-# - Find MoltenVK libraries
-# Find the MoltenVK includes and libraries
-# This module defines
-#  MOLTENVK_INCLUDE_DIRS, where to find MoltenVK headers, Set when
-#                        MOLTENVK_INCLUDE_DIR is found.
-#  MOLTENVK_LIBRARIES, libraries to link against to use MoltenVK.
-#  MOLTENVK_ROOT_DIR, The base directory to search for MoltenVK.
-#                    This can also be an environment variable.
-#  MOLTENVK_FOUND, If false, do not try to use MoltenVK.
-#
-
-# If MOLTENVK_ROOT_DIR was defined in the environment, use it.
-IF(NOT MOLTENVK_ROOT_DIR AND NOT $ENV{MOLTENVK_ROOT_DIR} STREQUAL "")
-  SET(MOLTENVK_ROOT_DIR $ENV{MOLTENVK_ROOT_DIR})
-ENDIF()
-
-SET(_moltenvk_SEARCH_DIRS
-  ${MOLTENVK_ROOT_DIR}
-  ${LIBDIR}/vulkan/MoltenVK
-)
-
-
-FIND_PATH(MOLTENVK_INCLUDE_DIR
-  NAMES
-    MoltenVK/vk_mvk_moltenvk.h
-  HINTS
-    ${_moltenvk_SEARCH_DIRS}
-  PATH_SUFFIXES
-    include
-)
-
-FIND_LIBRARY(MOLTENVK_LIBRARY
-  NAMES
-    MoltenVK
-  HINTS
-    ${_moltenvk_SEARCH_DIRS}
-  PATH_SUFFIXES
-    dylib/macOS
-)
-
-# handle the QUIETLY and REQUIRED arguments and set MOLTENVK_FOUND to TRUE if
-# all listed variables are TRUE
-INCLUDE(FindPackageHandleStandardArgs)
-FIND_PACKAGE_HANDLE_STANDARD_ARGS(MoltenVK DEFAULT_MSG MOLTENVK_LIBRARY MOLTENVK_INCLUDE_DIR)
-
-IF(MOLTENVK_FOUND)
-  SET(MOLTENVK_LIBRARIES ${MOLTENVK_LIBRARY})
-  SET(MOLTENVK_INCLUDE_DIRS ${MOLTENVK_INCLUDE_DIR})
-ENDIF()
-
-MARK_AS_ADVANCED(
-  MOLTENVK_INCLUDE_DIR
-  MOLTENVK_LIBRARY
-)
-
-UNSET(_moltenvk_SEARCH_DIRS)
--- a/build_files/cmake/buildinfo.cmake
+++ b/build_files/cmake/buildinfo.cmake
@@ -103,6 +103,10 @@ if(EXISTS ${SOURCE_DIR}/.git)
      endif()
    endif()

+    if(MY_WC_BRANCH MATCHES "^blender-v")
+      set(MY_WC_BRANCH "master")
+    endif()
+
    unset(_git_below_check)
  endif()

--- a/build_files/cmake/packaging.cmake
+++ b/build_files/cmake/packaging.cmake
@@ -106,8 +106,8 @@ if(WIN32)
  set(CPACK_WIX_LIGHT_EXTRA_FLAGS -dcl:medium)
 endif()

-set(CPACK_PACKAGE_EXECUTABLES "blender-launcher" "Blender ${MAJOR_VERSION}.${MINOR_VERSION}")
-set(CPACK_CREATE_DESKTOP_LINKS "blender-launcher" "Blender ${MAJOR_VERSION}.${MINOR_VERSION}")
+set(CPACK_PACKAGE_EXECUTABLES "blender-launcher" "Blender")
+set(CPACK_CREATE_DESKTOP_LINKS "blender-launcher" "Blender")

 include(CPack)

--- a/build_files/cmake/platform/platform_apple.cmake
+++ b/build_files/cmake/platform/platform_apple.cmake
@@ -100,23 +100,6 @@ if(WITH_USD)
  find_package(USD REQUIRED)
 endif()

-if(WITH_VULKAN_BACKEND)
-  find_package(MoltenVK REQUIRED)
-
-  if(EXISTS ${LIBDIR}/vulkan)
-    set(VULKAN_FOUND On)
-    set(VULKAN_ROOT_DIR ${LIBDIR}/vulkan/macOS)
-    set(VULKAN_INCLUDE_DIR ${VULKAN_ROOT_DIR}/include)
-    set(VULKAN_LIBRARY ${VULKAN_ROOT_DIR}/lib/libvulkan.1.dylib)
-
-    set(VULKAN_INCLUDE_DIRS ${VULKAN_INCLUDE_DIR} ${MOLTENVK_INCLUDE_DIRS})
-    set(VULKAN_LIBRARIES ${VULKAN_LIBRARY} ${MOLTENVK_LIBRARIES})
-  else()
-    message(WARNING "Vulkan SDK was not found, disabling WITH_VULKAN_BACKEND")
-    set(WITH_VULKAN_BACKEND OFF)
-  endif()
-endif()
-
 if(WITH_OPENSUBDIV)
  find_package(OpenSubdiv)
 endif()
--- a/build_files/cmake/platform/platform_unix.cmake
+++ b/build_files/cmake/platform/platform_unix.cmake
@@ -108,10 +108,6 @@ find_package_wrapper(ZLIB REQUIRED)
 find_package_wrapper(Zstd REQUIRED)
 find_package_wrapper(Epoxy REQUIRED)

-if(WITH_VULKAN_BACKEND)
-  find_package_wrapper(Vulkan REQUIRED)
-endif()
-
 function(check_freetype_for_brotli)
  include(CheckSymbolExists)
  set(CMAKE_REQUIRED_INCLUDES ${FREETYPE_INCLUDE_DIRS})
@@ -326,10 +322,9 @@ if(WITH_CYCLES AND WITH_CYCLES_DEVICE_ONEAPI)
  file(GLOB _sycl_runtime_libraries
    ${SYCL_ROOT_DIR}/lib/libsycl.so
    ${SYCL_ROOT_DIR}/lib/libsycl.so.*
-    ${SYCL_ROOT_DIR}/lib/libpi_*.so
+    ${SYCL_ROOT_DIR}/lib/libpi_level_zero.so
  )
  list(FILTER _sycl_runtime_libraries EXCLUDE REGEX ".*\.py")
-  list(REMOVE_ITEM _sycl_runtime_libraries "${SYCL_ROOT_DIR}/lib/libpi_opencl.so")
  list(APPEND PLATFORM_BUNDLED_LIBRARIES ${_sycl_runtime_libraries})
  unset(_sycl_runtime_libraries)
 endif()
@@ -970,9 +965,16 @@ if(WITH_COMPILER_CCACHE)
  endif()
 endif()

-# Always link with libatomic if available, as it is required for data types
-# which don't have intrinsics.
-function(configure_atomic_lib_if_needed)
+# On some platforms certain atomic operations are not possible with assembly and/or intrinsics and
+# they are emulated in software with locks. For example, on armel there are no intrinsics providing
+# 64 bit atomic operations, so the STL offloads the software emulation of those atomics to
+# libatomic.
+# This function will check whether libatomic is required and if so will configure linker flags.
+# If atomic operations are possible without libatomic then linker flags are left as-is.
+function(CONFIGURE_ATOMIC_LIB_IF_NEEDED)
+  # Source used to force the situation where software emulation of atomics is required.
+  # Assume that using a 64bit integer gives a definitive answer (as in, if 64bit atomic operations
+  # are possible using assembly/intrinsics, 8, 16, and 32 bit operations will also be possible).
  set(_source
      "#include <atomic>
      #include <cstdint>
@@ -983,12 +985,25 @@ function(configure_atomic_lib_if_needed)
  )

  include(CheckCXXSourceCompiles)
-  set(CMAKE_REQUIRED_LIBRARIES atomic)
-  check_cxx_source_compiles("${_source}" ATOMIC_OPS_WITH_LIBATOMIC)
-  unset(CMAKE_REQUIRED_LIBRARIES)
+  check_cxx_source_compiles("${_source}" ATOMIC_OPS_WITHOUT_LIBATOMIC)

-  if(ATOMIC_OPS_WITH_LIBATOMIC)
-    set(PLATFORM_LINKFLAGS "${PLATFORM_LINKFLAGS} -latomic" PARENT_SCOPE)
+  if(NOT ATOMIC_OPS_WITHOUT_LIBATOMIC)
+    # Compilation of the test program has failed.
+    # Try it again with -latomic to see if this is what is needed, or whether something else is
+    # going on.
+
+    set(CMAKE_REQUIRED_LIBRARIES atomic)
+    check_cxx_source_compiles("${_source}" ATOMIC_OPS_WITH_LIBATOMIC)
+    unset(CMAKE_REQUIRED_LIBRARIES)
+
+    if(ATOMIC_OPS_WITH_LIBATOMIC)
+      set(PLATFORM_LINKFLAGS "${PLATFORM_LINKFLAGS} -latomic" PARENT_SCOPE)
+    else()
+      # Atomic operations are a required part of Blender and it is not possible to proceed.
+      # We expect that either the standard library or libatomic will make atomics work. If both
+      # cases have failed, something fishy on a bigger scope is going on.
+      message(FATAL_ERROR "Failed to detect required configuration for atomic operations")
+    endif()
  endif()
 endfunction()

--- a/build_files/cmake/platform/platform_win32.cmake
+++ b/build_files/cmake/platform/platform_win32.cmake
@@ -926,20 +926,6 @@ if(WITH_HARU)
  set(HARU_LIBRARIES ${HARU_ROOT_DIR}/lib/libhpdfs.lib)
 endif()

-if(WITH_VULKAN_BACKEND)
-  if(EXISTS ${LIBDIR}/vulkan)
-    set(VULKAN_FOUND On)
-    set(VULKAN_ROOT_DIR ${LIBDIR}/vulkan)
-    set(VULKAN_INCLUDE_DIR ${VULKAN_ROOT_DIR}/include)
-    set(VULKAN_INCLUDE_DIRS ${VULKAN_INCLUDE_DIR})
-    set(VULKAN_LIBRARY ${VULKAN_ROOT_DIR}/lib/vulkan-1.lib)
-    set(VULKAN_LIBRARIES ${VULKAN_LIBRARY})
-  else()
-    message(WARNING "Vulkan SDK was not found, disabling WITH_VULKAN_BACKEND")
-    set(WITH_VULKAN_BACKEND OFF)
-  endif()
-endif()
-
 if(WITH_CYCLES AND WITH_CYCLES_PATH_GUIDING)
  find_package(openpgl QUIET)
  if(openpgl_FOUND)
@@ -972,13 +958,7 @@ if(WITH_CYCLES AND WITH_CYCLES_DEVICE_ONEAPI)
  endforeach()
  unset(_sycl_runtime_libraries_glob)

-  file(GLOB _sycl_pi_runtime_libraries_glob
-    ${SYCL_ROOT_DIR}/bin/pi_*.dll
-  )
-  list(REMOVE_ITEM _sycl_pi_runtime_libraries_glob "${SYCL_ROOT_DIR}/bin/pi_opencl.dll")
-  list (APPEND _sycl_runtime_libraries ${_sycl_pi_runtime_libraries_glob})
-  unset(_sycl_pi_runtime_libraries_glob)
-
+  list(APPEND _sycl_runtime_libraries ${SYCL_ROOT_DIR}/bin/pi_level_zero.dll)
  list(APPEND PLATFORM_BUNDLED_LIBRARIES ${_sycl_runtime_libraries})
  unset(_sycl_runtime_libraries)
 endif()
--- a/build_files/config/pipeline_config.yaml
+++ b/build_files/config/pipeline_config.yaml
@@ -55,7 +55,7 @@ buildbot:
    cuda11:
        version: '11.4.1'
    hip:
-        version: '5.3.22480'
+        version: '5.2.21440'
    optix:
        version: '7.3.0'
    ocloc:
--- a/build_files/utils/make_bpy_wheel.py
+++ b/build_files/utils/make_bpy_wheel.py
@@ -35,41 +35,10 @@ from typing import (
    Tuple,
 )

-# ------------------------------------------------------------------------------
-# Long Description
-
-long_description = """# Blender
-
-[Blender](https://www.blender.org) is the free and open source 3D creation suite. It supports the entirety of the 3D pipeline—modeling, rigging, animation, simulation, rendering, compositing and motion tracking, even video editing.
-
-This package provides Blender as a Python module for use in studio pipelines, web services, scientific research, and more.
-
-## Documentation
-
-* [Blender Python API](https://docs.blender.org/api/current/)
-* [Blender as a Python Module](https://docs.blender.org/api/current/info_advanced_blender_as_bpy.html)
-
-## Requirements
-
-[System requirements](https://www.blender.org/download/requirements/) are the same as Blender.
-
-Each Blender release supports one Python version, and the package is only compatible with that version.
-
-## Source Code
-
-* [Releases](https://download.blender.org/source/)
-* Repository: [git.blender.org/blender.git](https://git.blender.org/gitweb/gitweb.cgi/blender.git)
-
-## Credits
-
-Created by the [Blender developer community](https://www.blender.org/about/credits/).
-
-Thanks to Tyler Alden Gubala for maintaining the original version of this package."""

 # ------------------------------------------------------------------------------
 # Generic Functions

-
 def find_dominating_file(
    path: str,
    search: Sequence[str],
@@ -226,8 +195,6 @@ def main() -> None:
        options={"bdist_wheel": {"plat_name": platform_tag}},

        description="Blender as a Python module",
-        long_description=long_description,
-        long_description_content_type='text/markdown',
        license="GPL-3.0",
        author="Blender Foundation",
        author_email="bf-committers@blender.org",
--- a/build_files/utils/make_update.py
+++ b/build_files/utils/make_update.py
@@ -59,11 +59,10 @@ def svn_update(args: argparse.Namespace, release_version: Optional[str]) -> None

    # Checkout precompiled libraries
    if sys.platform == 'darwin':
-        # Check platform.version to detect arm64 with x86_64 python binary.
-        if platform.machine() == 'arm64' or ('ARM64' in platform.version()):
-            lib_platform = "darwin_arm64"
-        elif platform.machine() == 'x86_64':
+        if platform.machine() == 'x86_64':
            lib_platform = "darwin"
+        elif platform.machine() == 'arm64':
+            lib_platform = "darwin_arm64"
        else:
            lib_platform = None
    elif sys.platform == 'win32':
--- a/doc/python_api/rst/info_gotcha.rst
+++ b/doc/python_api/rst/info_gotcha.rst
@@ -870,26 +870,6 @@ an issue but, due to internal implementation details, currently are:
  thus breaking any current iteration over ``Collection.all_objects``.


-.. rubric:: Do not:
-
-.. code-block:: python
-
-   # `all_objects` is an iterator. Using it directly while performing operations on its members that will update
-   # the memory accessed by the `all_objects` iterator will lead to invalid memory accesses and crashes.
-   for object in bpy.data.collections["Collection"].all_objects:
-        object.hide_viewport = True
-
-
-.. rubric:: Do:
-
-.. code-block:: python
-
-   # `all_objects[:]` is an independent list generated from the iterator. As long as no objects are deleted,
-   # its content will remain valid even if the data accessed by the `all_objects` iterator is modified.
-   for object in bpy.data.collections["Collection"].all_objects[:]:
-        object.hide_viewport = True
-
-
 sys.exit
 ========

--- a/doc/python_api/sphinx_doc_gen.py
+++ b/doc/python_api/sphinx_doc_gen.py
@@ -1294,7 +1294,6 @@ def pycontext2sphinx(basepath):

            type_descr = prop.get_type_description(
                class_fmt=":class:`bpy.types.%s`",
-                mathutils_fmt=":class:`mathutils.%s`",
                collection_id=_BPY_PROP_COLLECTION_ID,
                enum_descr_override=enum_descr_override,
            )
@@ -1447,7 +1446,6 @@ def pyrna2sphinx(basepath):
            identifier = " %s" % prop.identifier

        kwargs["class_fmt"] = ":class:`%s`"
-        kwargs["mathutils_fmt"] = ":class:`mathutils.%s`"

        kwargs["collection_id"] = _BPY_PROP_COLLECTION_ID

@@ -1567,7 +1565,6 @@ def pyrna2sphinx(basepath):

            type_descr = prop.get_type_description(
                class_fmt=":class:`%s`",
-                mathutils_fmt=":class:`mathutils.%s`",
                collection_id=_BPY_PROP_COLLECTION_ID,
                enum_descr_override=enum_descr_override,
            )
@@ -1634,7 +1631,6 @@ def pyrna2sphinx(basepath):

                    type_descr = prop.get_type_description(
                        as_ret=True, class_fmt=":class:`%s`",
-                        mathutils_fmt=":class:`mathutils.%s`",
                        collection_id=_BPY_PROP_COLLECTION_ID,
                        enum_descr_override=enum_descr_override,
                    )
--- a/extern/CMakeLists.txt
+++ b/extern/CMakeLists.txt
@@ -91,7 +91,3 @@ endif()
 if(WITH_COMPOSITOR_CPU)
  add_subdirectory(smaa_areatex)
 endif()
-
-if(WITH_VULKAN_BACKEND)
-  add_subdirectory(vulkan_memory_allocator)
-endif()
--- a/extern/vulkan_memory_allocator/CMakeLists.txt
+++ b/extern/vulkan_memory_allocator/CMakeLists.txt
@@ -1,24 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-or-later
-# Copyright 2022 Blender Foundation. All rights reserved.
-
-set(INC
-  .
-)
-
-set(INC_SYS
-  ${VULKAN_INCLUDE_DIRS}
-)
-
-set(SRC
-  vk_mem_alloc_impl.cc
-
-  vk_mem_alloc.h
-)
-
-blender_add_lib(extern_vulkan_memory_allocator "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
-
-if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
-  target_compile_options(extern_vulkan_memory_allocator
-    PRIVATE "-Wno-nullability-completeness"
-  )
-endif()
--- a/extern/vulkan_memory_allocator/LICENSE.txt
+++ b/extern/vulkan_memory_allocator/LICENSE.txt
@@ -1,19 +0,0 @@
-Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
--- a/extern/vulkan_memory_allocator/README.blender
+++ b/extern/vulkan_memory_allocator/README.blender
@@ -1,5 +0,0 @@
-Project: VulkanMemoryAllocator
-URL: https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator
-License: MIT
-Upstream version: a6bfc23
-Local modifications: None
--- a/extern/vulkan_memory_allocator/README.md
+++ b/extern/vulkan_memory_allocator/README.md
@@ -1,175 +0,0 @@
-# Vulkan Memory Allocator
-
-Easy to integrate Vulkan memory allocation library.
-
-**Documentation:** Browse online: [Vulkan Memory Allocator](https://gpuopen-librariesandsdks.github.io/VulkanMemoryAllocator/html/) (generated from Doxygen-style comments in [include/vk_mem_alloc.h](include/vk_mem_alloc.h))
-
-**License:** MIT. See [LICENSE.txt](LICENSE.txt)
-
-**Changelog:** See [CHANGELOG.md](CHANGELOG.md)
-
-**Product page:** [Vulkan Memory Allocator on GPUOpen](https://gpuopen.com/gaming-product/vulkan-memory-allocator/)
-
-**Build status:**
-
- Windows: [![Build status](https://ci.appveyor.com/api/projects/status/4vlcrb0emkaio2pn/branch/master?svg=true)](https://ci.appveyor.com/project/adam-sawicki-amd/vulkanmemoryallocator/branch/master)  
- Linux: [![Build Status](https://app.travis-ci.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator.svg?branch=master)](https://app.travis-ci.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator)
-
-[![Average time to resolve an issue](http://isitmaintained.com/badge/resolution/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator.svg)](http://isitmaintained.com/project/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator "Average time to resolve an issue")
-
-# Problem
-
-Memory allocation and resource (buffer and image) creation in Vulkan is difficult (comparing to older graphics APIs, like D3D11 or OpenGL) for several reasons:
-
- It requires a lot of boilerplate code, just like everything else in Vulkan, because it is a low-level and high-performance API.
- There is additional level of indirection: `VkDeviceMemory` is allocated separately from creating `VkBuffer`/`VkImage` and they must be bound together.
- Driver must be queried for supported memory heaps and memory types. Different GPU vendors provide different types of it.
- It is recommended to allocate bigger chunks of memory and assign parts of them to particular resources, as there is a limit on maximum number of memory blocks that can be allocated.
-
-# Features
-
-This library can help game developers to manage memory allocations and resource creation by offering some higher-level functions:
-
-1. Functions that help to choose correct and optimal memory type based on intended usage of the memory.
-   - Required or preferred traits of the memory are expressed using higher-level description comparing to Vulkan flags.
-2. Functions that allocate memory blocks, reserve and return parts of them (`VkDeviceMemory` + offset + size) to the user.
-   - Library keeps track of allocated memory blocks, used and unused ranges inside them, finds best matching unused ranges for new allocations, respects all the rules of alignment and buffer/image granularity.
-3. Functions that can create an image/buffer, allocate memory for it and bind them together - all in one call.
-
-Additional features:
-
- Well-documented - description of all functions and structures provided, along with chapters that contain general description and example code.
- Thread-safety: Library is designed to be used in multithreaded code. Access to a single device memory block referred by different buffers and textures (binding, mapping) is synchronized internally. Memory mapping is reference-counted.
- Configuration: Fill optional members of `VmaAllocatorCreateInfo` structure to provide custom CPU memory allocator, pointers to Vulkan functions and other parameters.
- Customization and integration with custom engines: Predefine appropriate macros to provide your own implementation of all external facilities used by the library like assert, mutex, atomic.
- Support for memory mapping, reference-counted internally. Support for persistently mapped memory: Just allocate with appropriate flag and access the pointer to already mapped memory.
- Support for non-coherent memory. Functions that flush/invalidate memory. `nonCoherentAtomSize` is respected automatically.
- Support for resource aliasing (overlap).
- Support for sparse binding and sparse residency: Convenience functions that allocate or free multiple memory pages at once.
- Custom memory pools: Create a pool with desired parameters (e.g. fixed or limited maximum size) and allocate memory out of it.
- Linear allocator: Create a pool with linear algorithm and use it for much faster allocations and deallocations in free-at-once, stack, double stack, or ring buffer fashion.
- Support for Vulkan 1.0, 1.1, 1.2, 1.3.
- Support for extensions (and equivalent functionality included in new Vulkan versions):
-   - VK_KHR_dedicated_allocation: Just enable it and it will be used automatically by the library.
-   - VK_KHR_buffer_device_address: Flag `VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR` is automatically added to memory allocations where needed.
-   - VK_EXT_memory_budget: Used internally if available to query for current usage and budget. If not available, it falls back to an estimation based on memory heap sizes.
-   - VK_EXT_memory_priority: Set `priority` of allocations or custom pools and it will be set automatically using this extension.
-   - VK_AMD_device_coherent_memory
- Defragmentation of GPU and CPU memory: Let the library move data around to free some memory blocks and make your allocations better compacted.
- Statistics: Obtain brief or detailed statistics about the amount of memory used, unused, number of allocated blocks, number of allocations etc. - globally, per memory heap, and per memory type.
- Debug annotations: Associate custom `void* pUserData` and debug `char* pName` with each allocation.
- JSON dump: Obtain a string in JSON format with detailed map of internal state, including list of allocations, their string names, and gaps between them.
- Convert this JSON dump into a picture to visualize your memory. See [tools/GpuMemDumpVis](tools/GpuMemDumpVis/README.md).
- Debugging incorrect memory usage: Enable initialization of all allocated memory with a bit pattern to detect usage of uninitialized or freed memory. Enable validation of a magic number after every allocation to detect out-of-bounds memory corruption.
- Support for interoperability with OpenGL.
- Virtual allocator: Interface for using core allocation algorithm to allocate any custom data, e.g. pieces of one large buffer.
-
-# Prerequisites
-
- Self-contained C++ library in single header file. No external dependencies other than standard C and C++ library and of course Vulkan. Some features of C++14 used. STL containers, RTTI, or C++ exceptions are not used.
- Public interface in C, in same convention as Vulkan API. Implementation in C++.
- Error handling implemented by returning `VkResult` error codes - same way as in Vulkan.
- Interface documented using Doxygen-style comments.
- Platform-independent, but developed and tested on Windows using Visual Studio. Continuous integration setup for Windows and Linux. Used also on Android, MacOS, and other platforms.
-
-# Example
-
-Basic usage of this library is very simple. Advanced features are optional. After you created global `VmaAllocator` object, a complete code needed to create a buffer may look like this:
-
-```cpp
-VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
-bufferInfo.size = 65536;
-bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
-
-VmaAllocationCreateInfo allocInfo = {};
-allocInfo.usage = VMA_MEMORY_USAGE_AUTO;
-
-VkBuffer buffer;
-VmaAllocation allocation;
-vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr);
-```
-
-With this one function call:
-
-1. `VkBuffer` is created.
-2. `VkDeviceMemory` block is allocated if needed.
-3. An unused region of the memory block is bound to this buffer.
-
-`VmaAllocation` is an object that represents memory assigned to this buffer. It can be queried for parameters like `VkDeviceMemory` handle and offset.
-
-# How to build
-
-On Windows it is recommended to use [CMake UI](https://cmake.org/runningcmake/). Alternatively you can generate a Visual Studio project map using CMake in command line: `cmake -B./build/ -DCMAKE_BUILD_TYPE=Debug -G "Visual Studio 16 2019" -A x64 ./`
-
-On Linux:
-
-```
-mkdir build
-cd build
-cmake ..
-make
-```
-
-The following targets are available
-
-| Target | Description | CMake option | Default setting |
-| ------------- | ------------- | ------------- | ------------- |
-| VmaSample | VMA sample application | `VMA_BUILD_SAMPLE` | `OFF` |
-| VmaBuildSampleShaders | Shaders for VmaSample | `VMA_BUILD_SAMPLE_SHADERS` | `OFF` |
-
-Please note that while VulkanMemoryAllocator library is supported on other platforms besides Windows, VmaSample is not.
-
-These CMake options are available
-
-| CMake option | Description | Default setting |
-| ------------- | ------------- | ------------- |
-| `VMA_RECORDING_ENABLED` | Enable VMA memory recording for debugging | `OFF` |
-| `VMA_USE_STL_CONTAINERS` | Use C++ STL containers instead of VMA's containers | `OFF` |
-| `VMA_STATIC_VULKAN_FUNCTIONS` | Link statically with Vulkan API | `OFF` |
-| `VMA_DYNAMIC_VULKAN_FUNCTIONS` | Fetch pointers to Vulkan functions internally (no static linking) | `ON` |
-| `VMA_DEBUG_ALWAYS_DEDICATED_MEMORY` | Every allocation will have its own memory block | `OFF` |
-| `VMA_DEBUG_INITIALIZE_ALLOCATIONS` | Automatically fill new allocations and destroyed allocations with some bit pattern | `OFF` |
-| `VMA_DEBUG_GLOBAL_MUTEX` | Enable single mutex protecting all entry calls to the library | `OFF` |
-| `VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT` | Never exceed [VkPhysicalDeviceLimits::maxMemoryAllocationCount](https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#limits-maxMemoryAllocationCount) and return error | `OFF` |
-
-# Binaries
-
-The release comes with precompiled binary executable for "VulkanSample" application which contains test suite. It is compiled using Visual Studio 2019, so it requires appropriate libraries to work, including "MSVCP140.dll", "VCRUNTIME140.dll", "VCRUNTIME140_1.dll". If the launch fails with error message telling about those files missing, please download and install [Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019](https://support.microsoft.com/en-us/help/2977003/the-latest-supported-visual-c-downloads), "x64" version.
-
-# Read more
-
-See **[Documentation](https://gpuopen-librariesandsdks.github.io/VulkanMemoryAllocator/html/)**.
-
-# Software using this library
-
- **[X-Plane](https://x-plane.com/)**
- **[Detroit: Become Human](https://gpuopen.com/learn/porting-detroit-3/)**
- **[Vulkan Samples](https://github.com/LunarG/VulkanSamples)** - official Khronos Vulkan samples. License: Apache-style.
- **[Anvil](https://github.com/GPUOpen-LibrariesAndSDKs/Anvil)** - cross-platform framework for Vulkan. License: MIT.
- **[Filament](https://github.com/google/filament)** - physically based rendering engine for Android, Windows, Linux and macOS, from Google. Apache License 2.0.
- **[Atypical Games - proprietary game engine](https://developer.samsung.com/galaxy-gamedev/gamedev-blog/infinitejet.html)**
- **[Flax Engine](https://flaxengine.com/)**
- **[Godot Engine](https://github.com/godotengine/godot/)** - multi-platform 2D and 3D game engine. License: MIT.
- **[Lightweight Java Game Library (LWJGL)](https://www.lwjgl.org/)** - includes binding of the library for Java. License: BSD.
- **[PowerVR SDK](https://github.com/powervr-graphics/Native_SDK)** - C++ cross-platform 3D graphics SDK, from Imagination. License: MIT.
- **[Skia](https://github.com/google/skia)** - complete 2D graphic library for drawing Text, Geometries, and Images, from Google.
- **[The Forge](https://github.com/ConfettiFX/The-Forge)** - cross-platform rendering framework. Apache License 2.0.
- **[VK9](https://github.com/disks86/VK9)** - Direct3D 9 compatibility layer using Vulkan. Zlib lincese.
- **[vkDOOM3](https://github.com/DustinHLand/vkDOOM3)** - Vulkan port of GPL DOOM 3 BFG Edition. License: GNU GPL.
- **[vkQuake2](https://github.com/kondrak/vkQuake2)** - vanilla Quake 2 with Vulkan support. License: GNU GPL.
- **[Vulkan Best Practice for Mobile Developers](https://github.com/ARM-software/vulkan_best_practice_for_mobile_developers)** from ARM. License: MIT.
- **[RPCS3](https://github.com/RPCS3/rpcs3)** - PlayStation 3 emulator/debugger. License: GNU GPLv2.
- **[PPSSPP](https://github.com/hrydgard/ppsspp)** - Playstation Portable emulator/debugger. License: GNU GPLv2+.
-
-[Many other projects on GitHub](https://github.com/search?q=AMD_VULKAN_MEMORY_ALLOCATOR_H&type=Code) and some game development studios that use Vulkan in their games.
-
-# See also
-
- **[D3D12 Memory Allocator](https://github.com/GPUOpen-LibrariesAndSDKs/D3D12MemoryAllocator)** - equivalent library for Direct3D 12. License: MIT.
- **[Awesome Vulkan](https://github.com/vinjn/awesome-vulkan)** - a curated list of awesome Vulkan libraries, debuggers and resources.
- **[VulkanMemoryAllocator-Hpp](https://github.com/malte-v/VulkanMemoryAllocator-Hpp)** - C++ binding for this library. License: CC0-1.0.
- **[PyVMA](https://github.com/realitix/pyvma)** - Python wrapper for this library. Author: Jean-Sébastien B. (@realitix). License: Apache 2.0.
- **[vk-mem](https://github.com/gwihlidal/vk-mem-rs)** - Rust binding for this library. Author: Graham Wihlidal. License: Apache 2.0 or MIT.
- **[Haskell bindings](https://hackage.haskell.org/package/VulkanMemoryAllocator)**, **[github](https://github.com/expipiplus1/vulkan/tree/master/VulkanMemoryAllocator)** - Haskell bindings for this library. Author: Ellie Hermaszewska (@expipiplus1). License BSD-3-Clause.
- **[vma_sample_sdl](https://github.com/rextimmy/vma_sample_sdl)** - SDL port of the sample app of this library (with the goal of running it on multiple platforms, including MacOS). Author: @rextimmy. License: MIT.
- **[vulkan-malloc](https://github.com/dylanede/vulkan-malloc)** - Vulkan memory allocation library for Rust. Based on version 1 of this library. Author: Dylan Ede (@dylanede). License: MIT / Apache 2.0.
--- a/extern/vulkan_memory_allocator/vk_mem_alloc.h
+++ b/extern/vulkan_memory_allocator/vk_mem_alloc.h
--- a/extern/vulkan_memory_allocator/vk_mem_alloc_impl.cc
+++ b/extern/vulkan_memory_allocator/vk_mem_alloc_impl.cc
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later
- * Copyright 2022 Blender Foundation. All rights reserved. */
-
-#ifdef __APPLE__
-#  include <MoltenVK/vk_mvk_moltenvk.h>
-#else
-#  include <vulkan/vulkan.h>
-#endif
-
-#define VMA_IMPLEMENTATION
-
-#include "vk_mem_alloc.h"
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -253,33 +253,6 @@ if(WITH_CYCLES_OSL)
  )
 endif()

-if(WITH_CYCLES_DEVICE_CUDA OR WITH_CYCLES_DEVICE_OPTIX)
-  add_definitions(-DWITH_CUDA)
-
-  if(WITH_CUDA_DYNLOAD)
-    include_directories(
-      ../../extern/cuew/include
-    )
-    add_definitions(-DWITH_CUDA_DYNLOAD)
-  else()
-    include_directories(
-      SYSTEM
-      ${CUDA_TOOLKIT_INCLUDE}
-    )
-  endif()
-endif()
-
-if(WITH_CYCLES_DEVICE_HIP)
-  add_definitions(-DWITH_HIP)
-
-  if(WITH_HIP_DYNLOAD)
-    include_directories(
-      ../../extern/hipew/include
-    )
-    add_definitions(-DWITH_HIP_DYNLOAD)
-  endif()
-endif()
-
 if(WITH_CYCLES_DEVICE_OPTIX)
  find_package(OptiX 7.3.0)

@@ -288,16 +261,12 @@ if(WITH_CYCLES_DEVICE_OPTIX)
    include_directories(
      SYSTEM
      ${OPTIX_INCLUDE_DIR}
-    )
+      )
  else()
    set_and_warn_library_found("OptiX" OPTIX_FOUND WITH_CYCLES_DEVICE_OPTIX)
  endif()
 endif()

-if(WITH_CYCLES_DEVICE_METAL)
-  add_definitions(-DWITH_METAL)
-endif()
-
 if (WITH_CYCLES_DEVICE_ONEAPI)
  add_definitions(-DWITH_ONEAPI)
 endif()
@@ -423,7 +392,7 @@ if(WITH_CYCLES_HYDRA_RENDER_DELEGATE AND (NOT WITH_BLENDER) AND (NOT WITH_CYCLES
  set(CYCLES_INSTALL_PATH ${CYCLES_INSTALL_PATH}/hdCycles/resources)
 endif()

-if(WITH_CYCLES_CUDA_BINARIES)
+if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER))
  if(MSVC)
    set(MAX_MSVC 1800)
    if(${CUDA_VERSION} EQUAL "8.0")
@@ -435,7 +404,24 @@ if(WITH_CYCLES_CUDA_BINARIES)
    elseif(${CUDA_VERSION} VERSION_GREATER_EQUAL 10.0)
      set(MAX_MSVC 1999)
    endif()
+    if(NOT MSVC_VERSION LESS ${MAX_MSVC} OR CMAKE_C_COMPILER_ID MATCHES "Clang")
+      message(STATUS "nvcc not supported for this compiler version, using cycles_cubin_cc instead.")
+      set(WITH_CYCLES_CUBIN_COMPILER ON)
+    endif()
    unset(MAX_MSVC)
+  elseif(APPLE)
+    if(NOT ("${XCODE_VERSION}" VERSION_LESS 10.0))
+      message(STATUS "nvcc not supported for this compiler version, using cycles_cubin_cc instead.")
+      set(WITH_CYCLES_CUBIN_COMPILER ON)
+    endif()
+  endif()
+endif()
+
+# NVRTC gives wrong rendering result in CUDA 10.0, so we must use NVCC.
+if(WITH_CYCLES_CUDA_BINARIES AND WITH_CYCLES_CUBIN_COMPILER AND NOT WITH_CYCLES_CUBIN_COMPILER_OVERRRIDE)
+  if(NOT ("${CUDA_VERSION}" VERSION_LESS 10.0))
+    message(STATUS "cycles_cubin_cc not supported for CUDA 10.0+, using nvcc instead.")
+    set(WITH_CYCLES_CUBIN_COMPILER OFF)
  endif()
 endif()

--- a/intern/cycles/app/CMakeLists.txt
+++ b/intern/cycles/app/CMakeLists.txt
@@ -103,3 +103,32 @@ if(WITH_CYCLES_STANDALONE)
    $<TARGET_FILE:cycles>
    DESTINATION ${CMAKE_INSTALL_PREFIX})
 endif()
+
+#####################################################################
+# Cycles cubin compiler executable
+#####################################################################
+
+if(WITH_CYCLES_CUBIN_COMPILER)
+  # NVRTC is not supported on 32 bit x86, so for a 32 bit MSVC build the
+  # cubin compiler is skipped and CUDA binaries are switched off.
+  if(MSVC AND NOT CMAKE_CL_64)
+    message("Building with CUDA not supported on 32 bit, skipped")
+    set(WITH_CYCLES_CUDA_BINARIES OFF CACHE BOOL "" FORCE)
+  else()
+    set(SRC
+      cycles_cubin_cc.cpp
+    )
+    set(INC
+      ${CMAKE_CURRENT_SOURCE_DIR}/../../../extern/cuew/include
+    )
+    set(LIB)
+    cycles_external_libraries_append(LIB)
+    add_executable(cycles_cubin_cc ${SRC})
+    # Scope the cuew headers to this target instead of the whole directory.
+    target_include_directories(cycles_cubin_cc PRIVATE ${INC})
+    target_link_libraries(cycles_cubin_cc PRIVATE ${LIB})
+    unset(SRC)
+    unset(INC)
+    unset(LIB)
+  endif()
+endif()
--- a/intern/cycles/app/cycles_cubin_cc.cpp
+++ b/intern/cycles/app/cycles_cubin_cc.cpp
@@ -0,0 +1,311 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2017-2022 Blender Foundation */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <string>
+#include <vector>
+
+#include <OpenImageIO/argparse.h>
+#include <OpenImageIO/filesystem.h>
+
+#include "cuew.h"
+
+#ifdef _MSC_VER
+#  include <Windows.h>
+#endif
+
+using std::string;
+using std::vector;
+
+/* Number formatting in this file relies on the std::to_string() overloads that
+ * <string> provides. No custom overload is declared: adding templates to
+ * namespace std is undefined behavior. */
+
+/* Options controlling a single cubin/PTX build. */
+class CompilationSettings {
+ public:
+  /* Paths bound to command line options in parse_parameters(). */
+  string cuda_toolkit_dir;
+  string input_file;
+  string output_file;
+  /* Location of the generated PTX, assigned by compile_cuda(). */
+  string ptx_file;
+  /* Values forwarded to the compiler as -D and -I options. */
+  vector<string> defines;
+  vector<string> includes;
+  /* Target shader model; zero means it was not given on the command line. */
+  int target_arch = 0;
+  /* Passed to ptxas as -m<bits>. */
+  int bits = 64;
+  bool verbose = false;
+  bool fast_math = false;
+  /* Stop after emitting PTX instead of running ptxas. */
+  bool ptx_only = false;
+};
+
+/* Compile the CUDA source in `settings.input_file` to PTX with NVRTC.
+ *
+ * When `settings.ptx_only` is set the PTX is written to `settings.output_file`,
+ * otherwise to a uniquely named file in the temporary directory. The path that
+ * was written is stored in `settings.ptx_file`.
+ *
+ * Returns false after printing a message to stderr when any step fails. */
+static bool compile_cuda(CompilationSettings &settings)
+{
+  /* Header names registered with NVRTC, each mapped to a single newline. */
+  const char *headers[] = {"stdlib.h", "float.h", "math.h", "stdio.h", "stddef.h"};
+  const char *header_content[] = {"\n", "\n", "\n", "\n", "\n"};
+  const int num_headers = (int)(sizeof(headers) / sizeof(headers[0]));
+
+  printf("Building %s\n", settings.input_file.c_str());
+
+  string code;
+  if (!OIIO::Filesystem::read_text_file(settings.input_file, code)) {
+    fprintf(stderr, "Error: unable to read %s\n", settings.input_file.c_str());
+    return false;
+  }
+
+  /* Assemble the compiler options: include paths, defines, then fixed flags. */
+  vector<string> options;
+  for (size_t i = 0; i < settings.includes.size(); i++) {
+    options.push_back("-I" + settings.includes[i]);
+  }
+
+  for (size_t i = 0; i < settings.defines.size(); i++) {
+    options.push_back("-D" + settings.defines[i]);
+  }
+  options.push_back("-D__KERNEL_CUDA_VERSION__=" + std::to_string(cuewNvrtcVersion()));
+  options.push_back("-arch=compute_" + std::to_string(settings.target_arch));
+  options.push_back("--device-as-default-execution-space");
+  options.push_back("-DCYCLES_CUBIN_CC");
+  options.push_back("--std=c++11");
+  if (settings.fast_math) {
+    options.push_back("--use_fast_math");
+  }
+
+  nvrtcProgram prog;
+  nvrtcResult result = nvrtcCreateProgram(&prog,
+                                          code.c_str(),    // buffer
+                                          NULL,            // name
+                                          num_headers,     // numHeaders
+                                          header_content,  // headers
+                                          headers);        // includeNames
+
+  if (result != NVRTC_SUCCESS) {
+    fprintf(stderr, "Error: nvrtcCreateProgram failed (%d)\n\n", (int)result);
+    return false;
+  }
+
+  /* Transfer options to a classic C array. */
+  vector<const char *> opts(options.size());
+  for (size_t i = 0; i < options.size(); i++) {
+    opts[i] = options[i].c_str();
+  }
+
+  result = nvrtcCompileProgram(prog, (int)opts.size(), &opts[0]);
+
+  if (result != NVRTC_SUCCESS) {
+    fprintf(stderr, "Error: nvrtcCompileProgram failed (%d)\n\n", (int)result);
+
+    /* Print the compiler log only when it could actually be retrieved, so an
+     * uninitialized size or an empty buffer is never used. */
+    size_t log_size = 0;
+    if (nvrtcGetProgramLogSize(prog, &log_size) == NVRTC_SUCCESS && log_size > 0) {
+      vector<char> log(log_size);
+      if (nvrtcGetProgramLog(prog, &log[0]) == NVRTC_SUCCESS) {
+        fprintf(stderr, "%s\n", &log[0]);
+      }
+    }
+
+    return false;
+  }
+
+  /* Retrieve the ptx code. */
+  size_t ptx_size = 0;
+  result = nvrtcGetPTXSize(prog, &ptx_size);
+  if (result != NVRTC_SUCCESS) {
+    fprintf(stderr, "Error: nvrtcGetPTXSize failed (%d)\n\n", (int)result);
+    return false;
+  }
+  if (ptx_size == 0) {
+    fprintf(stderr, "Error: nvrtcGetPTXSize returned an empty program\n\n");
+    return false;
+  }
+
+  vector<char> ptx_code(ptx_size);
+  result = nvrtcGetPTX(prog, &ptx_code[0]);
+  if (result != NVRTC_SUCCESS) {
+    fprintf(stderr, "Error: nvrtcGetPTX failed (%d)\n\n", (int)result);
+    return false;
+  }
+  if (settings.ptx_only) {
+    settings.ptx_file = settings.output_file;
+  }
+  else {
+    /* Write a file in the temp folder with the ptx code. */
+    settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" +
+                        OIIO::Filesystem::unique_path();
+  }
+
+  /* Report an unwritable destination instead of passing NULL to fwrite(). */
+  FILE *f = fopen(settings.ptx_file.c_str(), "wb");
+  if (f == NULL) {
+    fprintf(stderr, "Error: unable to open %s for writing\n", settings.ptx_file.c_str());
+    return false;
+  }
+  const size_t written = fwrite(&ptx_code[0], 1, ptx_size, f);
+  const bool closed = (fclose(f) == 0);
+  if (written != ptx_size || !closed) {
+    fprintf(stderr, "Error: unable to write %s\n", settings.ptx_file.c_str());
+    return false;
+  }
+
+  return true;
+}
+
+/* Run ptxas on `settings.ptx_file` to produce `settings.output_file`, then
+ * delete the PTX file.
+ *
+ * Returns false when ptxas exits with a non-zero status. A failure to delete
+ * the PTX file is reported on stderr but does not change the result. */
+static bool link_ptxas(CompilationSettings &settings)
+{
+  /* The executable is taken from <cuda_toolkit_dir>/bin/ when a toolkit
+   * directory was given, otherwise the bare `ptxas` name is used. */
+  string command = "\"";
+  if (!settings.cuda_toolkit_dir.empty()) {
+    command += settings.cuda_toolkit_dir + "/bin/";
+  }
+  command += "ptxas\" " + settings.ptx_file;
+  command += " -o " + settings.output_file;
+  command += " --gpu-name sm_" + std::to_string(settings.target_arch);
+  command += " -m" + std::to_string(settings.bits);
+
+  if (settings.verbose) {
+    command += " --verbose";
+    printf("%s\n", command.c_str());
+  }
+
+  const int exit_code = system(command.c_str());
+  if (exit_code != 0) {
+    fprintf(stderr, "Error: ptxas failed (%d)\n\n", exit_code);
+    return false;
+  }
+
+  const bool removed = OIIO::Filesystem::remove(settings.ptx_file);
+  if (!removed) {
+    fprintf(stderr, "Error: removing %s\n\n", settings.ptx_file.c_str());
+  }
+
+  return true;
+}
+
+/* Load NVRTC through cuew and verify that everything this tool needs is there.
+ *
+ * Returns false after printing a message to stderr when cuew fails to
+ * initialize, when the NVRTC version is older than 8.0, or when one of the
+ * NVRTC entry points used by compile_cuda() was not resolved. */
+static bool init(CompilationSettings &settings)
+{
+#ifdef _MSC_VER
+  /* Presumably lets the NVRTC library be found inside the toolkit directory. */
+  if (settings.cuda_toolkit_dir.size()) {
+    SetDllDirectory((settings.cuda_toolkit_dir + "/bin").c_str());
+  }
+#else
+  (void)settings;
+#endif
+
+  int cuewresult = cuewInit(CUEW_INIT_NVRTC);
+  if (cuewresult != CUEW_SUCCESS) {
+    fprintf(stderr, "Error: cuew init failed (%d)\n\n", cuewresult);
+    return false;
+  }
+
+  /* Report the same version number that is being tested. */
+  const int nvrtc_version = cuewNvrtcVersion();
+  if (nvrtc_version < 80) {
+    fprintf(stderr, "Error: only cuda 8 and higher is supported, %d\n\n", nvrtc_version);
+    return false;
+  }
+
+  if (!nvrtcCreateProgram) {
+    fprintf(stderr, "Error: nvrtcCreateProgram not resolved\n");
+    return false;
+  }
+
+  if (!nvrtcCompileProgram) {
+    fprintf(stderr, "Error: nvrtcCompileProgram not resolved\n");
+    return false;
+  }
+
+  if (!nvrtcGetProgramLogSize) {
+    fprintf(stderr, "Error: nvrtcGetProgramLogSize not resolved\n");
+    return false;
+  }
+
+  if (!nvrtcGetProgramLog) {
+    fprintf(stderr, "Error: nvrtcGetProgramLog not resolved\n");
+    return false;
+  }
+
+  if (!nvrtcGetPTXSize) {
+    fprintf(stderr, "Error: nvrtcGetPTXSize not resolved\n");
+    return false;
+  }
+
+  if (!nvrtcGetPTX) {
+    fprintf(stderr, "Error: nvrtcGetPTX not resolved\n");
+    return false;
+  }
+
+  return true;
+}
+
+/* Fill `settings` from the command line.
+ *
+ * Returns false after printing a message to stderr when parsing fails or when
+ * the output file (-o), input file (-i) or target shader model (-target) is
+ * missing. */
+static bool parse_parameters(int argc, const char **argv, CompilationSettings &settings)
+{
+  OIIO::ArgParse ap;
+  ap.options("Usage: cycles_cubin_cc [options]",
+             "-target %d",
+             &settings.target_arch,
+             "target shader model",
+             "-m %d",
+             &settings.bits,
+             "Cuda architecture bits",
+             "-i %s",
+             &settings.input_file,
+             "Input source filename",
+             "-o %s",
+             &settings.output_file,
+             "Output cubin filename",
+             "-I %L",
+             &settings.includes,
+             "Add additional includepath",
+             "-D %L",
+             &settings.defines,
+             "Add additional defines",
+             "-ptx",
+             &settings.ptx_only,
+             "emit PTX code",
+             "-v",
+             &settings.verbose,
+             "Use verbose logging",
+             "--use_fast_math",
+             &settings.fast_math,
+             "Use fast math",
+             /* "/bin" is appended to this path by the code that uses it. */
+             "-cuda-toolkit-dir %s",
+             &settings.cuda_toolkit_dir,
+             "path to the cuda toolkit root directory (the one containing bin)",
+             NULL);
+
+  if (ap.parse(argc, argv) < 0) {
+    fprintf(stderr, "%s\n", ap.geterror().c_str());
+    ap.usage();
+    return false;
+  }
+
+  if (!settings.output_file.size()) {
+    fprintf(stderr, "Error: Output file not set(-o), required\n\n");
+    return false;
+  }
+
+  if (!settings.input_file.size()) {
+    fprintf(stderr, "Error: Input file not set(-i), required\n\n");
+    return false;
+  }
+
+  /* target_arch keeps its initial value of zero when -target was not passed. */
+  if (!settings.target_arch) {
+    fprintf(stderr, "Error: target shader model not set (-target), required\n\n");
+    return false;
+  }
+
+  return true;
+}
+
+/* Entry point: parse the arguments, load NVRTC, compile the input to PTX and,
+ * unless only PTX was requested, assemble it with ptxas. The process exit
+ * status is EXIT_FAILURE as soon as one of these steps fails. */
+int main(int argc, const char **argv)
+{
+  CompilationSettings settings;
+
+  if (!parse_parameters(argc, argv, settings)) {
+    fprintf(stderr, "Error: invalid parameters, exiting\n");
+    return EXIT_FAILURE;
+  }
+
+  if (!init(settings)) {
+    fprintf(stderr, "Error: initialization error, exiting\n");
+    return EXIT_FAILURE;
+  }
+
+  if (!compile_cuda(settings)) {
+    fprintf(stderr, "Error: compilation error, exiting\n");
+    return EXIT_FAILURE;
+  }
+
+  /* link_ptxas() prints its own diagnostics, so none are added here. */
+  if (!settings.ptx_only && !link_ptxas(settings)) {
+    return EXIT_FAILURE;
+  }
+
+  return EXIT_SUCCESS;
+}
--- a/intern/cycles/blender/addon/engine.py
+++ b/intern/cycles/blender/addon/engine.py
@@ -204,6 +204,7 @@ def list_render_passes(scene, srl):
    if crl.use_pass_volume_indirect:       yield ("VolumeInd",     "RGB",  'COLOR')
    if srl.use_pass_emit:                  yield ("Emit",          "RGB",  'COLOR')
    if srl.use_pass_environment:           yield ("Env",           "RGB",  'COLOR')
+    if srl.use_pass_shadow:                yield ("Shadow",        "RGB",  'COLOR')
    if srl.use_pass_ambient_occlusion:     yield ("AO",            "RGB",  'COLOR')
    if crl.use_pass_shadow_catcher:        yield ("Shadow Catcher",      "RGB",  'COLOR')
    # autopep8: on
--- a/intern/cycles/blender/addon/presets.py
+++ b/intern/cycles/blender/addon/presets.py
@@ -91,7 +91,7 @@ class AddPresetPerformance(AddPresetBase, Operator):
    preset_menu = "CYCLES_PT_performance_presets"

    preset_defines = [
-        "render = bpy.context.scene.render",
+        "render = bpy.context.scene.render",
        "cycles = bpy.context.scene.cycles"
    ]

--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -86,29 +86,6 @@ enum_sampling_pattern = (
    ('PROGRESSIVE_MULTI_JITTER', "Progressive Multi-Jitter", "Use Progressive Multi-Jitter random sampling pattern", 1),
 )

-enum_emission_sampling = (
-    ('NONE',
-     'None',
-     "Do not use this surface as a light for sampling",
-     0),
-    ('AUTO',
-     'Auto',
-     "Automatically determine if the surface should be treated as a light for sampling, based on estimated emission intensity",
-     1),
-    ('FRONT',
-     'Front',
-     "Treat only front side of the surface as a light, usually for closed meshes whose interior is not visible",
-     2),
-    ('BACK',
-     'Back',
-     "Treat only back side of the surface as a light for sampling",
-     3),
-    ('FRONT_BACK',
-     'Front and Back',
-     "Treat surface as a light for sampling, emitting from both the front and back side",
-     4),
-)
-
 enum_volume_sampling = (
    ('DISTANCE',
     "Distance",
@@ -170,6 +147,7 @@ enum_view3d_shading_render_pass = (
    ('EMISSION', "Emission", "Show the Emission render pass"),
    ('BACKGROUND', "Background", "Show the Background render pass"),
    ('AO', "Ambient Occlusion", "Show the Ambient Occlusion render pass"),
+    ('SHADOW', "Shadow", "Show the Shadow render pass"),
    ('SHADOW_CATCHER', "Shadow Catcher", "Show the Shadow Catcher render pass"),

    ('', "Light", ""),
@@ -503,12 +481,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
        default='MULTIPLE_IMPORTANCE_SAMPLING',
    )

-    use_light_tree: BoolProperty(
-        name="Light Tree",
-        description="Sample multiple lights more efficiently based on estimated contribution at every shading point",
-        default=True,
-    )
-
    min_light_bounces: IntProperty(
        name="Min Light Bounces",
        description="Minimum number of light bounces. Setting this higher reduces noise in the first bounces, "
@@ -650,7 +622,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):

    transparent_max_bounces: IntProperty(
        name="Transparent Max Bounces",
-        description="Maximum number of transparent bounces. This is independent of maximum number of other bounces",
+        description="Maximum number of transparent bounces. This is independent of maximum number of other bounces",
        min=0, max=1024,
        default=8,
    )
@@ -1071,13 +1043,13 @@ class CyclesCameraSettings(bpy.types.PropertyGroup):

 class CyclesMaterialSettings(bpy.types.PropertyGroup):

-    emission_sampling: EnumProperty(
-        name="Emission Sampling",
-        description="Sampling strategy for emissive surfaces",
-        items=enum_emission_sampling,
-        default="AUTO",
+    sample_as_light: BoolProperty(
+        name="Multiple Importance Sample",
+        description="Use multiple importance sampling for this material, "
+        "disabling may reduce overall noise for large "
+        "objects that emit little light compared to other light sources",
+        default=True,
    )
-
    use_transparent_shadow: BoolProperty(
        name="Transparent Shadows",
        description="Use transparent shadows for this material if it contains a Transparent BSDF, "
@@ -1670,7 +1642,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
                    col.label(text="and Windows driver version 101.3430 or newer", icon='BLANK1')
                elif sys.platform.startswith("linux"):
                    col.label(text="Requires Intel GPU with Xe-HPG architecture and", icon='BLANK1')
-                    col.label(text="  - intel-level-zero-gpu version 1.3.23904 or newer", icon='BLANK1')
+                    col.label(text="  - Linux driver version xx.xx.23904 or newer", icon='BLANK1')
                    col.label(text="  - oneAPI Level-Zero Loader", icon='BLANK1')
            elif device_type == 'METAL':
                col.label(text="Requires Apple Silicon with macOS 12.2 or newer", icon='BLANK1')
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -154,9 +154,8 @@ def use_mnee(context):
    # The MNEE kernel doesn't compile on macOS < 13.
    if use_metal(context):
        import platform
-        version, _, _ = platform.mac_ver()
-        major_version = version.split(".")[0]
-        if int(major_version) < 13:
+        v, _, _ = platform.mac_ver()
+        if int(v.split(".")[0]) < 13:
            return False
    return True

@@ -314,11 +313,10 @@ class CYCLES_RENDER_PT_sampling_path_guiding(CyclesButtonsPanel, Panel):
        layout.use_property_decorate = False
        layout.active = cscene.use_guiding

-        layout.prop(cscene, "guiding_training_samples")
-
        col = layout.column(align=True)
-        col.prop(cscene, "use_surface_guiding", text="Surface")
-        col.prop(cscene, "use_volume_guiding", text="Volume")
+        col.prop(cscene, "use_surface_guiding")
+        col.prop(cscene, "use_volume_guiding")
+        col.prop(cscene, "guiding_training_samples")


 class CYCLES_RENDER_PT_sampling_path_guiding_debug(CyclesDebugButtonsPanel, Panel):
@@ -383,6 +381,7 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
        col = layout.column(align=True)
        col.prop(cscene, "min_light_bounces")
        col.prop(cscene, "min_transparent_bounces")
+        col.prop(cscene, "light_sampling_threshold", text="Light Threshold")

        for view_layer in scene.view_layers:
            if view_layer.samples > 0:
@@ -391,31 +390,6 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
                break


-class CYCLES_RENDER_PT_sampling_lights(CyclesButtonsPanel, Panel):
-    bl_label = "Lights"
-    bl_parent_id = "CYCLES_RENDER_PT_sampling"
-    bl_options = {'DEFAULT_CLOSED'}
-
-    def draw_header(self, context):
-        layout = self.layout
-        scene = context.scene
-        cscene = scene.cycles
-
-    def draw(self, context):
-        layout = self.layout
-        layout.use_property_split = True
-        layout.use_property_decorate = False
-
-        scene = context.scene
-        cscene = scene.cycles
-
-        col = layout.column(align=True)
-        col.prop(cscene, "use_light_tree")
-        sub = col.row()
-        sub.prop(cscene, "light_sampling_threshold", text="Light Threshold")
-        sub.active = not cscene.use_light_tree
-
-
 class CYCLES_RENDER_PT_subdivision(CyclesButtonsPanel, Panel):
    bl_label = "Subdivision"
    bl_options = {'DEFAULT_CLOSED'}
@@ -978,6 +952,7 @@ class CYCLES_RENDER_PT_passes_light(CyclesButtonsPanel, Panel):
        col = layout.column(heading="Other", align=True)
        col.prop(view_layer, "use_pass_emit", text="Emission")
        col.prop(view_layer, "use_pass_environment")
+        col.prop(view_layer, "use_pass_shadow")
        col.prop(view_layer, "use_pass_ambient_occlusion", text="Ambient Occlusion")
        col.prop(cycles_view_layer, "use_pass_shadow_catcher")

@@ -1855,9 +1830,9 @@ class CYCLES_MATERIAL_PT_settings_surface(CyclesButtonsPanel, Panel):
        cmat = mat.cycles

        col = layout.column()
-        col.prop(cmat, "displacement_method", text="Displacement")
-        col.prop(cmat, "emission_sampling")
+        col.prop(cmat, "sample_as_light", text="Multiple Importance")
        col.prop(cmat, "use_transparent_shadow")
+        col.prop(cmat, "displacement_method", text="Displacement")

    def draw(self, context):
        self.draw_shared(self, context.material)
@@ -2389,7 +2364,6 @@ classes = (
    CYCLES_RENDER_PT_sampling_render_denoise,
    CYCLES_RENDER_PT_sampling_path_guiding,
    CYCLES_RENDER_PT_sampling_path_guiding_debug,
-    CYCLES_RENDER_PT_sampling_lights,
    CYCLES_RENDER_PT_sampling_advanced,
    CYCLES_RENDER_PT_light_paths,
    CYCLES_RENDER_PT_light_paths_max_bounces,
--- a/intern/cycles/blender/addon/version_update.py
+++ b/intern/cycles/blender/addon/version_update.py
@@ -99,7 +99,7 @@ def do_versions(self):
        library_versions.setdefault(library.version, []).append(library)

    # Do versioning per library, since they might have different versions.
-    max_need_versioning = (3, 5, 2)
+    max_need_versioning = (3, 0, 25)
    for version, libraries in library_versions.items():
        if version > max_need_versioning:
            continue
@@ -297,8 +297,3 @@ def do_versions(self):
                cmat = mat.cycles
                if not cmat.is_property_set("displacement_method"):
                    cmat.displacement_method = 'DISPLACEMENT'
-
-            if version <= (3, 5, 3):
-                cmat = mat.cycles
-                if not cmat.get("sample_as_light", True):
-                    cmat.emission_sampling = 'NONE'
--- a/intern/cycles/blender/display_driver.cpp
+++ b/intern/cycles/blender/display_driver.cpp
--- a/intern/cycles/blender/display_driver.h
+++ b/intern/cycles/blender/display_driver.h
@@ -15,10 +15,6 @@
 #include "util/unique_ptr.h"
 #include "util/vector.h"

-typedef struct GPUContext GPUContext;
-typedef struct GPUFence GPUFence;
-typedef struct GPUShader GPUShader;
-
 CCL_NAMESPACE_BEGIN

 /* Base class of shader used for display driver rendering. */
@@ -33,7 +29,7 @@ class BlenderDisplayShader {
  BlenderDisplayShader() = default;
  virtual ~BlenderDisplayShader() = default;

-  virtual GPUShader *bind(int width, int height) = 0;
+  virtual void bind(int width, int height) = 0;
  virtual void unbind() = 0;

  /* Get attribute location for position and texture coordinate respectively.
@@ -44,7 +40,7 @@ class BlenderDisplayShader {
 protected:
  /* Get program of this display shader.
   * NOTE: The shader needs to be bound to have access to this. */
-  virtual GPUShader *get_shader_program() = 0;
+  virtual uint get_shader_program() = 0;

  /* Cached values of various OpenGL resources. */
  int position_attribute_location_ = -1;
@@ -55,16 +51,16 @@ class BlenderDisplayShader {
 * display space shader. */
 class BlenderFallbackDisplayShader : public BlenderDisplayShader {
 public:
-  virtual GPUShader *bind(int width, int height) override;
+  virtual void bind(int width, int height) override;
  virtual void unbind() override;

 protected:
-  virtual GPUShader *get_shader_program() override;
+  virtual uint get_shader_program() override;

  void create_shader_if_needed();
  void destroy_shader();

-  GPUShader *shader_program_ = 0;
+  uint shader_program_ = 0;
  int image_texture_location_ = -1;
  int fullscreen_location_ = -1;

@@ -77,17 +73,17 @@ class BlenderDisplaySpaceShader : public BlenderDisplayShader {
 public:
  BlenderDisplaySpaceShader(BL::RenderEngine &b_engine, BL::Scene &b_scene);

-  virtual GPUShader *bind(int width, int height) override;
+  virtual void bind(int width, int height) override;
  virtual void unbind() override;

 protected:
-  virtual GPUShader *get_shader_program() override;
+  virtual uint get_shader_program() override;

  BL::RenderEngine b_engine_;
  BL::Scene &b_scene_;

  /* Cached values of various OpenGL resources. */
-  GPUShader *shader_program_ = nullptr;
+  uint shader_program_ = 0;
 };

 /* Display driver implementation which is specific for Blender viewport integration. */
@@ -126,9 +122,6 @@ class BlenderDisplayDriver : public DisplayDriver {
  void gpu_context_lock();
  void gpu_context_unlock();

-  /* Create GPU resources used by the display driver. */
-  bool gpu_resources_create();
-
  /* Destroy all GPU resources which are being used by this object. */
  void gpu_resources_destroy();

@@ -144,8 +137,8 @@ class BlenderDisplayDriver : public DisplayDriver {
  struct Tiles;
  unique_ptr<Tiles> tiles_;

-  GPUFence *gpu_render_sync_ = nullptr;
-  GPUFence *gpu_upload_sync_ = nullptr;
+  void *gl_render_sync_ = nullptr;
+  void *gl_upload_sync_ = nullptr;

  float2 zoom_ = make_float2(1.0f, 1.0f);
 };
--- a/intern/cycles/blender/image.cpp
+++ b/intern/cycles/blender/image.cpp
@@ -72,11 +72,6 @@ bool BlenderImageLoader::load_metadata(const ImageDeviceFeatures &, ImageMetaDat
    metadata.colorspace = u_colorspace_raw;
  }
  else {
-    /* In some cases (e.g. T94135), the colorspace setting in Blender gets updated as part of the
-     * metadata queries in this function, so update the colorspace setting here. */
-    PointerRNA colorspace_ptr = b_image.colorspace_settings().ptr;
-    metadata.colorspace = get_enum_identifier(colorspace_ptr, "name");
-
    if (metadata.channels == 1) {
      metadata.type = IMAGE_DATA_TYPE_BYTE;
    }
--- a/intern/cycles/blender/mesh.cpp
+++ b/intern/cycles/blender/mesh.cpp
@@ -1085,11 +1085,11 @@ static void create_subd_mesh(Scene *scene,
  const int edges_num = b_mesh.edges.length();

  if (edges_num != 0 && b_mesh.edge_creases.length() > 0) {
-    BL::MeshEdgeCreaseLayer creases = b_mesh.edge_creases[0];
-
    size_t num_creases = 0;
+    const float *creases = static_cast<float *>(b_mesh.edge_creases[0].ptr.data);
+
    for (int i = 0; i < edges_num; i++) {
-      if (creases.data[i].value() != 0.0f) {
+      if (creases[i] != 0.0f) {
        num_creases++;
      }
    }
@@ -1098,18 +1098,17 @@ static void create_subd_mesh(Scene *scene,

    const MEdge *edges = static_cast<MEdge *>(b_mesh.edges[0].ptr.data);
    for (int i = 0; i < edges_num; i++) {
-      const float crease = creases.data[i].value();
-      if (crease != 0.0f) {
+      if (creases[i] != 0.0f) {
        const MEdge &b_edge = edges[i];
-        mesh->add_edge_crease(b_edge.v1, b_edge.v2, crease);
+        mesh->add_edge_crease(b_edge.v1, b_edge.v2, creases[i]);
      }
    }
-  }

-  for (BL::MeshVertexCreaseLayer &c : b_mesh.vertex_creases) {
-    for (int i = 0; i < c.data.length(); ++i) {
-      if (c.data[i].value() != 0.0f) {
-        mesh->add_vertex_crease(i, c.data[i].value());
+    for (BL::MeshVertexCreaseLayer &c : b_mesh.vertex_creases) {
+      for (int i = 0; i < c.data.length(); ++i) {
+        if (c.data[i].value() != 0.0f) {
+          mesh->add_vertex_crease(i, c.data[i].value());
+        }
      }
    }
  }
--- a/intern/cycles/blender/python.cpp
+++ b/intern/cycles/blender/python.cpp
@@ -18,6 +18,7 @@
 #include "util/guiding.h"
 #include "util/log.h"
 #include "util/md5.h"
+#include "util/opengl.h"
 #include "util/openimagedenoise.h"
 #include "util/path.h"
 #include "util/string.h"
@@ -25,8 +26,6 @@
 #include "util/tbb.h"
 #include "util/types.h"

-#include "GPU_state.h"
-
 #ifdef WITH_OSL
 #  include "scene/osl.h"

@@ -338,7 +337,7 @@ static PyObject *view_draw_func(PyObject * /*self*/, PyObject *args)
  if (PyLong_AsVoidPtr(pyrv3d)) {
    /* 3d view drawing */
    int viewport[4];
-    GPU_viewport_size_get_i(viewport);
+    glGetIntegerv(GL_VIEWPORT, viewport);

    session->view_draw(viewport[2], viewport[3]);
  }
@@ -479,7 +478,6 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)

    /* Read metadata. */
    bool is_bool_param = false;
-    bool hide_value = !param->validdefault;
    ustring param_label = param->name;

    for (const OSL::OSLQuery::Parameter &metadata : param->metadata) {
@@ -489,9 +487,6 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)
          if (metadata.sdefault[0] == "boolean" || metadata.sdefault[0] == "checkBox") {
            is_bool_param = true;
          }
-          else if (metadata.sdefault[0] == "null") {
-            hide_value = true;
-          }
        }
        else if (metadata.name == "label") {
          /* Socket label. */
@@ -601,9 +596,6 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)
            if (b_sock.name() != param_label) {
              b_sock.name(param_label.string());
            }
-            if (b_sock.hide_value() != hide_value) {
-              b_sock.hide_value(hide_value);
-            }
            used_sockets.insert(b_sock.ptr.data);
            found_existing = true;
          }
@@ -643,8 +635,6 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)
        set_boolean(b_sock.ptr, "default_value", default_boolean);
      }

-      b_sock.hide_value(hide_value);
-
      used_sockets.insert(b_sock.ptr.data);
    }
  }
--- a/intern/cycles/blender/session.cpp
+++ b/intern/cycles/blender/session.cpp
@@ -559,6 +559,11 @@ static bool bake_setup_pass(Scene *scene, const string &bake_type_str, const int
                                     0);
    integrator->set_use_emission((bake_filter & BL::BakeSettings::pass_filter_EMIT) != 0);
  }
+  /* Shadow pass. */
+  else if (strcmp(bake_type, "SHADOW") == 0) {
+    type = PASS_SHADOW;
+    use_direct_light = true;
+  }
  /* Light component passes. */
  else if (strcmp(bake_type, "DIFFUSE") == 0) {
    if ((bake_filter & BL::BakeSettings::pass_filter_DIRECT) &&
--- a/intern/cycles/blender/shader.cpp
+++ b/intern/cycles/blender/shader.cpp
@@ -61,12 +61,6 @@ static DisplacementMethod get_displacement_method(PointerRNA &ptr)
      ptr, "displacement_method", DISPLACE_NUM_METHODS, DISPLACE_BUMP);
 }

-static EmissionSampling get_emission_sampling(PointerRNA &ptr)
-{
-  return (EmissionSampling)get_enum(
-      ptr, "emission_sampling", EMISSION_SAMPLING_NUM, EMISSION_SAMPLING_AUTO);
-}
-
 static int validate_enum_value(int value, int num_values, int default_value)
 {
  if (value >= num_values) {
@@ -1565,7 +1559,7 @@ void BlenderSync::sync_materials(BL::Depsgraph &b_depsgraph, bool update_all)

      /* settings */
      PointerRNA cmat = RNA_pointer_get(&b_mat.ptr, "cycles");
-      shader->set_emission_sampling_method(get_emission_sampling(cmat));
+      shader->set_use_mis(get_boolean(cmat, "sample_as_light"));
      shader->set_use_transparent_shadow(get_boolean(cmat, "use_transparent_shadow"));
      shader->set_heterogeneous_volume(!get_boolean(cmat, "homogeneous_volume"));
      shader->set_volume_sampling_method(get_volume_sampling(cmat));
--- a/intern/cycles/blender/sync.cpp
+++ b/intern/cycles/blender/sync.cpp
@@ -26,6 +26,7 @@
 #include "util/foreach.h"
 #include "util/hash.h"
 #include "util/log.h"
+#include "util/opengl.h"
 #include "util/openimagedenoise.h"

 CCL_NAMESPACE_BEGIN
@@ -347,14 +348,7 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background)
    integrator->set_motion_blur(view_layer.use_motion_blur);
  }

-  bool use_light_tree = get_boolean(cscene, "use_light_tree");
-  integrator->set_use_light_tree(use_light_tree);
-  integrator->set_light_sampling_threshold(
-      (use_light_tree) ? 0.0f : get_float(cscene, "light_sampling_threshold"));
-
-  if (integrator->use_light_tree_is_modified()) {
-    scene->light_manager->tag_update(scene, LightManager::UPDATE_ALL);
-  }
+  integrator->set_light_sampling_threshold(get_float(cscene, "light_sampling_threshold"));

  SamplingPattern sampling_pattern = (SamplingPattern)get_enum(
      cscene, "sampling_pattern", SAMPLING_NUM_PATTERNS, SAMPLING_PATTERN_PMJ);
@@ -623,6 +617,7 @@ static bool get_known_pass_type(BL::RenderPass &b_pass, PassType &type, PassMode
  MAP_PASS("Emit", PASS_EMISSION, false);
  MAP_PASS("Env", PASS_BACKGROUND, false);
  MAP_PASS("AO", PASS_AO, false);
+  MAP_PASS("Shadow", PASS_SHADOW, false);

  MAP_PASS("BakePrimitive", PASS_BAKE_PRIMITIVE, false);
  MAP_PASS("BakeDifferential", PASS_BAKE_DIFFERENTIAL, false);
--- a/intern/cycles/cmake/external_libs.cmake
+++ b/intern/cycles/cmake/external_libs.cmake
@@ -1,6 +1,584 @@
 # SPDX-License-Identifier: Apache-2.0
 # Copyright 2011-2022 Blender Foundation

+###########################################################################
+# Helper macros
+###########################################################################
+
+macro(_set_default variable value)
+  if(NOT ${variable})
+    set(${variable} ${value})
+  endif()
+endmacro()
+
+###########################################################################
+# Precompiled libraries detection
+#
+# Use precompiled libraries from Blender repository
+###########################################################################
+
+if(CYCLES_STANDALONE_REPOSITORY)
+  if(APPLE)
+    if("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "x86_64")
+      set(_cycles_lib_dir "${CMAKE_SOURCE_DIR}/../lib/darwin")
+    else()
+      set(_cycles_lib_dir "${CMAKE_SOURCE_DIR}/../lib/darwin_arm64")
+    endif()
+
+    # Always use system zlib
+    find_package(ZLIB REQUIRED)
+  elseif(WIN32)
+    if(CMAKE_CL_64)
+      set(_cycles_lib_dir "${CMAKE_SOURCE_DIR}/../lib/win64_vc15")
+    else()
+      message(FATAL_ERROR "Unsupported Visual Studio Version")
+    endif()
+  else()
+    # Path to locally compiled libraries.
+    set(LIBDIR_NAME ${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR})
+    string(TOLOWER ${LIBDIR_NAME} LIBDIR_NAME)
+    set(LIBDIR_NATIVE_ABI ${CMAKE_SOURCE_DIR}/../lib/${LIBDIR_NAME})
+
+    # Path to precompiled libraries with known CentOS 7 ABI.
+    set(LIBDIR_CENTOS7_ABI ${CMAKE_SOURCE_DIR}/../lib/linux_centos7_x86_64)
+
+    # Choose the best suitable libraries.
+    if(EXISTS ${LIBDIR_NATIVE_ABI})
+      set(_cycles_lib_dir ${LIBDIR_NATIVE_ABI})
+    elseif(EXISTS ${LIBDIR_CENTOS7_ABI})
+      set(_cycles_lib_dir ${LIBDIR_CENTOS7_ABI})
+      set(WITH_CXX11_ABI OFF)
+
+      if(CMAKE_COMPILER_IS_GNUCC AND
+         CMAKE_C_COMPILER_VERSION VERSION_LESS 9.3)
+        message(FATAL_ERROR "GCC version must be at least 9.3 for precompiled libraries, found ${CMAKE_C_COMPILER_VERSION}")
+      endif()
+    endif()
+
+    if(DEFINED _cycles_lib_dir)
+      message(STATUS "Using precompiled libraries at ${_cycles_lib_dir}")
+    endif()
+
+    # Avoid namespace pollution.
+    unset(LIBDIR_NATIVE_ABI)
+    unset(LIBDIR_CENTOS7_ABI)
+  endif()
+
+  if(EXISTS ${_cycles_lib_dir})
+    _set_default(ALEMBIC_ROOT_DIR "${_cycles_lib_dir}/alembic")
+    _set_default(BOOST_ROOT "${_cycles_lib_dir}/boost")
+    _set_default(BLOSC_ROOT_DIR "${_cycles_lib_dir}/blosc")
+    _set_default(EMBREE_ROOT_DIR "${_cycles_lib_dir}/embree")
+    _set_default(EPOXY_ROOT_DIR "${_cycles_lib_dir}/epoxy")
+    _set_default(IMATH_ROOT_DIR "${_cycles_lib_dir}/imath")
+    _set_default(GLEW_ROOT_DIR "${_cycles_lib_dir}/glew")
+    _set_default(JPEG_ROOT "${_cycles_lib_dir}/jpeg")
+    _set_default(LLVM_ROOT_DIR "${_cycles_lib_dir}/llvm")
+    _set_default(CLANG_ROOT_DIR "${_cycles_lib_dir}/llvm")
+    _set_default(NANOVDB_ROOT_DIR "${_cycles_lib_dir}/openvdb")
+    _set_default(OPENCOLORIO_ROOT_DIR "${_cycles_lib_dir}/opencolorio")
+    _set_default(OPENEXR_ROOT_DIR "${_cycles_lib_dir}/openexr")
+    _set_default(OPENIMAGEDENOISE_ROOT_DIR "${_cycles_lib_dir}/openimagedenoise")
+    _set_default(OPENIMAGEIO_ROOT_DIR "${_cycles_lib_dir}/openimageio")
+    _set_default(OPENJPEG_ROOT_DIR "${_cycles_lib_dir}/openjpeg")
+    _set_default(OPENSUBDIV_ROOT_DIR "${_cycles_lib_dir}/opensubdiv")
+    _set_default(OPENVDB_ROOT_DIR "${_cycles_lib_dir}/openvdb")
+    _set_default(OSL_ROOT_DIR "${_cycles_lib_dir}/osl")
+    _set_default(PNG_ROOT "${_cycles_lib_dir}/png")
+    _set_default(PUGIXML_ROOT_DIR "${_cycles_lib_dir}/pugixml")
+    _set_default(SDL2_ROOT_DIR "${_cycles_lib_dir}/sdl")
+    _set_default(TBB_ROOT_DIR "${_cycles_lib_dir}/tbb")
+    _set_default(TIFF_ROOT "${_cycles_lib_dir}/tiff")
+    _set_default(USD_ROOT_DIR "${_cycles_lib_dir}/usd")
+    _set_default(WEBP_ROOT_DIR "${_cycles_lib_dir}/webp")
+    _set_default(ZLIB_ROOT "${_cycles_lib_dir}/zlib")
+    if(WIN32)
+      set(LEVEL_ZERO_ROOT_DIR ${_cycles_lib_dir}/level_zero)
+    else()
+      set(LEVEL_ZERO_ROOT_DIR ${_cycles_lib_dir}/level-zero)
+    endif()
+    _set_default(SYCL_ROOT_DIR "${_cycles_lib_dir}/dpcpp")
+
+    # Ignore system libraries
+    set(CMAKE_IGNORE_PATH "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES};${CMAKE_SYSTEM_INCLUDE_PATH};${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES};${CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES}")
+  else()
+    unset(_cycles_lib_dir)
+  endif()
+endif()
+
+###########################################################################
+# Zlib
+###########################################################################
+
+if(CYCLES_STANDALONE_REPOSITORY)
+  if(MSVC AND EXISTS ${_cycles_lib_dir})
+    set(ZLIB_INCLUDE_DIRS ${_cycles_lib_dir}/zlib/include)
+    set(ZLIB_LIBRARIES ${_cycles_lib_dir}/zlib/lib/libz_st.lib)
+    set(ZLIB_INCLUDE_DIR ${_cycles_lib_dir}/zlib/include)
+    set(ZLIB_LIBRARY ${_cycles_lib_dir}/zlib/lib/libz_st.lib)
+    set(ZLIB_DIR ${_cycles_lib_dir}/zlib)
+    set(ZLIB_FOUND ON)
+  elseif(NOT APPLE)
+    find_package(ZLIB REQUIRED)
+  endif()
+endif()
+
+###########################################################################
+# PThreads
+###########################################################################
+
+if(CYCLES_STANDALONE_REPOSITORY)
+  if(MSVC AND EXISTS ${_cycles_lib_dir})
+    set(PTHREADS_LIBRARIES "${_cycles_lib_dir}/pthreads/lib/pthreadVC3.lib")
+    include_directories("${_cycles_lib_dir}/pthreads/include")
+  else()
+    set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
+    find_package(Threads REQUIRED)
+    set(PTHREADS_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
+  endif()
+endif()
+
+###########################################################################
+# OpenImageIO and image libraries
+###########################################################################
+
+if(CYCLES_STANDALONE_REPOSITORY)
+  if(MSVC AND EXISTS ${_cycles_lib_dir})
+    add_definitions(
+      # OIIO changed the name of this define in newer versions
+      # we define both, so it would work with both old and new
+      # versions.
+      -DOIIO_STATIC_BUILD
+      -DOIIO_STATIC_DEFINE
+    )
+
+    set(OPENIMAGEIO_INCLUDE_DIR ${OPENIMAGEIO_ROOT_DIR}/include)
+    set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO_INCLUDE_DIR} ${OPENIMAGEIO_INCLUDE_DIR}/OpenImageIO)
+    # Special exceptions for libraries which needs explicit debug version
+    set(OPENIMAGEIO_LIBRARIES
+      optimized ${OPENIMAGEIO_ROOT_DIR}/lib/OpenImageIO.lib
+      optimized ${OPENIMAGEIO_ROOT_DIR}/lib/OpenImageIO_Util.lib
+      debug ${OPENIMAGEIO_ROOT_DIR}/lib/OpenImageIO_d.lib
+      debug ${OPENIMAGEIO_ROOT_DIR}/lib/OpenImageIO_Util_d.lib
+    )
+
+    set(PUGIXML_INCLUDE_DIR ${PUGIXML_ROOT_DIR}/include)
+    set(PUGIXML_LIBRARIES
+      optimized ${PUGIXML_ROOT_DIR}/lib/pugixml.lib
+      debug ${PUGIXML_ROOT_DIR}/lib/pugixml_d.lib
+    )
+  else()
+    find_package(OpenImageIO REQUIRED)
+    if(OPENIMAGEIO_PUGIXML_FOUND)
+      set(PUGIXML_INCLUDE_DIR "${OPENIMAGEIO_INCLUDE_DIR}/OpenImageIO")
+      set(PUGIXML_LIBRARIES "")
+    else()
+      find_package(PugiXML REQUIRED)
+    endif()
+  endif()
+
+  # Dependencies
+  if(MSVC AND EXISTS ${_cycles_lib_dir})
+    set(OPENJPEG_INCLUDE_DIR ${OPENJPEG_ROOT_DIR}/include/openjpeg-2.3)
+    set(OPENJPEG_LIBRARIES ${_cycles_lib_dir}/openjpeg/lib/openjp2${CMAKE_STATIC_LIBRARY_SUFFIX})
+  else()
+    find_package(OpenJPEG REQUIRED)
+  endif()
+
+  find_package(JPEG REQUIRED)
+  find_package(TIFF REQUIRED)
+  find_package(WebP)
+
+  if(EXISTS ${_cycles_lib_dir})
+    set(PNG_NAMES png16 libpng16 png libpng)
+  endif()
+  find_package(PNG REQUIRED)
+endif()
+
+###########################################################################
+# OpenEXR
+###########################################################################
+
+if(CYCLES_STANDALONE_REPOSITORY)
+  if(MSVC AND EXISTS ${_cycles_lib_dir})
+    set(OPENEXR_INCLUDE_DIR ${OPENEXR_ROOT_DIR}/include)
+    set(OPENEXR_INCLUDE_DIRS ${OPENEXR_INCLUDE_DIR} ${OPENEXR_ROOT_DIR}/include/OpenEXR ${IMATH_ROOT_DIR}/include ${IMATH_ROOT_DIR}/include/Imath)
+    set(OPENEXR_LIBRARIES
+      optimized ${OPENEXR_ROOT_DIR}/lib/OpenEXR_s.lib
+      optimized ${OPENEXR_ROOT_DIR}/lib/OpenEXRCore_s.lib
+      optimized ${OPENEXR_ROOT_DIR}/lib/Iex_s.lib
+      optimized ${IMATH_ROOT_DIR}/lib/Imath_s.lib
+      optimized ${OPENEXR_ROOT_DIR}/lib/IlmThread_s.lib
+      debug ${OPENEXR_ROOT_DIR}/lib/OpenEXR_s_d.lib
+      debug ${OPENEXR_ROOT_DIR}/lib/OpenEXRCore_s_d.lib
+      debug ${OPENEXR_ROOT_DIR}/lib/Iex_s_d.lib
+      debug ${IMATH_ROOT_DIR}/lib/Imath_s_d.lib
+      debug ${OPENEXR_ROOT_DIR}/lib/IlmThread_s_d.lib
+    )
+  else()
+    find_package(OpenEXR REQUIRED)
+  endif()
+endif()
+
+###########################################################################
+# OpenShadingLanguage & LLVM
+###########################################################################
+
+if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_OSL)
+  if(EXISTS ${_cycles_lib_dir})
+    set(LLVM_STATIC ON)
+  endif()
+
+  if(MSVC AND EXISTS ${_cycles_lib_dir})
+    # TODO(sergey): On Windows llvm-config doesn't give proper results for the
+    # library names, use hardcoded libraries for now.
+    file(GLOB _llvm_libs_release ${LLVM_ROOT_DIR}/lib/*.lib)
+    file(GLOB _llvm_libs_debug ${LLVM_ROOT_DIR}/debug/lib/*.lib)
+    set(_llvm_libs)
+    foreach(_llvm_lib_path ${_llvm_libs_release})
+      get_filename_component(_llvm_lib_name ${_llvm_lib_path} ABSOLUTE)
+      list(APPEND _llvm_libs optimized ${_llvm_lib_name})
+    endforeach()
+    foreach(_llvm_lib_path ${_llvm_libs_debug})
+      get_filename_component(_llvm_lib_name ${_llvm_lib_path} ABSOLUTE)
+      list(APPEND _llvm_libs debug ${_llvm_lib_name})
+    endforeach()
+    set(LLVM_LIBRARY ${_llvm_libs})
+    unset(_llvm_lib_name)
+    unset(_llvm_lib_path)
+    unset(_llvm_libs)
+    unset(_llvm_libs_debug)
+    unset(_llvm_libs_release)
+
+    set(OSL_INCLUDE_DIR ${OSL_ROOT_DIR}/include)
+    set(OSL_LIBRARIES
+      optimized ${OSL_ROOT_DIR}/lib/oslcomp.lib
+      optimized ${OSL_ROOT_DIR}/lib/oslexec.lib
+      optimized ${OSL_ROOT_DIR}/lib/oslquery.lib
+      debug ${OSL_ROOT_DIR}/lib/oslcomp_d.lib
+      debug ${OSL_ROOT_DIR}/lib/oslexec_d.lib
+      debug ${OSL_ROOT_DIR}/lib/oslquery_d.lib
+      ${PUGIXML_LIBRARIES}
+    )
+
+    find_program(OSL_COMPILER NAMES oslc PATHS ${OSL_ROOT_DIR}/bin)
+  else()
+    find_package(OSL REQUIRED)
+    find_package(LLVM REQUIRED)
+    find_package(Clang REQUIRED)
+  endif()
+endif()
+
+###########################################################################
+# OpenPGL
+###########################################################################
+
+if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_PATH_GUIDING)
+  if(NOT openpgl_DIR AND EXISTS ${_cycles_lib_dir})
+    set(openpgl_DIR ${_cycles_lib_dir}/openpgl/lib/cmake/openpgl)
+  endif()
+
+  find_package(openpgl QUIET)
+  if(openpgl_FOUND)
+    if(WIN32)
+      get_target_property(OPENPGL_LIBRARIES_RELEASE openpgl::openpgl LOCATION_RELEASE)
+      get_target_property(OPENPGL_LIBRARIES_DEBUG openpgl::openpgl LOCATION_DEBUG)
+      set(OPENPGL_LIBRARIES optimized ${OPENPGL_LIBRARIES_RELEASE} debug ${OPENPGL_LIBRARIES_DEBUG})
+    else()
+      get_target_property(OPENPGL_LIBRARIES openpgl::openpgl LOCATION)
+    endif()
+    get_target_property(OPENPGL_INCLUDE_DIR openpgl::openpgl INTERFACE_INCLUDE_DIRECTORIES)
+  else()
+    set_and_warn_library_found("OpenPGL" openpgl_FOUND WITH_CYCLES_PATH_GUIDING)
+  endif()
+endif()
+
+###########################################################################
+# OpenColorIO
+###########################################################################
+
+if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_OPENCOLORIO)
+  set(WITH_OPENCOLORIO ON)
+
+  if(NOT USD_OVERRIDE_OPENCOLORIO)
+    if(MSVC AND EXISTS ${_cycles_lib_dir})
+      set(OPENCOLORIO_INCLUDE_DIRS ${OPENCOLORIO_ROOT_DIR}/include)
+      set(OPENCOLORIO_LIBRARIES
+        optimized ${OPENCOLORIO_ROOT_DIR}/lib/OpenColorIO.lib
+        optimized ${OPENCOLORIO_ROOT_DIR}/lib/libyaml-cpp.lib
+        optimized ${OPENCOLORIO_ROOT_DIR}/lib/libexpatMD.lib
+        optimized ${OPENCOLORIO_ROOT_DIR}/lib/pystring.lib
+        debug ${OPENCOLORIO_ROOT_DIR}/lib/OpencolorIO_d.lib
+        debug ${OPENCOLORIO_ROOT_DIR}/lib/libyaml-cpp_d.lib
+        debug ${OPENCOLORIO_ROOT_DIR}/lib/libexpatdMD.lib
+        debug ${OPENCOLORIO_ROOT_DIR}/lib/pystring_d.lib
+      )
+    else()
+      find_package(OpenColorIO REQUIRED)
+    endif()
+  endif()
+endif()
+
+###########################################################################
+# Boost
+###########################################################################
+
+if(CYCLES_STANDALONE_REPOSITORY)
+  if(EXISTS ${_cycles_lib_dir})
+    if(MSVC)
+      set(Boost_USE_STATIC_RUNTIME OFF)
+      set(Boost_USE_MULTITHREADED ON)
+      set(Boost_USE_STATIC_LIBS ON)
+    else()
+      set(BOOST_LIBRARYDIR ${_cycles_lib_dir}/boost/lib)
+      set(Boost_NO_BOOST_CMAKE ON)
+      set(Boost_NO_SYSTEM_PATHS ON)
+    endif()
+  endif()
+
+  if(MSVC AND EXISTS ${_cycles_lib_dir})
+    set(BOOST_INCLUDE_DIR ${BOOST_ROOT}/include)
+    set(BOOST_VERSION_HEADER ${BOOST_INCLUDE_DIR}/boost/version.hpp)
+    if(EXISTS ${BOOST_VERSION_HEADER})
+      file(STRINGS "${BOOST_VERSION_HEADER}" BOOST_LIB_VERSION REGEX "#define BOOST_LIB_VERSION ")
+      if(BOOST_LIB_VERSION MATCHES "#define BOOST_LIB_VERSION \"([0-9_]+)\"")
+        set(BOOST_VERSION "${CMAKE_MATCH_1}")
+      endif()
+    endif()
+    if(NOT BOOST_VERSION)
+      message(FATAL_ERROR "Unable to determine Boost version")
+    endif()
+    set(BOOST_POSTFIX "vc142-mt-x64-${BOOST_VERSION}.lib")
+    set(BOOST_DEBUG_POSTFIX "vc142-mt-gd-x64-${BOOST_VERSION}.lib")
+    set(BOOST_LIBRARIES
+      optimized ${BOOST_ROOT}/lib/libboost_date_time-${BOOST_POSTFIX}
+      optimized ${BOOST_ROOT}/lib/libboost_iostreams-${BOOST_POSTFIX}
+      optimized ${BOOST_ROOT}/lib/libboost_filesystem-${BOOST_POSTFIX}
+      optimized ${BOOST_ROOT}/lib/libboost_regex-${BOOST_POSTFIX}
+      optimized ${BOOST_ROOT}/lib/libboost_system-${BOOST_POSTFIX}
+      optimized ${BOOST_ROOT}/lib/libboost_thread-${BOOST_POSTFIX}
+      optimized ${BOOST_ROOT}/lib/libboost_chrono-${BOOST_POSTFIX}
+      debug ${BOOST_ROOT}/lib/libboost_date_time-${BOOST_DEBUG_POSTFIX}
+      debug ${BOOST_ROOT}/lib/libboost_iostreams-${BOOST_DEBUG_POSTFIX}
+      debug ${BOOST_ROOT}/lib/libboost_filesystem-${BOOST_DEBUG_POSTFIX}
+      debug ${BOOST_ROOT}/lib/libboost_regex-${BOOST_DEBUG_POSTFIX}
+      debug ${BOOST_ROOT}/lib/libboost_system-${BOOST_DEBUG_POSTFIX}
+      debug ${BOOST_ROOT}/lib/libboost_thread-${BOOST_DEBUG_POSTFIX}
+      debug ${BOOST_ROOT}/lib/libboost_chrono-${BOOST_DEBUG_POSTFIX}
+    )
+    if(WITH_CYCLES_OSL)
+      set(BOOST_LIBRARIES ${BOOST_LIBRARIES}
+        optimized ${BOOST_ROOT}/lib/libboost_wave-${BOOST_POSTFIX}
+        debug ${BOOST_ROOT}/lib/libboost_wave-${BOOST_DEBUG_POSTFIX})
+    endif()
+  else()
+    set(__boost_packages iostreams filesystem regex system thread date_time)
+    if(WITH_CYCLES_OSL)
+      list(APPEND __boost_packages wave)
+    endif()
+    find_package(Boost 1.48 COMPONENTS ${__boost_packages})
+    if(NOT Boost_FOUND)
+      # The first lookup is optional so that this fallback is reachable: retry
+      # without the -mt (multithreaded) suffix, which has nothing to do with
+      # thread safety. Only this final attempt is REQUIRED.
+      set(Boost_USE_MULTITHREADED OFF)
+      find_package(Boost 1.48 COMPONENTS ${__boost_packages} REQUIRED)
+    endif()
+    unset(__boost_packages)
+
+    set(BOOST_INCLUDE_DIR ${Boost_INCLUDE_DIRS})
+    set(BOOST_LIBRARIES ${Boost_LIBRARIES})
+    set(BOOST_LIBPATH ${Boost_LIBRARY_DIRS})
+  endif()
+
+  set(BOOST_DEFINITIONS "-DBOOST_ALL_NO_LIB ${BOOST_DEFINITIONS}")
+endif()
+
+###########################################################################
+# Embree
+###########################################################################
+
+if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_EMBREE)
+  if(MSVC AND EXISTS ${_cycles_lib_dir})
+    set(EMBREE_INCLUDE_DIRS ${EMBREE_ROOT_DIR}/include)
+    set(EMBREE_LIBRARIES
+      optimized ${EMBREE_ROOT_DIR}/lib/embree3.lib
+      optimized ${EMBREE_ROOT_DIR}/lib/embree_avx2.lib
+      optimized ${EMBREE_ROOT_DIR}/lib/embree_avx.lib
+      optimized ${EMBREE_ROOT_DIR}/lib/embree_sse42.lib
+      optimized ${EMBREE_ROOT_DIR}/lib/lexers.lib
+      optimized ${EMBREE_ROOT_DIR}/lib/math.lib
+      optimized ${EMBREE_ROOT_DIR}/lib/simd.lib
+      optimized ${EMBREE_ROOT_DIR}/lib/tasking.lib
+      optimized ${EMBREE_ROOT_DIR}/lib/sys.lib
+      debug ${EMBREE_ROOT_DIR}/lib/embree3_d.lib
+      debug ${EMBREE_ROOT_DIR}/lib/embree_avx2_d.lib
+      debug ${EMBREE_ROOT_DIR}/lib/embree_avx_d.lib
+      debug ${EMBREE_ROOT_DIR}/lib/embree_sse42_d.lib
+      debug ${EMBREE_ROOT_DIR}/lib/lexers_d.lib
+      debug ${EMBREE_ROOT_DIR}/lib/math_d.lib
+      debug ${EMBREE_ROOT_DIR}/lib/simd_d.lib
+      debug ${EMBREE_ROOT_DIR}/lib/sys_d.lib
+      debug ${EMBREE_ROOT_DIR}/lib/tasking_d.lib
+    )
+  else()
+    find_package(Embree 3.8.0 REQUIRED)
+  endif()
+endif()
+
+###########################################################################
+# Logging
+###########################################################################
+
+if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_LOGGING)
+  find_package(Glog REQUIRED)
+  find_package(Gflags REQUIRED)
+endif()
+
+###########################################################################
+# OpenSubdiv
+###########################################################################
+
+if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_OPENSUBDIV)
+  set(WITH_OPENSUBDIV ON)
+
+  if(NOT USD_OVERRIDE_OPENSUBDIV)
+    if(MSVC AND EXISTS ${_cycles_lib_dir})
+      set(OPENSUBDIV_INCLUDE_DIRS ${OPENSUBDIV_ROOT_DIR}/include)
+      set(OPENSUBDIV_LIBRARIES
+        optimized ${OPENSUBDIV_ROOT_DIR}/lib/osdCPU.lib
+        optimized ${OPENSUBDIV_ROOT_DIR}/lib/osdGPU.lib
+        debug ${OPENSUBDIV_ROOT_DIR}/lib/osdCPU_d.lib
+        debug ${OPENSUBDIV_ROOT_DIR}/lib/osdGPU_d.lib
+      )
+    else()
+      find_package(OpenSubdiv REQUIRED)
+    endif()
+  endif()
+endif()
+
+###########################################################################
+# OpenVDB
+###########################################################################
+
+if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_OPENVDB)
+  set(WITH_OPENVDB ON)
+  set(OPENVDB_DEFINITIONS -DNOMINMAX -D_USE_MATH_DEFINES)
+
+  if(NOT USD_OVERRIDE_OPENVDB)
+    find_package(OpenVDB REQUIRED)
+
+    if(MSVC AND EXISTS ${_cycles_lib_dir})
+      set(BLOSC_LIBRARY
+        optimized ${BLOSC_ROOT_DIR}/lib/libblosc.lib
+        debug ${BLOSC_ROOT_DIR}/lib/libblosc_d.lib
+      )
+    else()
+      find_package(Blosc REQUIRED)
+    endif()
+  endif()
+endif()
+
+###########################################################################
+# NanoVDB
+###########################################################################
+
+if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_NANOVDB)
+  set(WITH_NANOVDB ON)
+
+  if(MSVC AND EXISTS ${_cycles_lib_dir})
+    set(NANOVDB_INCLUDE_DIR ${NANOVDB_ROOT_DIR}/include)
+    set(NANOVDB_INCLUDE_DIRS ${NANOVDB_INCLUDE_DIR})
+  else()
+    find_package(NanoVDB REQUIRED)
+  endif()
+endif()
+
+###########################################################################
+# OpenImageDenoise
+###########################################################################
+
+if(CYCLES_STANDALONE_REPOSITORY AND WITH_CYCLES_OPENIMAGEDENOISE)
+  set(WITH_OPENIMAGEDENOISE ON)
+
+  if(MSVC AND EXISTS ${_cycles_lib_dir})
+    set(OPENIMAGEDENOISE_INCLUDE_DIRS ${OPENIMAGEDENOISE_ROOT_DIR}/include)
+    set(OPENIMAGEDENOISE_LIBRARIES
+      optimized ${OPENIMAGEDENOISE_ROOT_DIR}/lib/OpenImageDenoise.lib
+      optimized ${OPENIMAGEDENOISE_ROOT_DIR}/lib/common.lib
+      optimized ${OPENIMAGEDENOISE_ROOT_DIR}/lib/dnnl.lib
+      debug ${OPENIMAGEDENOISE_ROOT_DIR}/lib/OpenImageDenoise_d.lib
+      debug ${OPENIMAGEDENOISE_ROOT_DIR}/lib/common_d.lib
+      debug ${OPENIMAGEDENOISE_ROOT_DIR}/lib/dnnl_d.lib
+    )
+  else()
+    find_package(OpenImageDenoise REQUIRED)
+  endif()
+endif()
+
+###########################################################################
+# TBB
+###########################################################################
+
+if(CYCLES_STANDALONE_REPOSITORY)
+  if(NOT USD_OVERRIDE_TBB)
+    if(MSVC AND EXISTS ${_cycles_lib_dir})
+      set(TBB_INCLUDE_DIRS ${TBB_ROOT_DIR}/include)
+      set(TBB_LIBRARIES
+        optimized ${TBB_ROOT_DIR}/lib/tbb.lib
+        debug ${TBB_ROOT_DIR}/lib/tbb_debug.lib
+      )
+    else()
+      find_package(TBB REQUIRED)
+    endif()
+  endif()
+endif()
+
+###########################################################################
+# Epoxy
+###########################################################################
+
+if(CYCLES_STANDALONE_REPOSITORY)
+  if((WITH_CYCLES_STANDALONE AND WITH_CYCLES_STANDALONE_GUI) OR
+     WITH_CYCLES_HYDRA_RENDER_DELEGATE)
+    if(MSVC AND EXISTS ${_cycles_lib_dir})
+      set(Epoxy_LIBRARIES "${_cycles_lib_dir}/epoxy/lib/epoxy.lib")
+      set(Epoxy_INCLUDE_DIRS "${_cycles_lib_dir}/epoxy/include")
+    else()
+      find_package(Epoxy REQUIRED)
+    endif()
+  endif()
+endif()
+
+###########################################################################
+# Alembic
+###########################################################################
+
+if(WITH_CYCLES_ALEMBIC)
+  if(CYCLES_STANDALONE_REPOSITORY)
+    if(MSVC AND EXISTS ${_cycles_lib_dir})
+      set(ALEMBIC_INCLUDE_DIRS ${_cycles_lib_dir}/alembic/include)
+      set(ALEMBIC_LIBRARIES
+        optimized ${_cycles_lib_dir}/alembic/lib/Alembic.lib
+        debug ${_cycles_lib_dir}/alembic/lib/Alembic_d.lib)
+    else()
+      find_package(Alembic REQUIRED)
+    endif()
+
+    set(WITH_ALEMBIC ON)
+  endif()
+endif()
+
+###########################################################################
+# System Libraries
+###########################################################################
+
+# Detect system libraries again
+if(EXISTS ${_cycles_lib_dir})
+  unset(CMAKE_IGNORE_PATH)
+  unset(_cycles_lib_dir)
+endif()
+
 ###########################################################################
 # SDL
 ###########################################################################
@@ -109,3 +687,5 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
    set(WITH_CYCLES_DEVICE_ONEAPI OFF)
  endif()
 endif()
+
+unset(_cycles_lib_dir)
--- a/intern/cycles/device/CMakeLists.txt
+++ b/intern/cycles/device/CMakeLists.txt
@@ -8,13 +8,28 @@ set(INC
 set(INC_SYS )

 if(WITH_CYCLES_DEVICE_OPTIX OR WITH_CYCLES_DEVICE_CUDA)
-  if(NOT WITH_CUDA_DYNLOAD)
+  if(WITH_CUDA_DYNLOAD)
+    list(APPEND INC
+      ../../../extern/cuew/include
+    )
+    add_definitions(-DWITH_CUDA_DYNLOAD)
+  else()
+    list(APPEND INC_SYS
+      ${CUDA_TOOLKIT_INCLUDE}
+    )
    add_definitions(-DCYCLES_CUDA_NVCC_EXECUTABLE="${CUDA_NVCC_EXECUTABLE}")
  endif()

  add_definitions(-DCYCLES_RUNTIME_OPTIX_ROOT_DIR="${CYCLES_RUNTIME_OPTIX_ROOT_DIR}")
 endif()

+if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
+  list(APPEND INC
+    ../../../extern/hipew/include
+  )
+  add_definitions(-DWITH_HIP_DYNLOAD)
+endif()
+
 set(SRC_BASE
  device.cpp
  denoise.cpp
@@ -153,15 +168,24 @@ if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
  )
 endif()

+if(WITH_CYCLES_DEVICE_CUDA)
+  add_definitions(-DWITH_CUDA)
+endif()
+if(WITH_CYCLES_DEVICE_HIP)
+  add_definitions(-DWITH_HIP)
+endif()
+if(WITH_CYCLES_DEVICE_OPTIX)
+  add_definitions(-DWITH_OPTIX)
+endif()
 if(WITH_CYCLES_DEVICE_METAL)
  list(APPEND LIB
    ${METAL_LIBRARY}
  )
+  add_definitions(-DWITH_METAL)
  list(APPEND SRC
    ${SRC_METAL}
  )
 endif()
-
 if (WITH_CYCLES_DEVICE_ONEAPI)
  if(WITH_CYCLES_ONEAPI_BINARIES)
    set(cycles_kernel_oneapi_lib_suffix "_aot")
@@ -179,6 +203,7 @@ if (WITH_CYCLES_DEVICE_ONEAPI)
  else()
    list(APPEND LIB ${SYCL_LIBRARY})
  endif()
+  add_definitions(-DWITH_ONEAPI)
  list(APPEND SRC
    ${SRC_ONEAPI}
  )
--- a/intern/cycles/device/cuda/graphics_interop.h
+++ b/intern/cycles/device/cuda/graphics_interop.h
@@ -38,7 +38,7 @@ class CUDADeviceGraphicsInterop : public DeviceGraphicsInterop {
  CUDADevice *device_ = nullptr;

  /* OpenGL PBO which is currently registered as the destination for the CUDA buffer. */
-  int64_t opengl_pbo_id_ = 0;
+  uint opengl_pbo_id_ = 0;
  /* Buffer area in pixels of the corresponding PBO. */
  int64_t buffer_area_ = 0;

--- a/intern/cycles/device/denoise.h
+++ b/intern/cycles/device/denoise.h
@@ -78,4 +78,24 @@ class DenoiseParams : public Node {
  }
 };

+/* All the parameters needed to perform buffer denoising on a device.
+ * Not a task in the canonical sense (it is not an asynchronously running task), but rather a
+ * wrapper for all the arguments and parameters needed to perform denoising. Keeping them in a
+ * single place means device methods do not have to change when these parameters change.
+ * Scalar and pointer members are default-initialized so a fresh task has determinate values. */
+class DeviceDenoiseTask {
+ public:
+  DenoiseParams params;
+
+  int num_samples = 0;
+
+  RenderBuffers *render_buffers = nullptr;
+  BufferParams buffer_params;
+
+  /* Allow in-place modification of the input passes (e.g. scaling them down). This lowers the
+   * memory footprint of the denoiser but makes the input passes "invalid" from the path tracer's
+   * point of view. */
+  bool allow_inplace_modification = false;
+};
+
 CCL_NAMESPACE_END
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -351,7 +351,6 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
  info.num = 0;

  info.has_nanovdb = true;
-  info.has_light_tree = true;
  info.has_osl = true;
  info.has_guiding = true;
  info.has_profiling = true;
@@ -400,7 +399,6 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,

    /* Accumulate device info. */
    info.has_nanovdb &= device.has_nanovdb;
-    info.has_light_tree &= device.has_light_tree;
    info.has_osl &= device.has_osl;
    info.has_guiding &= device.has_guiding;
    info.has_profiling &= device.has_profiling;
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -65,7 +65,6 @@ class DeviceInfo {
  int num;
  bool display_device;        /* GPU is used as a display device. */
  bool has_nanovdb;           /* Support NanoVDB volumes. */
-  bool has_light_tree;        /* Support light tree. */
  bool has_osl;               /* Support Open Shading Language. */
  bool has_guiding;           /* Support path guiding. */
  bool has_profiling;         /* Supports runtime collection of profiling info. */
@@ -85,7 +84,6 @@ class DeviceInfo {
    cpu_threads = 0;
    display_device = false;
    has_nanovdb = false;
-    has_light_tree = true;
    has_osl = false;
    has_guiding = false;
    has_profiling = false;
@@ -235,6 +233,21 @@ class Device {
    return nullptr;
  }

+  /* Buffer denoising. */
+
+  /* Returns true if task is fully handled. */
+  virtual bool denoise_buffer(const DeviceDenoiseTask & /*task*/)
+  {
+    LOG(ERROR) << "Request buffer denoising from a device which does not support it.";
+    return false;
+  }
+
+  virtual DeviceQueue *get_denoise_queue()
+  {
+    LOG(ERROR) << "Request denoising queue from a device which does not support it.";
+    return nullptr;
+  }
+
  /* Sub-devices */

  /* Run given callback for every individual device which will be handling rendering.
--- a/intern/cycles/device/hip/device.cpp
+++ b/intern/cycles/device/hip/device.cpp
@@ -137,7 +137,6 @@ void device_hip_info(vector<DeviceInfo> &devices)
    info.num = num;

    info.has_nanovdb = true;
-    info.has_light_tree = false;
    info.denoisers = 0;

    info.has_gpu_queue = true;
--- a/intern/cycles/device/hip/graphics_interop.h
+++ b/intern/cycles/device/hip/graphics_interop.h
@@ -36,7 +36,7 @@ class HIPDeviceGraphicsInterop : public DeviceGraphicsInterop {
  HIPDevice *device_ = nullptr;

  /* OpenGL PBO which is currently registered as the destination for the HIP buffer. */
-  int64_t opengl_pbo_id_ = 0;
+  uint opengl_pbo_id_ = 0;
  /* Buffer area in pixels of the corresponding PBO. */
  int64_t buffer_area_ = 0;

--- a/intern/cycles/device/metal/device_impl.h
+++ b/intern/cycles/device/metal/device_impl.h
@@ -117,8 +117,6 @@ class MetalDevice : public Device {
  /* ------------------------------------------------------------------ */
  /* low-level memory management */

-  bool max_working_set_exceeded(size_t safety_margin = 8 * 1024 * 1024) const;
-
  MetalMem *generic_alloc(device_memory &mem);

  void generic_copy_to(device_memory &mem);
--- a/intern/cycles/device/metal/device_impl.mm
+++ b/intern/cycles/device/metal/device_impl.mm
@@ -446,14 +446,6 @@ void MetalDevice::erase_allocation(device_memory &mem)
  }
 }

-bool MetalDevice::max_working_set_exceeded(size_t safety_margin) const
-{
-  /* We're allowed to allocate beyond the safe working set size, but then if all resources are made
-   * resident we will get command buffer failures at render time. */
-  size_t available = [mtlDevice recommendedMaxWorkingSetSize] - safety_margin;
-  return (stats.mem_used > available);
-}
-
 MetalDevice::MetalMem *MetalDevice::generic_alloc(device_memory &mem)
 {
  size_t size = mem.memory_size();
@@ -531,11 +523,6 @@ MetalDevice::MetalMem *MetalDevice::generic_alloc(device_memory &mem)
    mmem->use_UMA = false;
  }

-  if (max_working_set_exceeded()) {
-    set_error("System is out of GPU memory");
-    return nullptr;
-  }
-
  return mmem;
 }

@@ -934,8 +921,9 @@ void MetalDevice::tex_alloc(device_texture &mem)
              << string_human_readable_size(mem.memory_size()) << ")";

    mtlTexture = [mtlDevice newTextureWithDescriptor:desc];
+    assert(mtlTexture);
+
    if (!mtlTexture) {
-      set_error("System is out of GPU memory");
      return;
    }

@@ -967,10 +955,7 @@ void MetalDevice::tex_alloc(device_texture &mem)
              << string_human_readable_size(mem.memory_size()) << ")";

    mtlTexture = [mtlDevice newTextureWithDescriptor:desc];
-    if (!mtlTexture) {
-      set_error("System is out of GPU memory");
-      return;
-    }
+    assert(mtlTexture);

    [mtlTexture replaceRegion:MTLRegionMake2D(0, 0, mem.data_width, mem.data_height)
                  mipmapLevel:0
@@ -1032,10 +1017,6 @@ void MetalDevice::tex_alloc(device_texture &mem)
  need_texture_info = true;

  texture_info[slot].data = uint64_t(slot) | (sampler_index << 32);
-
-  if (max_working_set_exceeded()) {
-    set_error("System is out of GPU memory");
-  }
 }

 void MetalDevice::tex_free(device_texture &mem)
@@ -1096,10 +1077,6 @@ void MetalDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
      }
    }
  }
-
-  if (max_working_set_exceeded()) {
-    set_error("System is out of GPU memory");
-  }
 }

 CCL_NAMESPACE_END
--- a/intern/cycles/device/oneapi/device.cpp
+++ b/intern/cycles/device/oneapi/device.cpp
@@ -31,8 +31,6 @@ bool device_oneapi_init()
   * improves stability as of intel/LLVM SYCL-nightly/20220529.
   * All these env variable can be set beforehand by end-users and
   * will in that case -not- be overwritten. */
-  /* By default, enable only Level-Zero and if all devices are allowed, also CUDA and HIP.
-   * OpenCL backend isn't currently well supported. */
 #  ifdef _WIN32
  if (getenv("SYCL_CACHE_PERSISTENT") == nullptr) {
    _putenv_s("SYCL_CACHE_PERSISTENT", "1");
@@ -41,12 +39,7 @@ bool device_oneapi_init()
    _putenv_s("SYCL_CACHE_THRESHOLD", "0");
  }
  if (getenv("SYCL_DEVICE_FILTER") == nullptr) {
-    if (getenv("CYCLES_ONEAPI_ALL_DEVICES") == nullptr) {
-      _putenv_s("SYCL_DEVICE_FILTER", "level_zero");
-    }
-    else {
-      _putenv_s("SYCL_DEVICE_FILTER", "level_zero,cuda,hip");
-    }
+    _putenv_s("SYCL_DEVICE_FILTER", "level_zero");
  }
  if (getenv("SYCL_ENABLE_PCI") == nullptr) {
    _putenv_s("SYCL_ENABLE_PCI", "1");
@@ -57,12 +50,7 @@ bool device_oneapi_init()
 #  elif __linux__
  setenv("SYCL_CACHE_PERSISTENT", "1", false);
  setenv("SYCL_CACHE_THRESHOLD", "0", false);
-  if (getenv("CYCLES_ONEAPI_ALL_DEVICES") == nullptr) {
-    setenv("SYCL_DEVICE_FILTER", "level_zero", false);
-  }
-  else {
-    setenv("SYCL_DEVICE_FILTER", "level_zero,cuda,hip", false);
-  }
+  setenv("SYCL_DEVICE_FILTER", "level_zero", false);
  setenv("SYCL_ENABLE_PCI", "1", false);
  setenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE", "0", false);
 #  endif
--- a/intern/cycles/device/oneapi/device_impl.cpp
+++ b/intern/cycles/device/oneapi/device_impl.cpp
@@ -430,9 +430,9 @@ void OneapiDevice::check_usm(SyclQueue *queue_, const void *usm_ptr, bool allow_
  sycl::usm::alloc usm_type = get_pointer_type(usm_ptr, queue->get_context());
  (void)usm_type;
  assert(usm_type == sycl::usm::alloc::device ||
-         (usm_type == sycl::usm::alloc::host &&
-          (allow_host || device_type == sycl::info::device_type::cpu)) ||
-         usm_type == sycl::usm::alloc::unknown);
+         ((device_type == sycl::info::device_type::cpu || allow_host) &&
+              usm_type == sycl::usm::alloc::host ||
+          usm_type == sycl::usm::alloc::unknown));
 #  else
  /* Silence warning about unused arguments. */
  (void)queue_;
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -1,15 +1,16 @@
 /* SPDX-License-Identifier: Apache-2.0
- * Copyright 2019, NVIDIA Corporation
- * Copyright 2019-2022 Blender Foundation */
+ * Copyright 2019, NVIDIA Corporation.
+ * Copyright 2019-2022 Blender Foundation. */

 #ifdef WITH_OPTIX

 #  include "device/optix/device_impl.h"
-#  include "device/optix/queue.h"

 #  include "bvh/bvh.h"
 #  include "bvh/optix.h"

+#  include "integrator/pass_accessor_gpu.h"
+
 #  include "scene/hair.h"
 #  include "scene/mesh.h"
 #  include "scene/object.h"
@@ -28,8 +29,197 @@
 #  define __KERNEL_OPTIX__
 #  include "kernel/device/optix/globals.h"

+#  include <optix_denoiser_tiling.h>
+
 CCL_NAMESPACE_BEGIN

+// A minimal copy of the functionality of `optix_denoiser_tiling.h`, which allows fixing integer
+// overflow issues without bumping the SDK or driver requirement.
+//
+// The original code is Copyright NVIDIA Corporation, BSD-3-Clause.
+namespace {
+
+#  if OPTIX_ABI_VERSION >= 60
+using ::optixUtilDenoiserInvokeTiled;
+#  else
+static OptixResult optixUtilDenoiserSplitImage(const OptixImage2D &input,
+                                               const OptixImage2D &output,
+                                               unsigned int overlapWindowSizeInPixels,
+                                               unsigned int tileWidth,
+                                               unsigned int tileHeight,
+                                               std::vector<OptixUtilDenoiserImageTile> &tiles)
+{
+  if (tileWidth == 0 || tileHeight == 0)
+    return OPTIX_ERROR_INVALID_VALUE;
+
+  unsigned int inPixelStride = optixUtilGetPixelStride(input);
+  unsigned int outPixelStride = optixUtilGetPixelStride(output);
+
+  int inp_w = std::min(tileWidth + 2 * overlapWindowSizeInPixels, input.width);
+  int inp_h = std::min(tileHeight + 2 * overlapWindowSizeInPixels, input.height);
+  int inp_y = 0, copied_y = 0;
+
+  do {
+    int inputOffsetY = inp_y == 0 ? 0 :
+                                    std::max((int)overlapWindowSizeInPixels,
+                                             inp_h - ((int)input.height - inp_y));
+    int copy_y = inp_y == 0 ? std::min(input.height, tileHeight + overlapWindowSizeInPixels) :
+                              std::min(tileHeight, input.height - copied_y);
+
+    int inp_x = 0, copied_x = 0;
+    do {
+      int inputOffsetX = inp_x == 0 ? 0 :
+                                      std::max((int)overlapWindowSizeInPixels,
+                                               inp_w - ((int)input.width - inp_x));
+      int copy_x = inp_x == 0 ? std::min(input.width, tileWidth + overlapWindowSizeInPixels) :
+                                std::min(tileWidth, input.width - copied_x);
+
+      OptixUtilDenoiserImageTile tile;
+      tile.input.data = input.data + (size_t)(inp_y - inputOffsetY) * input.rowStrideInBytes +
+                        +(size_t)(inp_x - inputOffsetX) * inPixelStride;
+      tile.input.width = inp_w;
+      tile.input.height = inp_h;
+      tile.input.rowStrideInBytes = input.rowStrideInBytes;
+      tile.input.pixelStrideInBytes = input.pixelStrideInBytes;
+      tile.input.format = input.format;
+
+      tile.output.data = output.data + (size_t)inp_y * output.rowStrideInBytes +
+                         (size_t)inp_x * outPixelStride;
+      tile.output.width = copy_x;
+      tile.output.height = copy_y;
+      tile.output.rowStrideInBytes = output.rowStrideInBytes;
+      tile.output.pixelStrideInBytes = output.pixelStrideInBytes;
+      tile.output.format = output.format;
+
+      tile.inputOffsetX = inputOffsetX;
+      tile.inputOffsetY = inputOffsetY;
+      tiles.push_back(tile);
+
+      inp_x += inp_x == 0 ? tileWidth + overlapWindowSizeInPixels : tileWidth;
+      copied_x += copy_x;
+    } while (inp_x < static_cast<int>(input.width));
+
+    inp_y += inp_y == 0 ? tileHeight + overlapWindowSizeInPixels : tileHeight;
+    copied_y += copy_y;
+  } while (inp_y < static_cast<int>(input.height));
+
+  return OPTIX_SUCCESS;
+}
+
+/* Split every layer and guide image into overlapping tiles and call `optixDenoiserInvoke()` once
+ * per tile. At least one layer is required since the tile count is taken from the first layer. */
+static OptixResult optixUtilDenoiserInvokeTiled(OptixDenoiser denoiser,
+                                                CUstream stream,
+                                                const OptixDenoiserParams *params,
+                                                CUdeviceptr denoiserState,
+                                                size_t denoiserStateSizeInBytes,
+                                                const OptixDenoiserGuideLayer *guideLayer,
+                                                const OptixDenoiserLayer *layers,
+                                                unsigned int numLayers,
+                                                CUdeviceptr scratch,
+                                                size_t scratchSizeInBytes,
+                                                unsigned int overlapWindowSizeInPixels,
+                                                unsigned int tileWidth,
+                                                unsigned int tileHeight)
+{
+  if (!guideLayer || !layers || numLayers == 0)
+    return OPTIX_ERROR_INVALID_VALUE;
+
+  std::vector<std::vector<OptixUtilDenoiserImageTile>> tiles(numLayers);
+  std::vector<std::vector<OptixUtilDenoiserImageTile>> prevTiles(numLayers);
+  for (unsigned int l = 0; l < numLayers; l++) {
+    if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(layers[l].input,
+                                                                 layers[l].output,
+                                                                 overlapWindowSizeInPixels,
+                                                                 tileWidth,
+                                                                 tileHeight,
+                                                                 tiles[l]))
+      return res;
+
+    if (layers[l].previousOutput.data) {
+      OptixImage2D dummyOutput = layers[l].previousOutput;
+      if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(layers[l].previousOutput,
+                                                                   dummyOutput,
+                                                                   overlapWindowSizeInPixels,
+                                                                   tileWidth,
+                                                                   tileHeight,
+                                                                   prevTiles[l]))
+        return res;
+    }
+  }
+
+  std::vector<OptixUtilDenoiserImageTile> albedoTiles;
+  if (guideLayer->albedo.data) {
+    OptixImage2D dummyOutput = guideLayer->albedo;
+    if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->albedo,
+                                                                 dummyOutput,
+                                                                 overlapWindowSizeInPixels,
+                                                                 tileWidth,
+                                                                 tileHeight,
+                                                                 albedoTiles))
+      return res;
+  }
+
+  std::vector<OptixUtilDenoiserImageTile> normalTiles;
+  if (guideLayer->normal.data) {
+    OptixImage2D dummyOutput = guideLayer->normal;
+    if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->normal,
+                                                                 dummyOutput,
+                                                                 overlapWindowSizeInPixels,
+                                                                 tileWidth,
+                                                                 tileHeight,
+                                                                 normalTiles))
+      return res;
+  }
+  std::vector<OptixUtilDenoiserImageTile> flowTiles;
+  if (guideLayer->flow.data) {
+    OptixImage2D dummyOutput = guideLayer->flow;
+    if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->flow,
+                                                                 dummyOutput,
+                                                                 overlapWindowSizeInPixels,
+                                                                 tileWidth,
+                                                                 tileHeight,
+                                                                 flowTiles))
+      return res;
+  }
+
+  for (size_t t = 0; t < tiles[0].size(); t++) {
+    std::vector<OptixDenoiserLayer> tlayers;
+    for (unsigned int l = 0; l < numLayers; l++) {
+      OptixDenoiserLayer layer = {};
+      layer.input = (tiles[l])[t].input;
+      layer.output = (tiles[l])[t].output;
+      if (layers[l].previousOutput.data)
+        layer.previousOutput = (prevTiles[l])[t].input;
+      tlayers.push_back(layer);
+    }
+
+    OptixDenoiserGuideLayer gl = {};
+    if (guideLayer->albedo.data)
+      gl.albedo = albedoTiles[t].input;
+    if (guideLayer->normal.data)
+      gl.normal = normalTiles[t].input;
+    if (guideLayer->flow.data)
+      gl.flow = flowTiles[t].input;
+
+    if (const OptixResult res = optixDenoiserInvoke(denoiser,
+                                                    stream,
+                                                    params,
+                                                    denoiserState,
+                                                    denoiserStateSizeInBytes,
+                                                    &gl,
+                                                    &tlayers[0],
+                                                    numLayers,
+                                                    (tiles[0])[t].inputOffsetX,
+                                                    (tiles[0])[t].inputOffsetY,
+                                                    scratch,
+                                                    scratchSizeInBytes))
+      return res;
+  }
+  return OPTIX_SUCCESS;
+}
+#  endif
+
 #  if OPTIX_ABI_VERSION >= 55
 static void execute_optix_task(TaskPool &pool, OptixTask task, OptixResult &failure_reason)
 {
@@ -49,10 +239,18 @@ static void execute_optix_task(TaskPool &pool, OptixTask task, OptixResult &fail
 }
 #  endif

+}  // namespace
+
+OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
+    : device(device), queue(device), state(device, "__denoiser_state", true)
+{
+}
+
 OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
    : CUDADevice(info, stats, profiler),
      sbt_data(this, "__sbt", MEM_READ_ONLY),
-      launch_params(this, "kernel_params", false)
+      launch_params(this, "kernel_params", false),
+      denoiser_(this)
 {
  /* Make the CUDA context current. */
  if (!cuContext) {
@@ -143,6 +341,11 @@ OptiXDevice::~OptiXDevice()
  }
 #  endif

+  /* Make sure denoiser is destroyed before device context! */
+  if (denoiser_.optix_denoiser != nullptr) {
+    optixDenoiserDestroy(denoiser_.optix_denoiser);
+  }
+
  optixDeviceContextDestroy(context);
 }

@@ -917,6 +1120,571 @@ void *OptiXDevice::get_cpu_osl_memory()
 #  endif
 }

+/* --------------------------------------------------------------------
+ * Buffer denoising.
+ */
+
+/* State of a single denoising task: pass offsets, guiding passes layout and previous output. */
+class OptiXDevice::DenoiseContext {
+ public:
+  explicit DenoiseContext(OptiXDevice *device, const DeviceDenoiseTask &task)
+      : denoise_params(task.params),
+        render_buffers(task.render_buffers),
+        buffer_params(task.buffer_params),
+        guiding_buffer(device, "denoiser guiding passes buffer", true),
+        num_samples(task.num_samples)
+  {
+    num_input_passes = 1;
+    if (denoise_params.use_pass_albedo) {
+      num_input_passes += 1;
+      use_pass_albedo = true;
+      pass_denoising_albedo = buffer_params.get_pass_offset(PASS_DENOISING_ALBEDO);
+      if (denoise_params.use_pass_normal) {
+        num_input_passes += 1;
+        use_pass_normal = true;
+        pass_denoising_normal = buffer_params.get_pass_offset(PASS_DENOISING_NORMAL);
+      }
+    }
+
+    if (denoise_params.temporally_stable) {
+      prev_output.device_pointer = render_buffers->buffer.device_pointer;
+      prev_output.offset = buffer_params.get_pass_offset(PASS_DENOISING_PREVIOUS);
+      prev_output.stride = buffer_params.stride;
+      prev_output.pass_stride = buffer_params.pass_stride;
+
+      num_input_passes += 1;
+      use_pass_flow = true;
+      pass_motion = buffer_params.get_pass_offset(PASS_MOTION);
+    }
+
+    use_guiding_passes = (num_input_passes - 1) > 0;
+
+    if (use_guiding_passes) {
+      if (task.allow_inplace_modification) {
+        guiding_params.device_pointer = render_buffers->buffer.device_pointer;
+
+        guiding_params.pass_albedo = pass_denoising_albedo;
+        guiding_params.pass_normal = pass_denoising_normal;
+        guiding_params.pass_flow = pass_motion;
+
+        guiding_params.stride = buffer_params.stride;
+        guiding_params.pass_stride = buffer_params.pass_stride;
+      }
+      else {
+        guiding_params.pass_stride = 0;
+        if (use_pass_albedo) {
+          guiding_params.pass_albedo = guiding_params.pass_stride;
+          guiding_params.pass_stride += 3;
+        }
+        if (use_pass_normal) {
+          guiding_params.pass_normal = guiding_params.pass_stride;
+          guiding_params.pass_stride += 3;
+        }
+        if (use_pass_flow) {
+          guiding_params.pass_flow = guiding_params.pass_stride;
+          guiding_params.pass_stride += 2;
+        }
+
+        guiding_params.stride = buffer_params.width;
+
+        /* Compute the element count in `size_t`: the product can exceed the range of `int`. */
+        guiding_buffer.alloc_to_device(size_t(buffer_params.width) * buffer_params.height *
+                                       guiding_params.pass_stride);
+        guiding_params.device_pointer = guiding_buffer.device_pointer;
+      }
+    }
+
+    pass_sample_count = buffer_params.get_pass_offset(PASS_SAMPLE_COUNT);
+  }
+
+  const DenoiseParams &denoise_params;
+
+  RenderBuffers *render_buffers = nullptr;
+  const BufferParams &buffer_params;
+
+  /* Previous output. */
+  struct {
+    device_ptr device_pointer = 0;
+
+    int offset = PASS_UNUSED;
+
+    int stride = -1;
+    int pass_stride = -1;
+  } prev_output;
+
+  /* Device-side storage of the guiding passes. */
+  device_only_memory<float> guiding_buffer;
+
+  struct {
+    device_ptr device_pointer = 0;
+
+    /* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
+    int pass_albedo = PASS_UNUSED;
+    int pass_normal = PASS_UNUSED;
+    int pass_flow = PASS_UNUSED;
+
+    int stride = -1;
+    int pass_stride = -1;
+  } guiding_params;
+
+  /* Number of input passes. Including the color and extra auxiliary passes. */
+  int num_input_passes = 0;
+  bool use_guiding_passes = false;
+  bool use_pass_albedo = false;
+  bool use_pass_normal = false;
+  bool use_pass_flow = false;
+
+  int num_samples = 0;
+
+  int pass_sample_count = PASS_UNUSED;
+
+  /* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
+  int pass_denoising_albedo = PASS_UNUSED;
+  int pass_denoising_normal = PASS_UNUSED;
+  int pass_motion = PASS_UNUSED;
+
+  /* For passes which don't need albedo channel for denoising we replace the actual albedo with
+   * the (0.5, 0.5, 0.5). This flag indicates that the real albedo pass has been replaced with
+   * the fake values and denoising of passes which do need albedo can no longer happen. */
+  bool albedo_replaced_with_fake = false;
+};
+
+class OptiXDevice::DenoisePass {
+ public:
+  DenoisePass(const PassType type, const BufferParams &buffer_params) : type(type)
+  {
+    noisy_offset = buffer_params.get_pass_offset(type, PassMode::NOISY);
+    denoised_offset = buffer_params.get_pass_offset(type, PassMode::DENOISED);
+
+    const PassInfo pass_info = Pass::get_info(type);
+    num_components = pass_info.num_components;
+    use_compositing = pass_info.use_compositing;
+    use_denoising_albedo = pass_info.use_denoising_albedo;
+  }
+
+  PassType type;
+
+  int noisy_offset;
+  int denoised_offset;
+
+  int num_components;
+  bool use_compositing;
+  bool use_denoising_albedo;
+};
+
+bool OptiXDevice::denoise_buffer(const DeviceDenoiseTask &task)
+{
+  const CUDAContextScope scope(this);
+
+  DenoiseContext context(this, task);
+
+  if (!denoise_ensure(context)) {
+    return false;
+  }
+
+  if (!denoise_filter_guiding_preprocess(context)) {
+    LOG(ERROR) << "Error preprocessing guiding passes.";
+    return false;
+  }
+
+  /* Passes which will use real albedo when it is available. */
+  denoise_pass(context, PASS_COMBINED);
+  denoise_pass(context, PASS_SHADOW_CATCHER_MATTE);
+
+  /* Passes which do not need albedo and hence if real is present it needs to become fake. */
+  denoise_pass(context, PASS_SHADOW_CATCHER);
+
+  return true;
+}
+
+DeviceQueue *OptiXDevice::get_denoise_queue()
+{
+  return &denoiser_.queue;
+}
+
+bool OptiXDevice::denoise_filter_guiding_preprocess(DenoiseContext &context)
+{
+  const BufferParams &buffer_params = context.buffer_params;
+
+  const int work_size = buffer_params.width * buffer_params.height;
+
+  DeviceKernelArguments args(&context.guiding_params.device_pointer,
+                             &context.guiding_params.pass_stride,
+                             &context.guiding_params.pass_albedo,
+                             &context.guiding_params.pass_normal,
+                             &context.guiding_params.pass_flow,
+                             &context.render_buffers->buffer.device_pointer,
+                             &buffer_params.offset,
+                             &buffer_params.stride,
+                             &buffer_params.pass_stride,
+                             &context.pass_sample_count,
+                             &context.pass_denoising_albedo,
+                             &context.pass_denoising_normal,
+                             &context.pass_motion,
+                             &buffer_params.full_x,
+                             &buffer_params.full_y,
+                             &buffer_params.width,
+                             &buffer_params.height,
+                             &context.num_samples);
+
+  return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, work_size, args);
+}
+
+bool OptiXDevice::denoise_filter_guiding_set_fake_albedo(DenoiseContext &context)
+{
+  const BufferParams &buffer_params = context.buffer_params;
+
+  const int work_size = buffer_params.width * buffer_params.height;
+
+  DeviceKernelArguments args(&context.guiding_params.device_pointer,
+                             &context.guiding_params.pass_stride,
+                             &context.guiding_params.pass_albedo,
+                             &buffer_params.width,
+                             &buffer_params.height);
+
+  return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO, work_size, args);
+}
+
+void OptiXDevice::denoise_pass(DenoiseContext &context, PassType pass_type)
+{
+  const BufferParams &buffer_params = context.buffer_params;
+
+  const DenoisePass pass(pass_type, buffer_params);
+
+  if (pass.noisy_offset == PASS_UNUSED) {
+    return;
+  }
+  if (pass.denoised_offset == PASS_UNUSED) {
+    LOG(DFATAL) << "Missing denoised pass " << pass_type_as_string(pass_type);
+    return;
+  }
+
+  if (pass.use_denoising_albedo) {
+    if (context.albedo_replaced_with_fake) {
+      LOG(ERROR) << "Pass which requires albedo is denoised after fake albedo has been set.";
+      return;
+    }
+  }
+  else if (context.use_guiding_passes && !context.albedo_replaced_with_fake) {
+    context.albedo_replaced_with_fake = true;
+    if (!denoise_filter_guiding_set_fake_albedo(context)) {
+      LOG(ERROR) << "Error replacing real albedo with the fake one.";
+      return;
+    }
+  }
+
+  /* Read and preprocess noisy color input pass. */
+  denoise_color_read(context, pass);
+  if (!denoise_filter_color_preprocess(context, pass)) {
+    LOG(ERROR) << "Error connverting denoising passes to RGB buffer.";
+    return;
+  }
+
+  if (!denoise_run(context, pass)) {
+    LOG(ERROR) << "Error running OptiX denoiser.";
+    return;
+  }
+
+  /* Store result in the combined pass of the render buffer.
+   *
+   * This will scale the denoiser result up to match the number of, possibly per-pixel, samples. */
+  if (!denoise_filter_color_postprocess(context, pass)) {
+    LOG(ERROR) << "Error copying denoiser result to the denoised pass.";
+    return;
+  }
+
+  denoiser_.queue.synchronize();
+}
+
+void OptiXDevice::denoise_color_read(DenoiseContext &context, const DenoisePass &pass)
+{
+  PassAccessor::PassAccessInfo pass_access_info;
+  pass_access_info.type = pass.type;
+  pass_access_info.mode = PassMode::NOISY;
+  pass_access_info.offset = pass.noisy_offset;
+
+  /* Denoiser operates on passes which are used to calculate the approximation, and is never used
+   * on the approximation. The latter is not even possible because OptiX does not support
+   * denoising of semi-transparent pixels. */
+  pass_access_info.use_approximate_shadow_catcher = false;
+  pass_access_info.use_approximate_shadow_catcher_background = false;
+  pass_access_info.show_active_pixels = false;
+
+  /* TODO(sergey): Consider adding support of actual exposure, to avoid clamping in extreme cases.
+   */
+  const PassAccessorGPU pass_accessor(
+      &denoiser_.queue, pass_access_info, 1.0f, context.num_samples);
+
+  PassAccessor::Destination destination(pass_access_info.type);
+  destination.d_pixels = context.render_buffers->buffer.device_pointer +
+                         pass.denoised_offset * sizeof(float);
+  destination.num_components = 3;
+  destination.pixel_stride = context.buffer_params.pass_stride;
+
+  BufferParams buffer_params = context.buffer_params;
+  buffer_params.window_x = 0;
+  buffer_params.window_y = 0;
+  buffer_params.window_width = buffer_params.width;
+  buffer_params.window_height = buffer_params.height;
+
+  pass_accessor.get_render_tile_pixels(context.render_buffers, buffer_params, destination);
+}
+
+bool OptiXDevice::denoise_filter_color_preprocess(DenoiseContext &context, const DenoisePass &pass)
+{
+  const BufferParams &buffer_params = context.buffer_params;
+
+  const int work_size = buffer_params.width * buffer_params.height;
+
+  DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
+                             &buffer_params.full_x,
+                             &buffer_params.full_y,
+                             &buffer_params.width,
+                             &buffer_params.height,
+                             &buffer_params.offset,
+                             &buffer_params.stride,
+                             &buffer_params.pass_stride,
+                             &pass.denoised_offset);
+
+  return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, work_size, args);
+}
+
+bool OptiXDevice::denoise_filter_color_postprocess(DenoiseContext &context,
+                                                   const DenoisePass &pass)
+{
+  const BufferParams &buffer_params = context.buffer_params;
+
+  const int work_size = buffer_params.width * buffer_params.height;
+
+  DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
+                             &buffer_params.full_x,
+                             &buffer_params.full_y,
+                             &buffer_params.width,
+                             &buffer_params.height,
+                             &buffer_params.offset,
+                             &buffer_params.stride,
+                             &buffer_params.pass_stride,
+                             &context.num_samples,
+                             &pass.noisy_offset,
+                             &pass.denoised_offset,
+                             &context.pass_sample_count,
+                             &pass.num_components,
+                             &pass.use_compositing);
+
+  return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, work_size, args);
+}
+
+bool OptiXDevice::denoise_ensure(DenoiseContext &context)
+{
+  if (!denoise_create_if_needed(context)) {
+    LOG(ERROR) << "OptiX denoiser creation has failed.";
+    return false;
+  }
+
+  if (!denoise_configure_if_needed(context)) {
+    LOG(ERROR) << "OptiX denoiser configuration has failed.";
+    return false;
+  }
+
+  return true;
+}
+
+/* Create the OptiX denoiser handle, or re-create it when the set of guiding passes requested by
+ * `context` differs from the one the current handle was created with.
+ * Returns false (after `set_error()`) when the creation fails. */
+bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context)
+{
+  const bool recreate_denoiser = (denoiser_.optix_denoiser == nullptr) ||
+                                 (denoiser_.use_pass_albedo != context.use_pass_albedo) ||
+                                 (denoiser_.use_pass_normal != context.use_pass_normal) ||
+                                 (denoiser_.use_pass_flow != context.use_pass_flow);
+  if (!recreate_denoiser) {
+    return true;
+  }
+
+  /* Destroy existing handle before creating new one, and forget it right away: if the creation
+   * below fails the stale handle must not be reused or destroyed a second time. */
+  if (denoiser_.optix_denoiser) {
+    optixDenoiserDestroy(denoiser_.optix_denoiser);
+    denoiser_.optix_denoiser = nullptr;
+  }
+
+  /* Create OptiX denoiser handle on demand when it is first used. */
+  OptixDenoiserOptions denoiser_options = {};
+  denoiser_options.guideAlbedo = context.use_pass_albedo;
+  denoiser_options.guideNormal = context.use_pass_normal;
+
+  /* The temporal model is selected when the flow guiding pass is in use. */
+  const OptixDenoiserModelKind model = context.use_pass_flow ? OPTIX_DENOISER_MODEL_KIND_TEMPORAL :
+                                                               OPTIX_DENOISER_MODEL_KIND_HDR;
+  const OptixResult result = optixDenoiserCreate(
+      this->context, model, &denoiser_options, &denoiser_.optix_denoiser);
+  if (result != OPTIX_SUCCESS) {
+    set_error("Failed to create OptiX denoiser");
+    return false;
+  }
+
+  /* OptiX denoiser handle was created with the requested number of input passes. */
+  denoiser_.use_pass_albedo = context.use_pass_albedo;
+  denoiser_.use_pass_normal = context.use_pass_normal;
+  denoiser_.use_pass_flow = context.use_pass_flow;
+  /* OptiX denoiser has been created, but it needs configuration. */
+  denoiser_.is_configured = false;
+  return true;
+}
+
+bool OptiXDevice::denoise_configure_if_needed(DenoiseContext &context)
+{
+  /* Limit maximum tile size denoiser can be invoked with. */
+  const int2 tile_size = make_int2(min(context.buffer_params.width, 4096),
+                                   min(context.buffer_params.height, 4096));
+
+  if (denoiser_.is_configured &&
+      (denoiser_.configured_size.x == tile_size.x && denoiser_.configured_size.y == tile_size.y)) {
+    return true;
+  }
+
+  optix_assert(optixDenoiserComputeMemoryResources(
+      denoiser_.optix_denoiser, tile_size.x, tile_size.y, &denoiser_.sizes));
+
+  /* Allocate denoiser state if tile size has changed since last setup. */
+  denoiser_.state.alloc_to_device(denoiser_.sizes.stateSizeInBytes +
+                                  denoiser_.sizes.withOverlapScratchSizeInBytes);
+
+  /* Initialize denoiser state for the current tile size. */
+  const OptixResult result = optixDenoiserSetup(
+      denoiser_.optix_denoiser,
+      0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called
+          * on a stream that is not the default stream. */
+      tile_size.x + denoiser_.sizes.overlapWindowSizeInPixels * 2,
+      tile_size.y + denoiser_.sizes.overlapWindowSizeInPixels * 2,
+      denoiser_.state.device_pointer,
+      denoiser_.sizes.stateSizeInBytes,
+      denoiser_.state.device_pointer + denoiser_.sizes.stateSizeInBytes,
+      denoiser_.sizes.withOverlapScratchSizeInBytes);
+  if (result != OPTIX_SUCCESS) {
+    set_error("Failed to set up OptiX denoiser");
+    return false;
+  }
+
+  cuda_assert(cuCtxSynchronize());
+
+  denoiser_.is_configured = true;
+  denoiser_.configured_size = tile_size;
+
+  return true;
+}
+
+bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
+{
+  const BufferParams &buffer_params = context.buffer_params;
+  const int width = buffer_params.width;
+  const int height = buffer_params.height;
+
+  /* Set up input and output layer information. */
+  OptixImage2D color_layer = {0};
+  OptixImage2D albedo_layer = {0};
+  OptixImage2D normal_layer = {0};
+  OptixImage2D flow_layer = {0};
+
+  OptixImage2D output_layer = {0};
+  OptixImage2D prev_output_layer = {0};
+
+  /* Color pass. */
+  {
+    const int pass_denoised = pass.denoised_offset;
+    const int64_t pass_stride_in_bytes = context.buffer_params.pass_stride * sizeof(float);
+
+    color_layer.data = context.render_buffers->buffer.device_pointer +
+                       pass_denoised * sizeof(float);
+    color_layer.width = width;
+    color_layer.height = height;
+    color_layer.rowStrideInBytes = pass_stride_in_bytes * context.buffer_params.stride;
+    color_layer.pixelStrideInBytes = pass_stride_in_bytes;
+    color_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
+  }
+
+  /* Previous output. */
+  if (context.prev_output.offset != PASS_UNUSED) {
+    const int64_t pass_stride_in_bytes = context.prev_output.pass_stride * sizeof(float);
+
+    prev_output_layer.data = context.prev_output.device_pointer +
+                             context.prev_output.offset * sizeof(float);
+    prev_output_layer.width = width;
+    prev_output_layer.height = height;
+    prev_output_layer.rowStrideInBytes = pass_stride_in_bytes * context.prev_output.stride;
+    prev_output_layer.pixelStrideInBytes = pass_stride_in_bytes;
+    prev_output_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
+  }
+
+  /* Optional guiding passes: albedo, normal and flow. */
+  if (context.num_input_passes > 1) {
+    const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;
+    const int64_t pixel_stride_in_bytes = context.guiding_params.pass_stride * sizeof(float);
+    const int64_t row_stride_in_bytes = context.guiding_params.stride * pixel_stride_in_bytes;
+
+    if (context.use_pass_albedo) {
+      albedo_layer.data = d_guiding_buffer + context.guiding_params.pass_albedo * sizeof(float);
+      albedo_layer.width = width;
+      albedo_layer.height = height;
+      albedo_layer.rowStrideInBytes = row_stride_in_bytes;
+      albedo_layer.pixelStrideInBytes = pixel_stride_in_bytes;
+      albedo_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
+    }
+
+    if (context.use_pass_normal) {
+      normal_layer.data = d_guiding_buffer + context.guiding_params.pass_normal * sizeof(float);
+      normal_layer.width = width;
+      normal_layer.height = height;
+      normal_layer.rowStrideInBytes = row_stride_in_bytes;
+      normal_layer.pixelStrideInBytes = pixel_stride_in_bytes;
+      normal_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
+    }
+
+    if (context.use_pass_flow) {
+      flow_layer.data = d_guiding_buffer + context.guiding_params.pass_flow * sizeof(float);
+      flow_layer.width = width;
+      flow_layer.height = height;
+      flow_layer.rowStrideInBytes = row_stride_in_bytes;
+      flow_layer.pixelStrideInBytes = pixel_stride_in_bytes;
+      flow_layer.format = OPTIX_PIXEL_FORMAT_FLOAT2;
+    }
+  }
+
+  /* Denoise in-place of the noisy input in the render buffers. */
+  output_layer = color_layer;
+
+  OptixDenoiserGuideLayer guide_layers = {};
+  guide_layers.albedo = albedo_layer;
+  guide_layers.normal = normal_layer;
+  guide_layers.flow = flow_layer;
+
+  OptixDenoiserLayer image_layers = {};
+  image_layers.input = color_layer;
+  image_layers.previousOutput = prev_output_layer;
+  image_layers.output = output_layer;
+
+  /* Finally run denoising. */
+  OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */
+
+  optix_assert(ccl::optixUtilDenoiserInvokeTiled(denoiser_.optix_denoiser,
+                                                 denoiser_.queue.stream(),
+                                                 &params,
+                                                 denoiser_.state.device_pointer,
+                                                 denoiser_.sizes.stateSizeInBytes,
+                                                 &guide_layers,
+                                                 &image_layers,
+                                                 1,
+                                                 denoiser_.state.device_pointer +
+                                                     denoiser_.sizes.stateSizeInBytes,
+                                                 denoiser_.sizes.withOverlapScratchSizeInBytes,
+                                                 denoiser_.sizes.overlapWindowSizeInPixels,
+                                                 denoiser_.configured_size.x,
+                                                 denoiser_.configured_size.y));
+
+  return true;
+}
+
 bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
                                  OptixBuildOperation operation,
                                  const OptixBuildInput &build_input,
--- a/intern/cycles/device/optix/device_impl.h
+++ b/intern/cycles/device/optix/device_impl.h
@@ -1,14 +1,17 @@
 /* SPDX-License-Identifier: Apache-2.0
- * Copyright 2019, NVIDIA Corporation
- * Copyright 2019-2022 Blender Foundation */
+ * Copyright 2019, NVIDIA Corporation.
+ * Copyright 2019-2022 Blender Foundation. */

 #pragma once

 #ifdef WITH_OPTIX

 #  include "device/cuda/device_impl.h"
+#  include "device/optix/queue.h"
 #  include "device/optix/util.h"
 #  include "kernel/osl/globals.h"
+#  include "kernel/types.h"
+#  include "util/unique_ptr.h"

 CCL_NAMESPACE_BEGIN

@@ -84,6 +87,32 @@ class OptiXDevice : public CUDADevice {
  vector<unique_ptr<device_only_memory<char>>> delayed_free_bvh_memory;
  thread_mutex delayed_free_bvh_mutex;

+  class Denoiser {
+   public:
+    explicit Denoiser(OptiXDevice *device);
+
+    OptiXDevice *device;
+    OptiXDeviceQueue queue;
+
+    OptixDenoiser optix_denoiser = nullptr;
+
+    /* Configuration size, as provided to `optixDenoiserSetup`.
+     * If `optixDenoiserSetup()` was never called on the current `optix_denoiser`,
+     * `is_configured` will be false. */
+    bool is_configured = false;
+    int2 configured_size = make_int2(0, 0);
+
+    /* OptiX denoiser state and scratch buffers, stored in a single memory buffer.
+     * The memory layout is as follows: [denoiser state][scratch buffer]. */
+    device_only_memory<unsigned char> state;
+    OptixDenoiserSizes sizes = {};
+
+    bool use_pass_albedo = false;
+    bool use_pass_normal = false;
+    bool use_pass_flow = false;
+  };
+  Denoiser denoiser_;
+
 public:
  OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
  ~OptiXDevice();
@@ -113,6 +142,53 @@ class OptiXDevice : public CUDADevice {
  virtual unique_ptr<DeviceQueue> gpu_queue_create() override;

  void *get_cpu_osl_memory() override;
+
+  /* --------------------------------------------------------------------
+   * Denoising.
+   */
+
+  class DenoiseContext;
+  class DenoisePass;
+
+  virtual bool denoise_buffer(const DeviceDenoiseTask &task) override;
+  virtual DeviceQueue *get_denoise_queue() override;
+
+  /* Read guiding passes from the render buffers, preprocess them in a way which is expected by
+   * OptiX and store in the guiding passes memory within the given context.
+   *
+   * Pre-processing of the guiding passes is to only happen once per context lifetime. Do not
+   * preprocess them for every pass which is being denoised. */
+  bool denoise_filter_guiding_preprocess(DenoiseContext &context);
+
+  /* Set fake albedo pixels in the albedo guiding pass storage.
+   * After this point only passes which do not need albedo for denoising can be processed. */
+  bool denoise_filter_guiding_set_fake_albedo(DenoiseContext &context);
+
+  void denoise_pass(DenoiseContext &context, PassType pass_type);
+
+  /* Read input color pass from the render buffer into the memory which corresponds to the noisy
+   * input within the given context. Pixels are scaled to the number of samples, but are not
+   * preprocessed yet. */
+  void denoise_color_read(DenoiseContext &context, const DenoisePass &pass);
+
+  /* Run corresponding filter kernels, preparing data for the denoiser or copying data from the
+   * denoiser result to the render buffer. */
+  bool denoise_filter_color_preprocess(DenoiseContext &context, const DenoisePass &pass);
+  bool denoise_filter_color_postprocess(DenoiseContext &context, const DenoisePass &pass);
+
+  /* Make sure the OptiX denoiser is created and configured. */
+  bool denoise_ensure(DenoiseContext &context);
+
+  /* Create OptiX denoiser descriptor if needed.
+   * Will do nothing if the current OptiX descriptor is usable for the given parameters.
+   * If the OptiX denoiser descriptor was re-allocated here, it is left unconfigured. */
+  bool denoise_create_if_needed(DenoiseContext &context);
+
+  /* Configure the existing OptiX denoiser descriptor for use with the given task. */
+  bool denoise_configure_if_needed(DenoiseContext &context);
+
+  /* Run configured denoiser. */
+  bool denoise_run(DenoiseContext &context, const DenoisePass &pass);
 };

 CCL_NAMESPACE_END
--- a/intern/cycles/graph/node_type.h
+++ b/intern/cycles/graph/node_type.h
@@ -66,9 +66,7 @@ struct SocketType {
    LINK_NORMAL = (1 << 8),
    LINK_POSITION = (1 << 9),
    LINK_TANGENT = (1 << 10),
-    LINK_OSL_INITIALIZER = (1 << 11),
-    DEFAULT_LINK_MASK = (1 << 4) | (1 << 5) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) |
-                        (1 << 10) | (1 << 11)
+    DEFAULT_LINK_MASK = (1 << 4) | (1 << 5) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | (1 << 10)
  };

  ustring name;
--- a/intern/cycles/integrator/CMakeLists.txt
+++ b/intern/cycles/integrator/CMakeLists.txt
@@ -8,7 +8,7 @@ set(INC
 set(SRC
  adaptive_sampling.cpp
  denoiser.cpp
-  denoiser_gpu.cpp
+  denoiser_device.cpp
  denoiser_oidn.cpp
  denoiser_optix.cpp
  path_trace.cpp
@@ -30,7 +30,7 @@ set(SRC
 set(SRC_HEADERS
  adaptive_sampling.h
  denoiser.h
-  denoiser_gpu.h
+  denoiser_device.h
  denoiser_oidn.h
  denoiser_optix.h
  path_trace.h
--- a/intern/cycles/integrator/denoiser.cpp
+++ b/intern/cycles/integrator/denoiser.cpp
@@ -16,11 +16,9 @@ unique_ptr<Denoiser> Denoiser::create(Device *path_trace_device, const DenoisePa
 {
  DCHECK(params.use);

-#ifdef WITH_OPTIX
  if (params.type == DENOISER_OPTIX && Device::available_devices(DEVICE_MASK_OPTIX).size()) {
    return make_unique<OptiXDenoiser>(path_trace_device, params);
  }
-#endif

  /* Always fallback to OIDN. */
  DenoiseParams oidn_params = params;
--- a/intern/cycles/integrator/denoiser_device.cpp
+++ b/intern/cycles/integrator/denoiser_device.cpp
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: Apache-2.0
 * Copyright 2011-2022 Blender Foundation */

-#include "integrator/denoiser_gpu.h"
+#include "integrator/denoiser_device.h"

 #include "device/denoise.h"
 #include "device/device.h"
@@ -13,27 +13,27 @@

 CCL_NAMESPACE_BEGIN

-DenoiserGPU::DenoiserGPU(Device *path_trace_device, const DenoiseParams &params)
+DeviceDenoiser::DeviceDenoiser(Device *path_trace_device, const DenoiseParams &params)
    : Denoiser(path_trace_device, params)
 {
 }

-DenoiserGPU::~DenoiserGPU()
+DeviceDenoiser::~DeviceDenoiser()
 {
  /* Explicit implementation, to allow forward declaration of Device in the header. */
 }

-bool DenoiserGPU::denoise_buffer(const BufferParams &buffer_params,
-                                 RenderBuffers *render_buffers,
-                                 const int num_samples,
-                                 bool allow_inplace_modification)
+bool DeviceDenoiser::denoise_buffer(const BufferParams &buffer_params,
+                                    RenderBuffers *render_buffers,
+                                    const int num_samples,
+                                    bool allow_inplace_modification)
 {
  Device *denoiser_device = get_denoiser_device();
  if (!denoiser_device) {
    return false;
  }

-  DenoiseTask task;
+  DeviceDenoiseTask task;
  task.params = params_;
  task.num_samples = num_samples;
  task.buffer_params = buffer_params;
@@ -50,6 +50,8 @@ bool DenoiserGPU::denoise_buffer(const BufferParams &buffer_params,
  else {
    VLOG_WORK << "Creating temporary buffer on denoiser device.";

+    DeviceQueue *queue = denoiser_device->get_denoise_queue();
+
    /* Create buffer which is available by the device used by denoiser. */

    /* TODO(sergey): Optimize data transfers. For example, only copy denoising related passes,
@@ -68,13 +70,13 @@ bool DenoiserGPU::denoise_buffer(const BufferParams &buffer_params,
           render_buffers->buffer.data(),
           sizeof(float) * local_render_buffers.buffer.size());

-    denoiser_queue_->copy_to_device(local_render_buffers.buffer);
+    queue->copy_to_device(local_render_buffers.buffer);

    task.render_buffers = &local_render_buffers;
    task.allow_inplace_modification = true;
  }

-  const bool denoise_result = denoise_buffer(task);
+  const bool denoise_result = denoiser_device->denoise_buffer(task);

  if (local_buffer_used) {
    local_render_buffers.copy_from_device();
@@ -88,21 +90,4 @@ bool DenoiserGPU::denoise_buffer(const BufferParams &buffer_params,
  return denoise_result;
 }

-Device *DenoiserGPU::ensure_denoiser_device(Progress *progress)
-{
-  Device *denoiser_device = Denoiser::ensure_denoiser_device(progress);
-  if (!denoiser_device) {
-    return nullptr;
-  }
-
-  if (!denoiser_queue_) {
-    denoiser_queue_ = denoiser_device->gpu_queue_create();
-    if (!denoiser_queue_) {
-      return nullptr;
-    }
-  }
-
-  return denoiser_device;
-}
-
 CCL_NAMESPACE_END
--- a/intern/cycles/integrator/denoiser_device.h
+++ b/intern/cycles/integrator/denoiser_device.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#pragma once
+
+#include "integrator/denoiser.h"
+#include "util/unique_ptr.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Denoiser which uses a device-specific denoising implementation, such as the OptiX denoiser,
+ * which is implemented as a part of the driver of a specific device.
+ *
+ * This implementation makes sure the to-be-denoised buffer is available on the denoising device
+ * and invokes the denoising kernel via the device API. */
+class DeviceDenoiser : public Denoiser {
+ public:
+  DeviceDenoiser(Device *path_trace_device, const DenoiseParams &params);
+  ~DeviceDenoiser();
+
+  virtual bool denoise_buffer(const BufferParams &buffer_params,
+                              RenderBuffers *render_buffers,
+                              const int num_samples,
+                              bool allow_inplace_modification) override;
+};
+
+CCL_NAMESPACE_END
--- a/intern/cycles/integrator/denoiser_gpu.h
+++ b/intern/cycles/integrator/denoiser_gpu.h
@@ -1,52 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2011-2022 Blender Foundation */
-
-#pragma once
-
-#include "integrator/denoiser.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* Implementation of Denoiser which uses a device-specific denoising implementation, running on a
- * GPU device queue. It makes sure the to-be-denoised buffer is available on the denoising device
- * and invokes denoising kernels via the device queue API. */
-class DenoiserGPU : public Denoiser {
- public:
-  DenoiserGPU(Device *path_trace_device, const DenoiseParams &params);
-  ~DenoiserGPU();
-
-  virtual bool denoise_buffer(const BufferParams &buffer_params,
-                              RenderBuffers *render_buffers,
-                              const int num_samples,
-                              bool allow_inplace_modification) override;
-
- protected:
-  /* All the parameters needed to perform buffer denoising on a device.
-   * Is not really a task in its canonical terms (as in, is not an asynchronous running task). Is
-   * more like a wrapper for all the arguments and parameters needed to perform denoising. Is a
-   * single place where they are all listed, so that it's not required to modify all device methods
-   * when these parameters do change. */
-  class DenoiseTask {
-   public:
-    DenoiseParams params;
-
-    int num_samples;
-
-    RenderBuffers *render_buffers;
-    BufferParams buffer_params;
-
-    /* Allow to do in-place modification of the input passes (scaling them down i.e.). This will
-     * lower the memory footprint of the denoiser but will make input passes "invalid" (from path
-     * tracer) point of view. */
-    bool allow_inplace_modification;
-  };
-
-  /* Returns true if task is fully handled. */
-  virtual bool denoise_buffer(const DenoiseTask & /*task*/) = 0;
-
-  virtual Device *ensure_denoiser_device(Progress *progress) override;
-
-  unique_ptr<DeviceQueue> denoiser_queue_;
-};
-
-CCL_NAMESPACE_END
--- a/intern/cycles/integrator/denoiser_optix.cpp
+++ b/intern/cycles/integrator/denoiser_optix.cpp
@@ -1,786 +1,21 @@
 /* SPDX-License-Identifier: Apache-2.0
 * Copyright 2011-2022 Blender Foundation */

-#ifdef WITH_OPTIX
+#include "integrator/denoiser_optix.h"

-#  include "integrator/denoiser_optix.h"
-#  include "integrator/pass_accessor_gpu.h"
-
-#  include "device/optix/device_impl.h"
-#  include "device/optix/queue.h"
-
-#  include <optix_denoiser_tiling.h>
+#include "device/denoise.h"
+#include "device/device.h"

 CCL_NAMESPACE_BEGIN

-#  if OPTIX_ABI_VERSION >= 60
-using ::optixUtilDenoiserInvokeTiled;
-#  else
-// A minimal copy of functionality `optix_denoiser_tiling.h` which allows to fix integer overflow
-// issues without bumping SDK or driver requirement.
-//
-// The original code is Copyright NVIDIA Corporation, BSD-3-Clause.
-static OptixResult optixUtilDenoiserSplitImage(const OptixImage2D &input,
-                                               const OptixImage2D &output,
-                                               unsigned int overlapWindowSizeInPixels,
-                                               unsigned int tileWidth,
-                                               unsigned int tileHeight,
-                                               std::vector<OptixUtilDenoiserImageTile> &tiles)
-{
-  if (tileWidth == 0 || tileHeight == 0)
-    return OPTIX_ERROR_INVALID_VALUE;
-
-  unsigned int inPixelStride = optixUtilGetPixelStride(input);
-  unsigned int outPixelStride = optixUtilGetPixelStride(output);
-
-  int inp_w = std::min(tileWidth + 2 * overlapWindowSizeInPixels, input.width);
-  int inp_h = std::min(tileHeight + 2 * overlapWindowSizeInPixels, input.height);
-  int inp_y = 0, copied_y = 0;
-
-  do {
-    int inputOffsetY = inp_y == 0 ? 0 :
-                                    std::max((int)overlapWindowSizeInPixels,
-                                             inp_h - ((int)input.height - inp_y));
-    int copy_y = inp_y == 0 ? std::min(input.height, tileHeight + overlapWindowSizeInPixels) :
-                              std::min(tileHeight, input.height - copied_y);
-
-    int inp_x = 0, copied_x = 0;
-    do {
-      int inputOffsetX = inp_x == 0 ? 0 :
-                                      std::max((int)overlapWindowSizeInPixels,
-                                               inp_w - ((int)input.width - inp_x));
-      int copy_x = inp_x == 0 ? std::min(input.width, tileWidth + overlapWindowSizeInPixels) :
-                                std::min(tileWidth, input.width - copied_x);
-
-      OptixUtilDenoiserImageTile tile;
-      tile.input.data = input.data + (size_t)(inp_y - inputOffsetY) * input.rowStrideInBytes +
-                        +(size_t)(inp_x - inputOffsetX) * inPixelStride;
-      tile.input.width = inp_w;
-      tile.input.height = inp_h;
-      tile.input.rowStrideInBytes = input.rowStrideInBytes;
-      tile.input.pixelStrideInBytes = input.pixelStrideInBytes;
-      tile.input.format = input.format;
-
-      tile.output.data = output.data + (size_t)inp_y * output.rowStrideInBytes +
-                         (size_t)inp_x * outPixelStride;
-      tile.output.width = copy_x;
-      tile.output.height = copy_y;
-      tile.output.rowStrideInBytes = output.rowStrideInBytes;
-      tile.output.pixelStrideInBytes = output.pixelStrideInBytes;
-      tile.output.format = output.format;
-
-      tile.inputOffsetX = inputOffsetX;
-      tile.inputOffsetY = inputOffsetY;
-      tiles.push_back(tile);
-
-      inp_x += inp_x == 0 ? tileWidth + overlapWindowSizeInPixels : tileWidth;
-      copied_x += copy_x;
-    } while (inp_x < static_cast<int>(input.width));
-
-    inp_y += inp_y == 0 ? tileHeight + overlapWindowSizeInPixels : tileHeight;
-    copied_y += copy_y;
-  } while (inp_y < static_cast<int>(input.height));
-
-  return OPTIX_SUCCESS;
-}
-
-static OptixResult optixUtilDenoiserInvokeTiled(OptixDenoiser denoiser,
-                                                CUstream stream,
-                                                const OptixDenoiserParams *params,
-                                                CUdeviceptr denoiserState,
-                                                size_t denoiserStateSizeInBytes,
-                                                const OptixDenoiserGuideLayer *guideLayer,
-                                                const OptixDenoiserLayer *layers,
-                                                unsigned int numLayers,
-                                                CUdeviceptr scratch,
-                                                size_t scratchSizeInBytes,
-                                                unsigned int overlapWindowSizeInPixels,
-                                                unsigned int tileWidth,
-                                                unsigned int tileHeight)
-{
-  if (!guideLayer || !layers)
-    return OPTIX_ERROR_INVALID_VALUE;
-
-  std::vector<std::vector<OptixUtilDenoiserImageTile>> tiles(numLayers);
-  std::vector<std::vector<OptixUtilDenoiserImageTile>> prevTiles(numLayers);
-  for (unsigned int l = 0; l < numLayers; l++) {
-    if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(layers[l].input,
-                                                                 layers[l].output,
-                                                                 overlapWindowSizeInPixels,
-                                                                 tileWidth,
-                                                                 tileHeight,
-                                                                 tiles[l]))
-      return res;
-
-    if (layers[l].previousOutput.data) {
-      OptixImage2D dummyOutput = layers[l].previousOutput;
-      if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(layers[l].previousOutput,
-                                                                   dummyOutput,
-                                                                   overlapWindowSizeInPixels,
-                                                                   tileWidth,
-                                                                   tileHeight,
-                                                                   prevTiles[l]))
-        return res;
-    }
-  }
-
-  std::vector<OptixUtilDenoiserImageTile> albedoTiles;
-  if (guideLayer->albedo.data) {
-    OptixImage2D dummyOutput = guideLayer->albedo;
-    if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->albedo,
-                                                                 dummyOutput,
-                                                                 overlapWindowSizeInPixels,
-                                                                 tileWidth,
-                                                                 tileHeight,
-                                                                 albedoTiles))
-      return res;
-  }
-
-  std::vector<OptixUtilDenoiserImageTile> normalTiles;
-  if (guideLayer->normal.data) {
-    OptixImage2D dummyOutput = guideLayer->normal;
-    if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->normal,
-                                                                 dummyOutput,
-                                                                 overlapWindowSizeInPixels,
-                                                                 tileWidth,
-                                                                 tileHeight,
-                                                                 normalTiles))
-      return res;
-  }
-  std::vector<OptixUtilDenoiserImageTile> flowTiles;
-  if (guideLayer->flow.data) {
-    OptixImage2D dummyOutput = guideLayer->flow;
-    if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->flow,
-                                                                 dummyOutput,
-                                                                 overlapWindowSizeInPixels,
-                                                                 tileWidth,
-                                                                 tileHeight,
-                                                                 flowTiles))
-      return res;
-  }
-
-  for (size_t t = 0; t < tiles[0].size(); t++) {
-    std::vector<OptixDenoiserLayer> tlayers;
-    for (unsigned int l = 0; l < numLayers; l++) {
-      OptixDenoiserLayer layer = {};
-      layer.input = (tiles[l])[t].input;
-      layer.output = (tiles[l])[t].output;
-      if (layers[l].previousOutput.data)
-        layer.previousOutput = (prevTiles[l])[t].input;
-      tlayers.push_back(layer);
-    }
-
-    OptixDenoiserGuideLayer gl = {};
-    if (guideLayer->albedo.data)
-      gl.albedo = albedoTiles[t].input;
-
-    if (guideLayer->normal.data)
-      gl.normal = normalTiles[t].input;
-
-    if (guideLayer->flow.data)
-      gl.flow = flowTiles[t].input;
-
-    if (const OptixResult res = optixDenoiserInvoke(denoiser,
-                                                    stream,
-                                                    params,
-                                                    denoiserState,
-                                                    denoiserStateSizeInBytes,
-                                                    &gl,
-                                                    &tlayers[0],
-                                                    numLayers,
-                                                    (tiles[0])[t].inputOffsetX,
-                                                    (tiles[0])[t].inputOffsetY,
-                                                    scratch,
-                                                    scratchSizeInBytes))
-      return res;
-  }
-  return OPTIX_SUCCESS;
-}
-#  endif
-
 OptiXDenoiser::OptiXDenoiser(Device *path_trace_device, const DenoiseParams &params)
-    : DenoiserGPU(path_trace_device, params), state_(path_trace_device, "__denoiser_state", true)
+    : DeviceDenoiser(path_trace_device, params)
 {
 }

-OptiXDenoiser::~OptiXDenoiser()
-{
-  /* It is important that the OptixDenoiser handle is destroyed before the OptixDeviceContext
-   * handle, which is guaranteed since the local denoising device owning the OptiX device context
-   * is deleted as part of the Denoiser class destructor call after this. */
-  if (optix_denoiser_ != nullptr) {
-    optixDenoiserDestroy(optix_denoiser_);
-  }
-}
-
 uint OptiXDenoiser::get_device_type_mask() const
 {
  return DEVICE_MASK_OPTIX;
 }

-class OptiXDenoiser::DenoiseContext {
- public:
-  explicit DenoiseContext(OptiXDevice *device, const DenoiseTask &task)
-      : denoise_params(task.params),
-        render_buffers(task.render_buffers),
-        buffer_params(task.buffer_params),
-        guiding_buffer(device, "denoiser guiding passes buffer", true),
-        num_samples(task.num_samples)
-  {
-    num_input_passes = 1;
-    if (denoise_params.use_pass_albedo) {
-      num_input_passes += 1;
-      use_pass_albedo = true;
-      pass_denoising_albedo = buffer_params.get_pass_offset(PASS_DENOISING_ALBEDO);
-      if (denoise_params.use_pass_normal) {
-        num_input_passes += 1;
-        use_pass_normal = true;
-        pass_denoising_normal = buffer_params.get_pass_offset(PASS_DENOISING_NORMAL);
-      }
-    }
-
-    if (denoise_params.temporally_stable) {
-      prev_output.device_pointer = render_buffers->buffer.device_pointer;
-
-      prev_output.offset = buffer_params.get_pass_offset(PASS_DENOISING_PREVIOUS);
-
-      prev_output.stride = buffer_params.stride;
-      prev_output.pass_stride = buffer_params.pass_stride;
-
-      num_input_passes += 1;
-      use_pass_motion = true;
-      pass_motion = buffer_params.get_pass_offset(PASS_MOTION);
-    }
-
-    use_guiding_passes = (num_input_passes - 1) > 0;
-
-    if (use_guiding_passes) {
-      if (task.allow_inplace_modification) {
-        guiding_params.device_pointer = render_buffers->buffer.device_pointer;
-
-        guiding_params.pass_albedo = pass_denoising_albedo;
-        guiding_params.pass_normal = pass_denoising_normal;
-        guiding_params.pass_flow = pass_motion;
-
-        guiding_params.stride = buffer_params.stride;
-        guiding_params.pass_stride = buffer_params.pass_stride;
-      }
-      else {
-        guiding_params.pass_stride = 0;
-        if (use_pass_albedo) {
-          guiding_params.pass_albedo = guiding_params.pass_stride;
-          guiding_params.pass_stride += 3;
-        }
-        if (use_pass_normal) {
-          guiding_params.pass_normal = guiding_params.pass_stride;
-          guiding_params.pass_stride += 3;
-        }
-        if (use_pass_motion) {
-          guiding_params.pass_flow = guiding_params.pass_stride;
-          guiding_params.pass_stride += 2;
-        }
-
-        guiding_params.stride = buffer_params.width;
-
-        guiding_buffer.alloc_to_device(buffer_params.width * buffer_params.height *
-                                       guiding_params.pass_stride);
-        guiding_params.device_pointer = guiding_buffer.device_pointer;
-      }
-    }
-
-    pass_sample_count = buffer_params.get_pass_offset(PASS_SAMPLE_COUNT);
-  }
-
-  const DenoiseParams &denoise_params;
-
-  RenderBuffers *render_buffers = nullptr;
-  const BufferParams &buffer_params;
-
-  /* Previous output. */
-  struct {
-    device_ptr device_pointer = 0;
-
-    int offset = PASS_UNUSED;
-
-    int stride = -1;
-    int pass_stride = -1;
-  } prev_output;
-
-  /* Device-side storage of the guiding passes. */
-  device_only_memory<float> guiding_buffer;
-
-  struct {
-    device_ptr device_pointer = 0;
-
-    /* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
-    int pass_albedo = PASS_UNUSED;
-    int pass_normal = PASS_UNUSED;
-    int pass_flow = PASS_UNUSED;
-
-    int stride = -1;
-    int pass_stride = -1;
-  } guiding_params;
-
-  /* Number of input passes. Including the color and extra auxiliary passes. */
-  int num_input_passes = 0;
-  bool use_guiding_passes = false;
-  bool use_pass_albedo = false;
-  bool use_pass_normal = false;
-  bool use_pass_motion = false;
-
-  int num_samples = 0;
-
-  int pass_sample_count = PASS_UNUSED;
-
-  /* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
-  int pass_denoising_albedo = PASS_UNUSED;
-  int pass_denoising_normal = PASS_UNUSED;
-  int pass_motion = PASS_UNUSED;
-
-  /* For passes which don't need albedo channel for denoising we replace the actual albedo with
-   * the (0.5, 0.5, 0.5). This flag indicates that the real albedo pass has been replaced with
-   * the fake values and denoising of passes which do need albedo can no longer happen. */
-  bool albedo_replaced_with_fake = false;
-};
-
-class OptiXDenoiser::DenoisePass {
- public:
-  DenoisePass(const PassType type, const BufferParams &buffer_params) : type(type)
-  {
-    noisy_offset = buffer_params.get_pass_offset(type, PassMode::NOISY);
-    denoised_offset = buffer_params.get_pass_offset(type, PassMode::DENOISED);
-
-    const PassInfo pass_info = Pass::get_info(type);
-    num_components = pass_info.num_components;
-    use_compositing = pass_info.use_compositing;
-    use_denoising_albedo = pass_info.use_denoising_albedo;
-  }
-
-  PassType type;
-
-  int noisy_offset;
-  int denoised_offset;
-
-  int num_components;
-  bool use_compositing;
-  bool use_denoising_albedo;
-};
-
-bool OptiXDenoiser::denoise_buffer(const DenoiseTask &task)
-{
-  OptiXDevice *const optix_device = static_cast<OptiXDevice *>(denoiser_device_);
-
-  const CUDAContextScope scope(optix_device);
-
-  DenoiseContext context(optix_device, task);
-
-  if (!denoise_ensure(context)) {
-    return false;
-  }
-
-  if (!denoise_filter_guiding_preprocess(context)) {
-    LOG(ERROR) << "Error preprocessing guiding passes.";
-    return false;
-  }
-
-  /* Passes which will use real albedo when it is available. */
-  denoise_pass(context, PASS_COMBINED);
-  denoise_pass(context, PASS_SHADOW_CATCHER_MATTE);
-
-  /* Passes which do not need albedo and hence if real is present it needs to become fake. */
-  denoise_pass(context, PASS_SHADOW_CATCHER);
-
-  return true;
-}
-
-bool OptiXDenoiser::denoise_filter_guiding_preprocess(const DenoiseContext &context)
-{
-  const BufferParams &buffer_params = context.buffer_params;
-
-  const int work_size = buffer_params.width * buffer_params.height;
-
-  DeviceKernelArguments args(&context.guiding_params.device_pointer,
-                             &context.guiding_params.pass_stride,
-                             &context.guiding_params.pass_albedo,
-                             &context.guiding_params.pass_normal,
-                             &context.guiding_params.pass_flow,
-                             &context.render_buffers->buffer.device_pointer,
-                             &buffer_params.offset,
-                             &buffer_params.stride,
-                             &buffer_params.pass_stride,
-                             &context.pass_sample_count,
-                             &context.pass_denoising_albedo,
-                             &context.pass_denoising_normal,
-                             &context.pass_motion,
-                             &buffer_params.full_x,
-                             &buffer_params.full_y,
-                             &buffer_params.width,
-                             &buffer_params.height,
-                             &context.num_samples);
-
-  return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, work_size, args);
-}
-
-bool OptiXDenoiser::denoise_filter_guiding_set_fake_albedo(const DenoiseContext &context)
-{
-  const BufferParams &buffer_params = context.buffer_params;
-
-  const int work_size = buffer_params.width * buffer_params.height;
-
-  DeviceKernelArguments args(&context.guiding_params.device_pointer,
-                             &context.guiding_params.pass_stride,
-                             &context.guiding_params.pass_albedo,
-                             &buffer_params.width,
-                             &buffer_params.height);
-
-  return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO, work_size, args);
-}
-
-void OptiXDenoiser::denoise_pass(DenoiseContext &context, PassType pass_type)
-{
-  const BufferParams &buffer_params = context.buffer_params;
-
-  const DenoisePass pass(pass_type, buffer_params);
-
-  if (pass.noisy_offset == PASS_UNUSED) {
-    return;
-  }
-  if (pass.denoised_offset == PASS_UNUSED) {
-    LOG(DFATAL) << "Missing denoised pass " << pass_type_as_string(pass_type);
-    return;
-  }
-
-  if (pass.use_denoising_albedo) {
-    if (context.albedo_replaced_with_fake) {
-      LOG(ERROR) << "Pass which requires albedo is denoised after fake albedo has been set.";
-      return;
-    }
-  }
-  else if (context.use_guiding_passes && !context.albedo_replaced_with_fake) {
-    context.albedo_replaced_with_fake = true;
-    if (!denoise_filter_guiding_set_fake_albedo(context)) {
-      LOG(ERROR) << "Error replacing real albedo with the fake one.";
-      return;
-    }
-  }
-
-  /* Read and preprocess noisy color input pass. */
-  denoise_color_read(context, pass);
-  if (!denoise_filter_color_preprocess(context, pass)) {
-    LOG(ERROR) << "Error converting denoising passes to RGB buffer.";
-    return;
-  }
-
-  if (!denoise_run(context, pass)) {
-    LOG(ERROR) << "Error running OptiX denoiser.";
-    return;
-  }
-
-  /* Store result in the combined pass of the render buffer.
-   *
-   * This will scale the denoiser result up to match the number of, possibly per-pixel, samples. */
-  if (!denoise_filter_color_postprocess(context, pass)) {
-    LOG(ERROR) << "Error copying denoiser result to the denoised pass.";
-    return;
-  }
-
-  denoiser_queue_->synchronize();
-}
-
-void OptiXDenoiser::denoise_color_read(const DenoiseContext &context, const DenoisePass &pass)
-{
-  PassAccessor::PassAccessInfo pass_access_info;
-  pass_access_info.type = pass.type;
-  pass_access_info.mode = PassMode::NOISY;
-  pass_access_info.offset = pass.noisy_offset;
-
-  /* Denoiser operates on passes which are used to calculate the approximation, and is never used
-   * on the approximation. The latter is not even possible because OptiX does not support
-   * denoising of semi-transparent pixels. */
-  pass_access_info.use_approximate_shadow_catcher = false;
-  pass_access_info.use_approximate_shadow_catcher_background = false;
-  pass_access_info.show_active_pixels = false;
-
-  /* TODO(sergey): Consider adding support of actual exposure, to avoid clamping in extreme cases.
-   */
-  const PassAccessorGPU pass_accessor(
-      denoiser_queue_.get(), pass_access_info, 1.0f, context.num_samples);
-
-  PassAccessor::Destination destination(pass_access_info.type);
-  destination.d_pixels = context.render_buffers->buffer.device_pointer +
-                         pass.denoised_offset * sizeof(float);
-  destination.num_components = 3;
-  destination.pixel_stride = context.buffer_params.pass_stride;
-
-  BufferParams buffer_params = context.buffer_params;
-  buffer_params.window_x = 0;
-  buffer_params.window_y = 0;
-  buffer_params.window_width = buffer_params.width;
-  buffer_params.window_height = buffer_params.height;
-
-  pass_accessor.get_render_tile_pixels(context.render_buffers, buffer_params, destination);
-}
-
-bool OptiXDenoiser::denoise_filter_color_preprocess(const DenoiseContext &context,
-                                                    const DenoisePass &pass)
-{
-  const BufferParams &buffer_params = context.buffer_params;
-
-  const int work_size = buffer_params.width * buffer_params.height;
-
-  DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
-                             &buffer_params.full_x,
-                             &buffer_params.full_y,
-                             &buffer_params.width,
-                             &buffer_params.height,
-                             &buffer_params.offset,
-                             &buffer_params.stride,
-                             &buffer_params.pass_stride,
-                             &pass.denoised_offset);
-
-  return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, work_size, args);
-}
-
-bool OptiXDenoiser::denoise_filter_color_postprocess(const DenoiseContext &context,
-                                                     const DenoisePass &pass)
-{
-  const BufferParams &buffer_params = context.buffer_params;
-
-  const int work_size = buffer_params.width * buffer_params.height;
-
-  DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
-                             &buffer_params.full_x,
-                             &buffer_params.full_y,
-                             &buffer_params.width,
-                             &buffer_params.height,
-                             &buffer_params.offset,
-                             &buffer_params.stride,
-                             &buffer_params.pass_stride,
-                             &context.num_samples,
-                             &pass.noisy_offset,
-                             &pass.denoised_offset,
-                             &context.pass_sample_count,
-                             &pass.num_components,
-                             &pass.use_compositing);
-
-  return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, work_size, args);
-}
-
-bool OptiXDenoiser::denoise_ensure(DenoiseContext &context)
-{
-  if (!denoise_create_if_needed(context)) {
-    LOG(ERROR) << "OptiX denoiser creation has failed.";
-    return false;
-  }
-
-  if (!denoise_configure_if_needed(context)) {
-    LOG(ERROR) << "OptiX denoiser configuration has failed.";
-    return false;
-  }
-
-  return true;
-}
-
-bool OptiXDenoiser::denoise_create_if_needed(DenoiseContext &context)
-{
-  const bool recreate_denoiser = (optix_denoiser_ == nullptr) ||
-                                 (use_pass_albedo_ != context.use_pass_albedo) ||
-                                 (use_pass_normal_ != context.use_pass_normal) ||
-                                 (use_pass_motion_ != context.use_pass_motion);
-  if (!recreate_denoiser) {
-    return true;
-  }
-
-  /* Destroy existing handle before creating new one. */
-  if (optix_denoiser_) {
-    optixDenoiserDestroy(optix_denoiser_);
-  }
-
-  /* Create OptiX denoiser handle on demand when it is first used. */
-  OptixDenoiserOptions denoiser_options = {};
-  denoiser_options.guideAlbedo = context.use_pass_albedo;
-  denoiser_options.guideNormal = context.use_pass_normal;
-
-  OptixDenoiserModelKind model = OPTIX_DENOISER_MODEL_KIND_HDR;
-  if (context.use_pass_motion) {
-    model = OPTIX_DENOISER_MODEL_KIND_TEMPORAL;
-  }
-
-  const OptixResult result = optixDenoiserCreate(
-      static_cast<OptiXDevice *>(denoiser_device_)->context,
-      model,
-      &denoiser_options,
-      &optix_denoiser_);
-
-  if (result != OPTIX_SUCCESS) {
-    denoiser_device_->set_error("Failed to create OptiX denoiser");
-    return false;
-  }
-
-  /* OptiX denoiser handle was created with the requested number of input passes. */
-  use_pass_albedo_ = context.use_pass_albedo;
-  use_pass_normal_ = context.use_pass_normal;
-  use_pass_motion_ = context.use_pass_motion;
-
-  /* OptiX denoiser has been created, but it needs configuration. */
-  is_configured_ = false;
-
-  return true;
-}
-
-bool OptiXDenoiser::denoise_configure_if_needed(DenoiseContext &context)
-{
-  /* Limit maximum tile size denoiser can be invoked with. */
-  const int2 tile_size = make_int2(min(context.buffer_params.width, 4096),
-                                   min(context.buffer_params.height, 4096));
-
-  if (is_configured_ && (configured_size_.x == tile_size.x && configured_size_.y == tile_size.y)) {
-    return true;
-  }
-
-  optix_device_assert(
-      denoiser_device_,
-      optixDenoiserComputeMemoryResources(optix_denoiser_, tile_size.x, tile_size.y, &sizes_));
-
-  /* Allocate denoiser state if tile size has changed since last setup. */
-  state_.device = denoiser_device_;
-  state_.alloc_to_device(sizes_.stateSizeInBytes + sizes_.withOverlapScratchSizeInBytes);
-
-  /* Initialize denoiser state for the current tile size. */
-  const OptixResult result = optixDenoiserSetup(
-      optix_denoiser_,
-      0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called
-          * on a stream that is not the default stream. */
-      tile_size.x + sizes_.overlapWindowSizeInPixels * 2,
-      tile_size.y + sizes_.overlapWindowSizeInPixels * 2,
-      state_.device_pointer,
-      sizes_.stateSizeInBytes,
-      state_.device_pointer + sizes_.stateSizeInBytes,
-      sizes_.withOverlapScratchSizeInBytes);
-  if (result != OPTIX_SUCCESS) {
-    denoiser_device_->set_error("Failed to set up OptiX denoiser");
-    return false;
-  }
-
-  cuda_device_assert(denoiser_device_, cuCtxSynchronize());
-
-  is_configured_ = true;
-  configured_size_ = tile_size;
-
-  return true;
-}
-
-bool OptiXDenoiser::denoise_run(const DenoiseContext &context, const DenoisePass &pass)
-{
-  const BufferParams &buffer_params = context.buffer_params;
-  const int width = buffer_params.width;
-  const int height = buffer_params.height;
-
-  /* Set up input and output layer information. */
-  OptixImage2D color_layer = {0};
-  OptixImage2D albedo_layer = {0};
-  OptixImage2D normal_layer = {0};
-  OptixImage2D flow_layer = {0};
-
-  OptixImage2D output_layer = {0};
-  OptixImage2D prev_output_layer = {0};
-
-  /* Color pass. */
-  {
-    const int pass_denoised = pass.denoised_offset;
-    const int64_t pass_stride_in_bytes = context.buffer_params.pass_stride * sizeof(float);
-
-    color_layer.data = context.render_buffers->buffer.device_pointer +
-                       pass_denoised * sizeof(float);
-    color_layer.width = width;
-    color_layer.height = height;
-    color_layer.rowStrideInBytes = pass_stride_in_bytes * context.buffer_params.stride;
-    color_layer.pixelStrideInBytes = pass_stride_in_bytes;
-    color_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
-  }
-
-  /* Previous output. */
-  if (context.prev_output.offset != PASS_UNUSED) {
-    const int64_t pass_stride_in_bytes = context.prev_output.pass_stride * sizeof(float);
-
-    prev_output_layer.data = context.prev_output.device_pointer +
-                             context.prev_output.offset * sizeof(float);
-    prev_output_layer.width = width;
-    prev_output_layer.height = height;
-    prev_output_layer.rowStrideInBytes = pass_stride_in_bytes * context.prev_output.stride;
-    prev_output_layer.pixelStrideInBytes = pass_stride_in_bytes;
-    prev_output_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
-  }
-
-  /* Optional albedo and color passes. */
-  if (context.num_input_passes > 1) {
-    const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;
-    const int64_t pixel_stride_in_bytes = context.guiding_params.pass_stride * sizeof(float);
-    const int64_t row_stride_in_bytes = context.guiding_params.stride * pixel_stride_in_bytes;
-
-    if (context.use_pass_albedo) {
-      albedo_layer.data = d_guiding_buffer + context.guiding_params.pass_albedo * sizeof(float);
-      albedo_layer.width = width;
-      albedo_layer.height = height;
-      albedo_layer.rowStrideInBytes = row_stride_in_bytes;
-      albedo_layer.pixelStrideInBytes = pixel_stride_in_bytes;
-      albedo_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
-    }
-
-    if (context.use_pass_normal) {
-      normal_layer.data = d_guiding_buffer + context.guiding_params.pass_normal * sizeof(float);
-      normal_layer.width = width;
-      normal_layer.height = height;
-      normal_layer.rowStrideInBytes = row_stride_in_bytes;
-      normal_layer.pixelStrideInBytes = pixel_stride_in_bytes;
-      normal_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
-    }
-
-    if (context.use_pass_motion) {
-      flow_layer.data = d_guiding_buffer + context.guiding_params.pass_flow * sizeof(float);
-      flow_layer.width = width;
-      flow_layer.height = height;
-      flow_layer.rowStrideInBytes = row_stride_in_bytes;
-      flow_layer.pixelStrideInBytes = pixel_stride_in_bytes;
-      flow_layer.format = OPTIX_PIXEL_FORMAT_FLOAT2;
-    }
-  }
-
-  /* Denoise in-place of the noisy input in the render buffers. */
-  output_layer = color_layer;
-
-  OptixDenoiserGuideLayer guide_layers = {};
-  guide_layers.albedo = albedo_layer;
-  guide_layers.normal = normal_layer;
-  guide_layers.flow = flow_layer;
-
-  OptixDenoiserLayer image_layers = {};
-  image_layers.input = color_layer;
-  image_layers.previousOutput = prev_output_layer;
-  image_layers.output = output_layer;
-
-  /* Finally run denoising. */
-  OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */
-
-  optix_device_assert(denoiser_device_,
-                      ccl::optixUtilDenoiserInvokeTiled(
-                          optix_denoiser_,
-                          static_cast<OptiXDeviceQueue *>(denoiser_queue_.get())->stream(),
-                          &params,
-                          state_.device_pointer,
-                          sizes_.stateSizeInBytes,
-                          &guide_layers,
-                          &image_layers,
-                          1,
-                          state_.device_pointer + sizes_.stateSizeInBytes,
-                          sizes_.withOverlapScratchSizeInBytes,
-                          sizes_.overlapWindowSizeInPixels,
-                          configured_size_.x,
-                          configured_size_.y));
-
-  return true;
-}
-
 CCL_NAMESPACE_END
-
-#endif
--- a/intern/cycles/integrator/denoiser_optix.h
+++ b/intern/cycles/integrator/denoiser_optix.h
@@ -3,84 +3,16 @@

 #pragma once

-#ifdef WITH_OPTIX
-
-#  include "integrator/denoiser_gpu.h"
-
-#  include "device/optix/util.h"
+#include "integrator/denoiser_device.h"

 CCL_NAMESPACE_BEGIN

-/* Implementation of denoising API which uses the OptiX denoiser. */
-class OptiXDenoiser : public DenoiserGPU {
+class OptiXDenoiser : public DeviceDenoiser {
 public:
  OptiXDenoiser(Device *path_trace_device, const DenoiseParams &params);
-  ~OptiXDenoiser();

 protected:
  virtual uint get_device_type_mask() const override;
-
- private:
-  class DenoiseContext;
-  class DenoisePass;
-
-  virtual bool denoise_buffer(const DenoiseTask &task) override;
-
-  /* Read guiding passes from the render buffers, preprocess them in a way which is expected by
-   * OptiX and store in the guiding passes memory within the given context.
-   *
-   * Pre-processing of the guiding passes is to only happen once per context lifetime. DO not
-   * preprocess them for every pass which is being denoised. */
-  bool denoise_filter_guiding_preprocess(const DenoiseContext &context);
-
-  /* Set fake albedo pixels in the albedo guiding pass storage.
-   * After this point only passes which do not need albedo for denoising can be processed. */
-  bool denoise_filter_guiding_set_fake_albedo(const DenoiseContext &context);
-
-  void denoise_pass(DenoiseContext &context, PassType pass_type);
-
-  /* Read input color pass from the render buffer into the memory which corresponds to the noisy
-   * input within the given context. Pixels are scaled to the number of samples, but are not
-   * preprocessed yet. */
-  void denoise_color_read(const DenoiseContext &context, const DenoisePass &pass);
-
-  /* Run corresponding filter kernels, preparing data for the denoiser or copying data from the
-   * denoiser result to the render buffer. */
-  bool denoise_filter_color_preprocess(const DenoiseContext &context, const DenoisePass &pass);
-  bool denoise_filter_color_postprocess(const DenoiseContext &context, const DenoisePass &pass);
-
-  /* Make sure the OptiX denoiser is created and configured. */
-  bool denoise_ensure(DenoiseContext &context);
-
-  /* Create OptiX denoiser descriptor if needed.
-   * Will do nothing if the current OptiX descriptor is usable for the given parameters.
-   * If the OptiX denoiser descriptor did re-allocate here it is left unconfigured. */
-  bool denoise_create_if_needed(DenoiseContext &context);
-
-  /* Configure existing OptiX denoiser descriptor for the use for the given task. */
-  bool denoise_configure_if_needed(DenoiseContext &context);
-
-  /* Run configured denoiser. */
-  bool denoise_run(const DenoiseContext &context, const DenoisePass &pass);
-
-  OptixDenoiser optix_denoiser_ = nullptr;
-
-  /* Configuration size, as provided to `optixDenoiserSetup`.
-   * If the `optixDenoiserSetup()` was never used on the current `optix_denoiser` the
-   * `is_configured` will be false. */
-  bool is_configured_ = false;
-  int2 configured_size_ = make_int2(0, 0);
-
-  /* OptiX denoiser state and scratch buffers, stored in a single memory buffer.
-   * The memory layout goes as following: [denoiser state][scratch buffer]. */
-  device_only_memory<unsigned char> state_;
-  OptixDenoiserSizes sizes_ = {};
-
-  bool use_pass_albedo_ = false;
-  bool use_pass_normal_ = false;
-  bool use_pass_motion_ = false;
 };

 CCL_NAMESPACE_END
-
-#endif
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -285,17 +285,10 @@ set(SRC_KERNEL_INTEGRATOR_HEADERS
 )

 set(SRC_KERNEL_LIGHT_HEADERS
-  light/area.h
+  light/light.h
  light/background.h
  light/common.h
-  light/distant.h
-  light/distribution.h
-  light/light.h
-  light/point.h
  light/sample.h
-  light/spot.h
-  light/tree.h
-  light/triangle.h
 )

 set(SRC_KERNEL_SAMPLE_HEADERS
@@ -473,29 +466,54 @@ if(WITH_CYCLES_CUDA_BINARIES)

    if(WITH_CYCLES_DEBUG)
      set(cuda_flags ${cuda_flags} -D WITH_CYCLES_DEBUG)
-      set(cuda_flags ${cuda_flags} --ptxas-options="-v")
    endif()

-    set(_cuda_nvcc_args
-          -arch=${arch}
-          ${CUDA_NVCC_FLAGS}
-          --${format}
-          ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
-          ${cuda_flags})
+    if(WITH_CYCLES_CUBIN_COMPILER)
+      string(SUBSTRING ${arch} 3 -1 CUDA_ARCH)
+
+      # The library path is needed to find libnvrtc-builtins.so. It cannot be set
+      # from inside cycles_cubin_cc, since the variable is read before main().
+      if(APPLE)
+        set(CUBIN_CC_ENV ${CMAKE_COMMAND}
+          -E env "DYLD_LIBRARY_PATH=${cuda_toolkit_root_dir}/lib")
+      elseif(UNIX)
+        set(CUBIN_CC_ENV ${CMAKE_COMMAND}
+          -E env "LD_LIBRARY_PATH=${cuda_toolkit_root_dir}/lib64")
+      endif()

-    if(WITH_COMPILER_CCACHE AND CCACHE_PROGRAM)
      add_custom_command(
        OUTPUT ${cuda_file}
-        COMMAND ${CCACHE_PROGRAM} ${cuda_nvcc_executable} ${_cuda_nvcc_args}
-        DEPENDS ${kernel_sources})
+        COMMAND ${CUBIN_CC_ENV}
+            "$<TARGET_FILE:cycles_cubin_cc>"
+            -target ${CUDA_ARCH}
+            -i ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
+            ${cuda_flags}
+            -v
+            -cuda-toolkit-dir "${cuda_toolkit_root_dir}"
+        DEPENDS ${kernel_sources} cycles_cubin_cc)
    else()
-      add_custom_command(
-        OUTPUT ${cuda_file}
-        COMMAND ${cuda_nvcc_executable} ${_cuda_nvcc_args}
-        DEPENDS ${kernel_sources})
-    endif()
+      set(_cuda_nvcc_args
+            -arch=${arch}
+            ${CUDA_NVCC_FLAGS}
+            --${format}
+            ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
+            --ptxas-options="-v"
+            ${cuda_flags})

-    unset(_cuda_nvcc_args)
+      if(WITH_COMPILER_CCACHE AND CCACHE_PROGRAM)
+        add_custom_command(
+          OUTPUT ${cuda_file}
+          COMMAND ${CCACHE_PROGRAM} ${cuda_nvcc_executable} ${_cuda_nvcc_args}
+          DEPENDS ${kernel_sources})
+      else()
+        add_custom_command(
+          OUTPUT ${cuda_file}
+          COMMAND ${cuda_nvcc_executable} ${_cuda_nvcc_args}
+          DEPENDS ${kernel_sources})
+      endif()
+
+      unset(_cuda_nvcc_args)
+    endif()
    delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_file}" ${CYCLES_INSTALL_PATH}/lib)
    list(APPEND cuda_cubins ${cuda_file})

@@ -571,22 +589,13 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
    if(WIN32)
      set(hip_command ${CMAKE_COMMAND})
      set(hip_flags
-        -E env "HIP_PATH=${HIP_ROOT_DIR}"
+        -E env "HIP_PATH=${HIP_ROOT_DIR}" "PATH=${HIP_PERL_DIR}"
        ${HIP_HIPCC_EXECUTABLE}.bat)
    else()
      set(hip_command ${HIP_HIPCC_EXECUTABLE})
      set(hip_flags)
    endif()

-    # There's a bug in the compiler causing some scenes to fail to render on Vega cards
-    # A workaround currently is to set -O1 opt level during kernel compilation for these
-    # cards Remove this when a newer compiler is available with fixes.
-    if(WIN32 AND (${arch} MATCHES "gfx90[a-z0-9]+"))
-      set(hip_opt_flags "-O1")
-    else()
-      set(hip_opt_flags)
-    endif()
-
    set(hip_flags
      ${hip_flags}
      --amdgpu-target=${arch}
@@ -603,7 +612,6 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
      -Wno-unused-value
      --hipcc-func-supp
      -ffast-math
-      ${hip_opt_flags}
      -o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file})

    if(WITH_NANOVDB)
@@ -657,25 +665,55 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
      set(cuda_flags ${cuda_flags} -D WITH_CYCLES_DEBUG)
    endif()

-    add_custom_command(
-      OUTPUT
-        ${output}
-      DEPENDS
-        ${input}
-        ${SRC_KERNEL_HEADERS}
-        ${SRC_KERNEL_DEVICE_GPU_HEADERS}
-        ${SRC_KERNEL_DEVICE_CUDA_HEADERS}
-        ${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
-        ${SRC_UTIL_HEADERS}
-      COMMAND
-        ${CUDA_NVCC_EXECUTABLE}
-        --ptx
-        -arch=sm_50
-        ${cuda_flags}
-        ${input}
-      WORKING_DIRECTORY
-        "${CMAKE_CURRENT_SOURCE_DIR}")
+    if(WITH_CYCLES_CUBIN_COMPILER)
+      # The library path is needed to find libnvrtc-builtins.so. It cannot be set
+      # from inside cycles_cubin_cc, since the variable is read before main().
+      if(APPLE)
+        set(CUBIN_CC_ENV ${CMAKE_COMMAND}
+          -E env "DYLD_LIBRARY_PATH=${CUDA_TOOLKIT_ROOT_DIR}/lib")
+      elseif(UNIX)
+        set(CUBIN_CC_ENV ${CMAKE_COMMAND}
+          -E env "LD_LIBRARY_PATH=${CUDA_TOOLKIT_ROOT_DIR}/lib64")
+      endif()

+      add_custom_command(
+        OUTPUT ${output}
+        COMMAND ${CUBIN_CC_ENV}
+            "$<TARGET_FILE:cycles_cubin_cc>"
+            -target 50
+            -ptx
+            -i ${CMAKE_CURRENT_SOURCE_DIR}/${input}
+            ${cuda_flags}
+            -v
+            -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}"
+        DEPENDS
+          ${input}
+          ${SRC_KERNEL_HEADERS}
+          ${SRC_KERNEL_DEVICE_GPU_HEADERS}
+          ${SRC_KERNEL_DEVICE_CUDA_HEADERS}
+          ${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
+          ${SRC_UTIL_HEADERS}
+          ${kernel_sources} cycles_cubin_cc)
+    else()
+      add_custom_command(
+        OUTPUT
+          ${output}
+        DEPENDS
+          ${input}
+          ${SRC_KERNEL_HEADERS}
+          ${SRC_KERNEL_DEVICE_GPU_HEADERS}
+          ${SRC_KERNEL_DEVICE_CUDA_HEADERS}
+          ${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
+          ${SRC_UTIL_HEADERS}
+        COMMAND
+          ${CUDA_NVCC_EXECUTABLE}
+          --ptx
+          -arch=sm_50
+          ${cuda_flags}
+          ${input}
+        WORKING_DIRECTORY
+          "${CMAKE_CURRENT_SOURCE_DIR}")
+    endif()
    list(APPEND optix_ptx ${output})

    delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${output}" ${CYCLES_INSTALL_PATH}/lib)
--- a/intern/cycles/kernel/closure/bsdf_diffuse.h
+++ b/intern/cycles/kernel/closure/bsdf_diffuse.h
@@ -69,7 +69,7 @@ ccl_device int bsdf_diffuse_sample(ccl_private const ShaderClosure *sc,
 ccl_device int bsdf_translucent_setup(ccl_private DiffuseBsdf *bsdf)
 {
  bsdf->type = CLOSURE_BSDF_TRANSLUCENT_ID;
-  return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_HAS_TRANSMISSION;
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }

 ccl_device Spectrum bsdf_translucent_eval(ccl_private const ShaderClosure *sc,
--- a/intern/cycles/kernel/closure/bsdf_hair.h
+++ b/intern/cycles/kernel/closure/bsdf_hair.h
@@ -34,7 +34,7 @@ ccl_device int bsdf_hair_transmission_setup(ccl_private HairBsdf *bsdf)
  bsdf->type = CLOSURE_BSDF_HAIR_TRANSMISSION_ID;
  bsdf->roughness1 = clamp(bsdf->roughness1, 0.001f, 1.0f);
  bsdf->roughness2 = clamp(bsdf->roughness2, 0.001f, 1.0f);
-  return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_HAS_TRANSMISSION;
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }

 ccl_device Spectrum bsdf_hair_reflection_eval(ccl_private const ShaderClosure *sc,
--- a/intern/cycles/kernel/closure/bsdf_hair_principled.h
+++ b/intern/cycles/kernel/closure/bsdf_hair_principled.h
@@ -196,7 +196,7 @@ ccl_device int bsdf_principled_hair_setup(ccl_private ShaderData *sd,

  bsdf->extra->geom = make_float4(Y.x, Y.y, Y.z, h);

-  return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG | SD_BSDF_HAS_TRANSMISSION;
+  return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG;
 }

 #endif /* __HAIR__ */
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -346,7 +346,7 @@ ccl_device int bsdf_microfacet_ggx_refraction_setup(ccl_private MicrofacetBsdf *

  bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;

-  return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_HAS_TRANSMISSION;
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }

 ccl_device void bsdf_microfacet_ggx_blur(ccl_private ShaderClosure *sc, float roughness)
@@ -776,7 +776,7 @@ ccl_device int bsdf_microfacet_beckmann_refraction_setup(ccl_private MicrofacetB
  bsdf->alpha_y = bsdf->alpha_x;

  bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
-  return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_HAS_TRANSMISSION;
+  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }

 ccl_device void bsdf_microfacet_beckmann_blur(ccl_private ShaderClosure *sc, float roughness)
--- a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
@@ -559,7 +559,7 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_setup(ccl_private MicrofacetBsdf

  bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID;

-  return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG | SD_BSDF_HAS_TRANSMISSION;
+  return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG;
 }

 ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(ccl_private MicrofacetBsdf *bsdf,
--- a/intern/cycles/kernel/data_arrays.h
+++ b/intern/cycles/kernel/data_arrays.h
@@ -60,13 +60,6 @@ KERNEL_DATA_ARRAY(KernelLight, lights)
 KERNEL_DATA_ARRAY(float2, light_background_marginal_cdf)
 KERNEL_DATA_ARRAY(float2, light_background_conditional_cdf)

-/* light tree */
-KERNEL_DATA_ARRAY(KernelLightTreeNode, light_tree_nodes)
-KERNEL_DATA_ARRAY(KernelLightTreeEmitter, light_tree_emitters)
-KERNEL_DATA_ARRAY(uint, light_to_tree)
-KERNEL_DATA_ARRAY(uint, object_lookup_offset)
-KERNEL_DATA_ARRAY(uint, triangle_to_tree)
-
 /* particles */
 KERNEL_DATA_ARRAY(KernelParticle, particles)

--- a/intern/cycles/kernel/data_template.h
+++ b/intern/cycles/kernel/data_template.h
@@ -23,19 +23,24 @@ KERNEL_STRUCT_MEMBER(background, int, volume_shader)
 KERNEL_STRUCT_MEMBER(background, float, volume_step_size)
 KERNEL_STRUCT_MEMBER(background, int, transparent)
 KERNEL_STRUCT_MEMBER(background, float, transparent_roughness_squared_threshold)
+/* Portal sampling. */
+KERNEL_STRUCT_MEMBER(background, float, portal_weight)
+KERNEL_STRUCT_MEMBER(background, int, num_portals)
+KERNEL_STRUCT_MEMBER(background, int, portal_offset)
 /* Sun sampling. */
 KERNEL_STRUCT_MEMBER(background, float, sun_weight)
 /* Importance map sampling. */
 KERNEL_STRUCT_MEMBER(background, float, map_weight)
-KERNEL_STRUCT_MEMBER(background, float, portal_weight)
 KERNEL_STRUCT_MEMBER(background, int, map_res_x)
 KERNEL_STRUCT_MEMBER(background, int, map_res_y)
 /* Multiple importance sampling. */
 KERNEL_STRUCT_MEMBER(background, int, use_mis)
 /* Lightgroup. */
 KERNEL_STRUCT_MEMBER(background, int, lightgroup)
-/* Light Index. */
-KERNEL_STRUCT_MEMBER(background, int, light_index)
+/* Padding. */
+KERNEL_STRUCT_MEMBER(background, int, pad1)
+KERNEL_STRUCT_MEMBER(background, int, pad2)
+KERNEL_STRUCT_MEMBER(background, int, pad3)
 KERNEL_STRUCT_END(KernelBackground)

 /* BVH: own BVH2 if no native device acceleration struct used. */
@@ -97,6 +102,8 @@ KERNEL_STRUCT_MEMBER(film, int, pass_emission)
 KERNEL_STRUCT_MEMBER(film, int, pass_background)
 KERNEL_STRUCT_MEMBER(film, int, pass_ao)
 KERNEL_STRUCT_MEMBER(film, float, pass_alpha_threshold)
+KERNEL_STRUCT_MEMBER(film, int, pass_shadow)
+KERNEL_STRUCT_MEMBER(film, float, pass_shadow_scale)
 KERNEL_STRUCT_MEMBER(film, int, pass_shadow_catcher)
 KERNEL_STRUCT_MEMBER(film, int, pass_shadow_catcher_sample_count)
 KERNEL_STRUCT_MEMBER(film, int, pass_shadow_catcher_matte)
@@ -130,6 +137,9 @@ KERNEL_STRUCT_MEMBER(film, int, use_approximate_shadow_catcher)
 KERNEL_STRUCT_MEMBER(film, int, pass_guiding_color)
 KERNEL_STRUCT_MEMBER(film, int, pass_guiding_probability)
 KERNEL_STRUCT_MEMBER(film, int, pass_guiding_avg_roughness)
+/* Padding. */
+KERNEL_STRUCT_MEMBER(film, int, pad1)
+KERNEL_STRUCT_MEMBER(film, int, pad2)
 KERNEL_STRUCT_END(KernelFilm)

 /* Integrator. */
@@ -137,18 +147,10 @@ KERNEL_STRUCT_END(KernelFilm)
 KERNEL_STRUCT_BEGIN(KernelIntegrator, integrator)
 /* Emission. */
 KERNEL_STRUCT_MEMBER(integrator, int, use_direct_light)
-KERNEL_STRUCT_MEMBER(integrator, int, use_light_mis)
-KERNEL_STRUCT_MEMBER(integrator, int, use_light_tree)
-KERNEL_STRUCT_MEMBER(integrator, int, num_lights)
-KERNEL_STRUCT_MEMBER(integrator, int, num_distant_lights)
-KERNEL_STRUCT_MEMBER(integrator, int, num_background_lights)
-/* Portal sampling. */
-KERNEL_STRUCT_MEMBER(integrator, int, num_portals)
-KERNEL_STRUCT_MEMBER(integrator, int, portal_offset)
-/* Flat light distribution. */
 KERNEL_STRUCT_MEMBER(integrator, int, num_distribution)
-KERNEL_STRUCT_MEMBER(integrator, float, distribution_pdf_triangles)
-KERNEL_STRUCT_MEMBER(integrator, float, distribution_pdf_lights)
+KERNEL_STRUCT_MEMBER(integrator, int, num_all_lights)
+KERNEL_STRUCT_MEMBER(integrator, float, pdf_triangles)
+KERNEL_STRUCT_MEMBER(integrator, float, pdf_lights)
 KERNEL_STRUCT_MEMBER(integrator, float, light_inv_rr_threshold)
 /* Bounces. */
 KERNEL_STRUCT_MEMBER(integrator, int, min_bounce)
@@ -175,11 +177,12 @@ KERNEL_STRUCT_MEMBER(integrator, int, seed)
 /* Clamp. */
 KERNEL_STRUCT_MEMBER(integrator, float, sample_clamp_direct)
 KERNEL_STRUCT_MEMBER(integrator, float, sample_clamp_indirect)
+/* MIS. */
+KERNEL_STRUCT_MEMBER(integrator, int, use_lamp_mis)
 /* Caustics. */
 KERNEL_STRUCT_MEMBER(integrator, int, use_caustics)
 /* Sampling pattern. */
 KERNEL_STRUCT_MEMBER(integrator, int, sampling_pattern)
-KERNEL_STRUCT_MEMBER(integrator, int, pmj_sequence_size)
 KERNEL_STRUCT_MEMBER(integrator, float, scrambling_distance)
 /* Volume render. */
 KERNEL_STRUCT_MEMBER(integrator, int, use_volumes)
@@ -191,6 +194,7 @@ KERNEL_STRUCT_MEMBER(integrator, int, has_shadow_catcher)
 KERNEL_STRUCT_MEMBER(integrator, int, filter_closures)
 /* MIS debugging. */
 KERNEL_STRUCT_MEMBER(integrator, int, direct_light_sampling_type)
+
 /* Path Guiding */
 KERNEL_STRUCT_MEMBER(integrator, float, surface_guiding_probability)
 KERNEL_STRUCT_MEMBER(integrator, float, volume_guiding_probability)
@@ -201,10 +205,6 @@ KERNEL_STRUCT_MEMBER(integrator, int, use_surface_guiding)
 KERNEL_STRUCT_MEMBER(integrator, int, use_volume_guiding)
 KERNEL_STRUCT_MEMBER(integrator, int, use_guiding_direct_light)
 KERNEL_STRUCT_MEMBER(integrator, int, use_guiding_mis_weights)
-
-/* Padding. */
-KERNEL_STRUCT_MEMBER(integrator, int, pad1)
-KERNEL_STRUCT_MEMBER(integrator, int, pad2)
 KERNEL_STRUCT_END(KernelIntegrator)

 /* SVM. For shader specialization. */
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -314,7 +314,11 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
                        int kernel_index);
  ccl_gpu_kernel_lambda_pass.kernel_index = kernel_index;

-  gpu_parallel_active_index_array(num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
+  gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
+                                  num_states,
+                                  indices,
+                                  num_indices,
+                                  ccl_gpu_kernel_lambda_pass);
 }
 ccl_gpu_kernel_postfix

@@ -329,7 +333,11 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
                        int kernel_index);
  ccl_gpu_kernel_lambda_pass.kernel_index = kernel_index;

-  gpu_parallel_active_index_array(num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
+  gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
+                                  num_states,
+                                  indices,
+                                  num_indices,
+                                  ccl_gpu_kernel_lambda_pass);
 }
 ccl_gpu_kernel_postfix

@@ -341,7 +349,11 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
 {
  ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, path, queued_kernel) != 0);

-  gpu_parallel_active_index_array(num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
+  gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
+                                  num_states,
+                                  indices,
+                                  num_indices,
+                                  ccl_gpu_kernel_lambda_pass);
 }
 ccl_gpu_kernel_postfix

@@ -354,8 +366,11 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
 {
  ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, path, queued_kernel) == 0);

-  gpu_parallel_active_index_array(
-      num_states, indices + indices_offset, num_indices, ccl_gpu_kernel_lambda_pass);
+  gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
+                                  num_states,
+                                  indices + indices_offset,
+                                  num_indices,
+                                  ccl_gpu_kernel_lambda_pass);
 }
 ccl_gpu_kernel_postfix

@@ -368,8 +383,11 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
 {
  ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0);

-  gpu_parallel_active_index_array(
-      num_states, indices + indices_offset, num_indices, ccl_gpu_kernel_lambda_pass);
+  gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
+                                  num_states,
+                                  indices + indices_offset,
+                                  num_indices,
+                                  ccl_gpu_kernel_lambda_pass);
 }
 ccl_gpu_kernel_postfix

@@ -413,7 +431,11 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
                        int num_active_paths);
  ccl_gpu_kernel_lambda_pass.num_active_paths = num_active_paths;

-  gpu_parallel_active_index_array(num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
+  gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
+                                  num_states,
+                                  indices,
+                                  num_indices,
+                                  ccl_gpu_kernel_lambda_pass);
 }
 ccl_gpu_kernel_postfix

@@ -447,7 +469,11 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
                        int num_active_paths);
  ccl_gpu_kernel_lambda_pass.num_active_paths = num_active_paths;

-  gpu_parallel_active_index_array(num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass);
+  gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
+                                  num_states,
+                                  indices,
+                                  num_indices,
+                                  ccl_gpu_kernel_lambda_pass);
 }
 ccl_gpu_kernel_postfix

--- a/intern/cycles/kernel/device/gpu/parallel_active_index.h
+++ b/intern/cycles/kernel/device/gpu/parallel_active_index.h
@@ -56,7 +56,7 @@ void gpu_parallel_active_index_array_impl(const uint num_states,
  const uint is_active = (state_index < num_states) ? is_active_op(state_index) : 0;
 #else /* !__KERNEL__ONEAPI__ */
 #  ifndef __KERNEL_METAL__
-template<typename IsActiveOp>
+template<uint blocksize, typename IsActiveOp>
 __device__
 #  endif
    void
@@ -79,10 +79,6 @@ __device__
 {
  extern ccl_gpu_shared int warp_offset[];

-#    ifndef __KERNEL_METAL__
-  const uint blocksize = ccl_gpu_block_dim_x;
-#    endif
-
  const uint thread_index = ccl_gpu_thread_idx_x;
  const uint thread_warp = thread_index % ccl_gpu_warp_size;

@@ -153,7 +149,7 @@ __device__

 #ifdef __KERNEL_METAL__

-#  define gpu_parallel_active_index_array(num_states, indices, num_indices, is_active_op) \
+#  define gpu_parallel_active_index_array(dummy, num_states, indices, num_indices, is_active_op) \
    const uint is_active = (ccl_gpu_global_id_x() < num_states) ? \
                               is_active_op(ccl_gpu_global_id_x()) : \
                               0; \
@@ -171,13 +167,15 @@ __device__
                                         simdgroup_offset)
 #elif defined(__KERNEL_ONEAPI__)

-#  define gpu_parallel_active_index_array(num_states, indices, num_indices, is_active_op) \
+#  define gpu_parallel_active_index_array( \
+      blocksize, num_states, indices, num_indices, is_active_op) \
    gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active_op)

 #else

-#  define gpu_parallel_active_index_array(num_states, indices, num_indices, is_active_op) \
-    gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active_op)
+#  define gpu_parallel_active_index_array( \
+      blocksize, num_states, indices, num_indices, is_active_op) \
+    gpu_parallel_active_index_array_impl<blocksize>(num_states, indices, num_indices, is_active_op)

 #endif

--- a/intern/cycles/kernel/film/data_passes.h
+++ b/intern/cycles/kernel/film/data_passes.h
@@ -42,27 +42,27 @@ ccl_device_inline void film_write_data_passes(KernelGlobals kg,
  ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer);

  if (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) {
-    if (INTEGRATOR_STATE(state, path, sample) == 0) {
-      if (flag & PASSMASK(DEPTH)) {
-        const float depth = camera_z_depth(kg, sd->P);
-        film_overwrite_pass_float(buffer + kernel_data.film.pass_depth, depth);
-      }
-      if (flag & PASSMASK(OBJECT_ID)) {
-        const float id = object_pass_id(kg, sd->object);
-        film_overwrite_pass_float(buffer + kernel_data.film.pass_object_id, id);
-      }
-      if (flag & PASSMASK(MATERIAL_ID)) {
-        const float id = shader_pass_id(kg, sd);
-        film_overwrite_pass_float(buffer + kernel_data.film.pass_material_id, id);
-      }
-      if (flag & PASSMASK(POSITION)) {
-        const float3 position = sd->P;
-        film_overwrite_pass_float3(buffer + kernel_data.film.pass_position, position);
-      }
-    }
-
    if (!(sd->flag & SD_TRANSPARENT) || kernel_data.film.pass_alpha_threshold == 0.0f ||
        average(surface_shader_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) {
+      if (INTEGRATOR_STATE(state, path, sample) == 0) {
+        if (flag & PASSMASK(DEPTH)) {
+          const float depth = camera_z_depth(kg, sd->P);
+          film_write_pass_float(buffer + kernel_data.film.pass_depth, depth);
+        }
+        if (flag & PASSMASK(OBJECT_ID)) {
+          const float id = object_pass_id(kg, sd->object);
+          film_write_pass_float(buffer + kernel_data.film.pass_object_id, id);
+        }
+        if (flag & PASSMASK(MATERIAL_ID)) {
+          const float id = shader_pass_id(kg, sd);
+          film_write_pass_float(buffer + kernel_data.film.pass_material_id, id);
+        }
+        if (flag & PASSMASK(POSITION)) {
+          const float3 position = sd->P;
+          film_write_pass_float3(buffer + kernel_data.film.pass_position, position);
+        }
+      }
+
      if (flag & PASSMASK(NORMAL)) {
        const float3 normal = surface_shader_average_normal(kg, sd);
        film_write_pass_float3(buffer + kernel_data.film.pass_normal, normal);
@@ -157,47 +157,4 @@ ccl_device_inline void film_write_data_passes(KernelGlobals kg,
 #endif
 }

-ccl_device_inline void film_write_data_passes_background(
-    KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer)
-{
-#ifdef __PASSES__
-  const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
-
-  if (!(path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
-    return;
-  }
-
-  /* Don't write data passes for paths that were split off for shadow catchers
-   * to avoid double-counting. */
-  if (path_flag & PATH_RAY_SHADOW_CATCHER_PASS) {
-    return;
-  }
-
-  const int flag = kernel_data.film.pass_flag;
-
-  if (!(flag & PASS_ANY)) {
-    return;
-  }
-
-  if (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) {
-    ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer);
-
-    if (INTEGRATOR_STATE(state, path, sample) == 0) {
-      if (flag & PASSMASK(DEPTH)) {
-        film_overwrite_pass_float(buffer + kernel_data.film.pass_depth, 0.0f);
-      }
-      if (flag & PASSMASK(OBJECT_ID)) {
-        film_overwrite_pass_float(buffer + kernel_data.film.pass_object_id, 0.0f);
-      }
-      if (flag & PASSMASK(MATERIAL_ID)) {
-        film_overwrite_pass_float(buffer + kernel_data.film.pass_material_id, 0.0f);
-      }
-      if (flag & PASSMASK(POSITION)) {
-        film_overwrite_pass_float3(buffer + kernel_data.film.pass_position, zero_float3());
-      }
-    }
-  }
-#endif
-}
-
 CCL_NAMESPACE_END
--- a/intern/cycles/kernel/film/light_passes.h
+++ b/intern/cycles/kernel/film/light_passes.h
@@ -527,6 +527,17 @@ ccl_device_inline void film_write_direct_light(KernelGlobals kg,
        film_write_pass_spectrum(buffer + pass_offset, contribution);
      }
    }
+
+    /* Write shadow pass. */
+    if (kernel_data.film.pass_shadow != PASS_UNUSED && (path_flag & PATH_RAY_SHADOW_FOR_LIGHT) &&
+        (path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
+      const Spectrum unshadowed_throughput = INTEGRATOR_STATE(
+          state, shadow_path, unshadowed_throughput);
+      const Spectrum shadowed_throughput = INTEGRATOR_STATE(state, shadow_path, throughput);
+      const Spectrum shadow = safe_divide(shadowed_throughput, unshadowed_throughput) *
+                              kernel_data.film.pass_shadow_scale;
+      film_write_pass_spectrum(buffer + kernel_data.film.pass_shadow, shadow);
+    }
  }
 #endif
 }
--- a/intern/cycles/kernel/film/write.h
+++ b/intern/cycles/kernel/film/write.h
@@ -12,7 +12,6 @@
 CCL_NAMESPACE_BEGIN

 /* Get pointer to pixel in render buffer. */
-
 ccl_device_forceinline ccl_global float *film_pass_pixel_render_buffer(
    KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer)
 {
@@ -22,8 +21,7 @@ ccl_device_forceinline ccl_global float *film_pass_pixel_render_buffer(
  return render_buffer + render_buffer_offset;
 }

-/* Accumulate in passes. */
-
+/* Write to pixel. */
 ccl_device_inline void film_write_pass_float(ccl_global float *ccl_restrict buffer, float value)
 {
 #ifdef __ATOMIC_PASS_WRITE__
@@ -76,25 +74,6 @@ ccl_device_inline void film_write_pass_float4(ccl_global float *ccl_restrict buf
 #endif
 }

-/* Overwrite for passes that only write on sample 0. This assumes only a single thread will write
- * to this pixel and no atomics are needed. */
-
-ccl_device_inline void film_overwrite_pass_float(ccl_global float *ccl_restrict buffer,
-                                                 float value)
-{
-  *buffer = value;
-}
-
-ccl_device_inline void film_overwrite_pass_float3(ccl_global float *ccl_restrict buffer,
-                                                  float3 value)
-{
-  buffer[0] = value.x;
-  buffer[1] = value.y;
-  buffer[2] = value.z;
-}
-
-/* Read back from passes. */
-
 ccl_device_inline float kernel_read_pass_float(ccl_global float *ccl_restrict buffer)
 {
  return *buffer;
--- a/intern/cycles/kernel/integrator/intersect_closest.h
+++ b/intern/cycles/kernel/integrator/intersect_closest.h
@@ -11,10 +11,10 @@
 #include "kernel/integrator/path_state.h"
 #include "kernel/integrator/shadow_catcher.h"

-#include "kernel/geom/geom.h"
-
 #include "kernel/light/light.h"

+#include "kernel/geom/geom.h"
+
 #include "kernel/bvh/bvh.h"

 CCL_NAMESPACE_BEGIN
@@ -387,7 +387,7 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg,
 #endif /* __MNEE__ */

  /* Light intersection for MIS. */
-  if (kernel_data.integrator.use_light_mis) {
+  if (kernel_data.integrator.use_lamp_mis) {
    /* NOTE: if we make lights visible to camera rays, we'll need to initialize
     * these in the path_state_init. */
    const int last_type = INTEGRATOR_STATE(state, isect, type);
--- a/intern/cycles/kernel/integrator/mnee.h
+++ b/intern/cycles/kernel/integrator/mnee.h
@@ -108,6 +108,48 @@ ccl_device_inline float mat22_inverse(const float4 m, ccl_private float4 &m_inve
  return det;
 }

+/* Update light sample `ls` (D, t, eval_fac, pdf) for a new shading point P; ls->P is kept. */
+ccl_device_forceinline void mnee_update_light_sample(KernelGlobals kg,
+                                                     const float3 P,
+                                                     ccl_private LightSample *ls)
+{
+  /* Correct the light sample direction, distance and pdf for the new shading point P.
+   * NOTE: the pdf is kept in area measure (inverse light area, scaled by pdf_lights below). */
+  const ccl_global KernelLight *klight = &kernel_data_fetch(lights, ls->lamp);
+
+  if (ls->type == LIGHT_POINT || ls->type == LIGHT_SPOT) {
+    ls->D = normalize_len(ls->P - P, &ls->t);
+    ls->Ng = -ls->D;
+
+    float2 uv = map_to_sphere(ls->Ng);
+    ls->u = uv.x;
+    ls->v = uv.y;
+
+    float invarea = klight->spot.invarea;
+    ls->eval_fac = (0.25f * M_1_PI_F) * invarea;
+    ls->pdf = invarea;
+
+    if (ls->type == LIGHT_SPOT) {
+      /* Spot light attenuation, evaluated with the updated light normal `ls->Ng`. */
+      float3 dir = make_float3(klight->spot.dir[0], klight->spot.dir[1], klight->spot.dir[2]);
+      ls->eval_fac *= spot_light_attenuation(
+          dir, klight->spot.spot_angle, klight->spot.spot_smooth, ls->Ng);
+    }
+  }
+  else if (ls->type == LIGHT_AREA) {
+    float invarea = fabsf(klight->area.invarea);
+    ls->D = normalize_len(ls->P - P, &ls->t);
+    ls->pdf = invarea;
+    if (klight->area.tan_spread > 0.f) {
+      ls->eval_fac = 0.25f * invarea;
+      ls->eval_fac *= light_spread_attenuation(
+          ls->D, ls->Ng, klight->area.tan_spread, klight->area.normalize_spread);
+    }
+  }
+
+  ls->pdf *= kernel_data.integrator.pdf_lights;
+}
+
 /* Manifold vertex setup from ray and intersection data */
 ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg,
                                                       ccl_private ManifoldVertex *vtx,
@@ -777,7 +819,7 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg,

  /* Update light sample with new position / direct.ion
   * and keep pdf in vertex area measure */
-  light_sample_update_position(kg, ls, vertices[vertex_count - 1].p);
+  mnee_update_light_sample(kg, vertices[vertex_count - 1].p, ls);

  /* Save state path bounce info in case a light path node is used in the refractive interface or
   * light shader graph. */
--- a/intern/cycles/kernel/integrator/path_state.h
+++ b/intern/cycles/kernel/integrator/path_state.h
@@ -91,10 +91,7 @@ ccl_device_inline void path_state_init_integrator(KernelGlobals kg,
 #endif
 }

-ccl_device_inline void path_state_next(KernelGlobals kg,
-                                       IntegratorState state,
-                                       const int label,
-                                       const int shader_flag)
+ccl_device_inline void path_state_next(KernelGlobals kg, IntegratorState state, int label)
 {
  uint32_t flag = INTEGRATOR_STATE(state, path, flag);

@@ -123,12 +120,12 @@ ccl_device_inline void path_state_next(KernelGlobals kg,
    flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
  }

-  flag &= ~(PATH_RAY_ALL_VISIBILITY | PATH_RAY_MIS_SKIP | PATH_RAY_MIS_HAD_TRANSMISSION);
+  flag &= ~(PATH_RAY_ALL_VISIBILITY | PATH_RAY_MIS_SKIP);

 #ifdef __VOLUME__
  if (label & LABEL_VOLUME_SCATTER) {
    /* volume scatter */
-    flag |= PATH_RAY_VOLUME_SCATTER | PATH_RAY_MIS_HAD_TRANSMISSION;
+    flag |= PATH_RAY_VOLUME_SCATTER;
    flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND;
    if (!(flag & PATH_RAY_ANY_PASS)) {
      flag |= PATH_RAY_VOLUME_PASS;
@@ -191,11 +188,6 @@ ccl_device_inline void path_state_next(KernelGlobals kg,
      flag |= PATH_RAY_GLOSSY | PATH_RAY_SINGULAR | PATH_RAY_MIS_SKIP;
    }

-    /* Flag for consistent MIS weights with light tree. */
-    if (shader_flag & SD_BSDF_HAS_TRANSMISSION) {
-      flag |= PATH_RAY_MIS_HAD_TRANSMISSION;
-    }
-
    /* Render pass categories. */
    if (!(flag & PATH_RAY_ANY_PASS) && !(flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
      flag |= PATH_RAY_SURFACE_PASS;
--- a/intern/cycles/kernel/integrator/shade_background.h
+++ b/intern/cycles/kernel/integrator/shade_background.h
@@ -3,7 +3,6 @@

 #pragma once

-#include "kernel/film/data_passes.h"
 #include "kernel/film/light_passes.h"

 #include "kernel/integrator/guiding.h"
@@ -69,9 +68,9 @@ ccl_device_inline void integrate_background(KernelGlobals kg,
  bool eval_background = true;
  float transparent = 0.0f;

-  int path_flag = INTEGRATOR_STATE(state, path, flag);
  const bool is_transparent_background_ray = kernel_data.background.transparent &&
-                                             (path_flag & PATH_RAY_TRANSPARENT_BACKGROUND);
+                                             (INTEGRATOR_STATE(state, path, flag) &
+                                              PATH_RAY_TRANSPARENT_BACKGROUND);

  if (is_transparent_background_ray) {
    transparent = average(INTEGRATOR_STATE(state, path, throughput));
@@ -86,7 +85,7 @@ ccl_device_inline void integrate_background(KernelGlobals kg,
 #ifdef __MNEE__
  if (INTEGRATOR_STATE(state, path, mnee) & PATH_MNEE_CULL_LIGHT_CONNECTION) {
    if (kernel_data.background.use_mis) {
-      for (int lamp = 0; lamp < kernel_data.integrator.num_lights; lamp++) {
+      for (int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) {
        /* This path should have been resolved with mnee, it will
         * generate a firefly for small lights since it is improbable. */
        const ccl_global KernelLight *klight = &kernel_data_fetch(lights, lamp);
@@ -113,10 +112,17 @@ ccl_device_inline void integrate_background(KernelGlobals kg,

    /* Background MIS weights. */
    float mis_weight = 1.0f;
-    /* Check if background light exists or if we should skip PDF. */
+    /* Check if background light exists or if we should skip pdf. */
    if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_MIS_SKIP) &&
        kernel_data.background.use_mis) {
-      mis_weight = light_sample_mis_weight_forward_background(kg, state, path_flag);
+      const float3 ray_P = INTEGRATOR_STATE(state, ray, P);
+      const float3 ray_D = INTEGRATOR_STATE(state, ray, D);
+      const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
+
+      /* Multiple importance sampling: get the background light pdf for the ray
+       * direction, and compute the weight with respect to the BSDF pdf. */
+      const float pdf = background_light_pdf(kg, ray_P, ray_D);
+      mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, pdf);
    }

    guiding_record_background(kg, state, L, mis_weight);
@@ -125,7 +131,6 @@ ccl_device_inline void integrate_background(KernelGlobals kg,

  /* Write to render buffer. */
  film_write_background(kg, state, L, transparent, is_transparent_background_ray, render_buffer);
-  film_write_data_passes_background(kg, state, render_buffer);
 }

 ccl_device_inline void integrate_distant_lights(KernelGlobals kg,
@@ -135,8 +140,8 @@ ccl_device_inline void integrate_distant_lights(KernelGlobals kg,
  const float3 ray_D = INTEGRATOR_STATE(state, ray, D);
  const float ray_time = INTEGRATOR_STATE(state, ray, time);
  LightSample ls ccl_optional_struct_init;
-  for (int lamp = 0; lamp < kernel_data.integrator.num_lights; lamp++) {
-    if (distant_light_sample_from_intersection(kg, ray_D, lamp, &ls)) {
+  for (int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) {
+    if (light_sample_from_distant_ray(kg, ray_D, lamp, &ls)) {
      /* Use visibility flag to skip lights. */
 #ifdef __PASSES__
      const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
@@ -175,7 +180,10 @@ ccl_device_inline void integrate_distant_lights(KernelGlobals kg,
      /* MIS weighting. */
      float mis_weight = 1.0f;
      if (!(path_flag & PATH_RAY_MIS_SKIP)) {
-        mis_weight = light_sample_mis_weight_forward_distant(kg, state, path_flag, &ls);
+        /* Multiple importance sampling: use the regular light pdf from the sample,
+         * and compute the weight with respect to the BSDF pdf. */
+        const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
+        mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, ls.pdf);
      }

      /* Write to render buffer. */
--- a/intern/cycles/kernel/integrator/shade_light.h
+++ b/intern/cycles/kernel/integrator/shade_light.h
@@ -61,7 +61,10 @@ ccl_device_inline void integrate_light(KernelGlobals kg,
  /* MIS weighting. */
  float mis_weight = 1.0f;
  if (!(path_flag & PATH_RAY_MIS_SKIP)) {
-    mis_weight = light_sample_mis_weight_forward_lamp(kg, state, path_flag, &ls, ray_P);
+    /* Multiple importance sampling: use the regular light pdf from the sample,
+     * and compute the weight with respect to the BSDF pdf. */
+    const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
+    mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, ls.pdf);
  }

  /* Write to render buffer. */
--- a/intern/cycles/kernel/integrator/shade_surface.h
+++ b/intern/cycles/kernel/integrator/shade_surface.h
@@ -15,6 +15,7 @@
 #include "kernel/integrator/surface_shader.h"
 #include "kernel/integrator/volume_stack.h"

+#include "kernel/light/light.h"
 #include "kernel/light/sample.h"

 CCL_NAMESPACE_BEGIN
@@ -112,16 +113,20 @@ ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg,
  Spectrum L = surface_shader_emission(sd);
  float mis_weight = 1.0f;

-  const bool has_mis = !(path_flag & PATH_RAY_MIS_SKIP) &&
-                       (sd->flag & ((sd->flag & SD_BACKFACING) ? SD_MIS_BACK : SD_MIS_FRONT));
-
 #ifdef __HAIR__
-  if (has_mis && (sd->type & PRIMITIVE_TRIANGLE))
+  if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) &&
+      (sd->type & PRIMITIVE_TRIANGLE))
 #else
-  if (has_mis)
+  if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS))
 #endif
  {
-    mis_weight = light_sample_mis_weight_forward_surface(kg, state, path_flag, sd);
+    const float bsdf_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
+    const float t = sd->ray_length;
+
+    /* Multiple importance sampling, get triangle light pdf,
+     * and compute weight with respect to BSDF pdf. */
+    float pdf = triangle_light_pdf(kg, sd, t);
+    mis_weight = light_sample_mis_weight_forward(kg, bsdf_pdf, pdf);
  }

  guiding_record_surface_emission(kg, state, L, mis_weight);
@@ -149,17 +154,8 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
    const uint bounce = INTEGRATOR_STATE(state, path, bounce);
    const float2 rand_light = path_state_rng_2D(kg, rng_state, PRNG_LIGHT);

-    if (!light_sample_from_position(kg,
-                                    rng_state,
-                                    rand_light.x,
-                                    rand_light.y,
-                                    sd->time,
-                                    sd->P,
-                                    sd->N,
-                                    sd->flag,
-                                    bounce,
-                                    path_flag,
-                                    &ls)) {
+    if (!light_distribution_sample_from_position(
+            kg, rand_light.x, rand_light.y, sd->time, sd->P, bounce, path_flag, &ls)) {
      return;
    }
  }
@@ -326,6 +322,10 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,

  INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, throughput) = throughput;

+  if (kernel_data.kernel_features & KERNEL_FEATURE_SHADOW_PASS) {
+    INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, unshadowed_throughput) = throughput;
+  }
+
  /* Write Lightgroup, +1 as lightgroup is int but we need to encode into a uint8_t. */
  INTEGRATOR_STATE_WRITE(
      shadow_state, shadow_path, lightgroup) = (ls.type != LIGHT_BACKGROUND) ?
@@ -441,12 +441,11 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
  /* Update path state */
  if (!(label & LABEL_TRANSPARENT)) {
    INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = bsdf_pdf;
-    INTEGRATOR_STATE_WRITE(state, path, mis_origin_n) = sd->N;
    INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf(
        unguided_bsdf_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf));
  }

-  path_state_next(kg, state, label, sd->flag);
+  path_state_next(kg, state, label);

  guiding_record_surface_bounce(kg,
                                state,
--- a/intern/cycles/kernel/integrator/shade_volume.h
+++ b/intern/cycles/kernel/integrator/shade_volume.h
@@ -34,9 +34,6 @@ typedef struct VolumeIntegrateResult {
  Spectrum direct_throughput;
  float direct_t;
  ShaderVolumePhases direct_phases;
-#  ifdef __PATH_GUIDING__
-  VolumeSampleMethod direct_sample_method;
-#  endif

  /* Throughput and offset for indirect light scattering. */
  bool indirect_scatter;
@@ -583,9 +580,6 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
    result.direct_t = volume_equiangular_sample(
        ray, equiangular_light_P, vstate.rscatter, &vstate.equiangular_pdf);
  }
-#  ifdef __PATH_GUIDING__
-  result.direct_sample_method = vstate.direct_sample_method;
-#  endif

 #  ifdef __DENOISING_FEATURES__
  const bool write_denoising_features = (INTEGRATOR_STATE(state, path, flag) &
@@ -685,14 +679,14 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
 #  endif /* __DENOISING_FEATURES__ */
 }

-/* Path tracing: sample point on light for equiangular sampling. */
-ccl_device_forceinline bool integrate_volume_equiangular_sample_light(
+/* Path tracing: sample a point on a light into `ls`. Returns false when no light could be
+ * sampled or when the sampled light is excluded from scattering (SHADER_EXCLUDE_SCATTER). */
+ccl_device_forceinline bool integrate_volume_sample_light(
    KernelGlobals kg,
    IntegratorState state,
-    ccl_private const Ray *ccl_restrict ray,
    ccl_private const ShaderData *ccl_restrict sd,
    ccl_private const RNGState *ccl_restrict rng_state,
-    ccl_private float3 *ccl_restrict P)
+    ccl_private LightSample *ccl_restrict ls)
 {
  /* Test if there is a light or BSDF that needs direct light. */
  if (!kernel_data.integrator.use_direct_light) {
@@ -704,30 +698,15 @@ ccl_device_forceinline bool integrate_volume_equiangular_sample_light(
  const uint bounce = INTEGRATOR_STATE(state, path, bounce);
  const float2 rand_light = path_state_rng_2D(kg, rng_state, PRNG_LIGHT);

-  LightSample ls ccl_optional_struct_init;
-  if (!light_sample_from_volume_segment(kg,
-                                        rand_light.x,
-                                        rand_light.y,
-                                        sd->time,
-                                        sd->P,
-                                        ray->D,
-                                        ray->tmax - ray->tmin,
-                                        bounce,
-                                        path_flag,
-                                        &ls)) {
+  if (!light_distribution_sample_from_volume_segment(
+          kg, rand_light.x, rand_light.y, sd->time, sd->P, bounce, path_flag, ls)) {
    return false;
  }

-  if (ls.shader & SHADER_EXCLUDE_SCATTER) {
+  if (ls->shader & SHADER_EXCLUDE_SCATTER) {
    return false;
  }

-  if (ls.t == FLT_MAX) {
-    return false;
-  }
-
-  *P = ls.P;
-
  return true;
 }

@@ -740,10 +719,8 @@ ccl_device_forceinline void integrate_volume_direct_light(
    ccl_private const RNGState *ccl_restrict rng_state,
    const float3 P,
    ccl_private const ShaderVolumePhases *ccl_restrict phases,
-#  ifdef __PATH_GUIDING__
-    ccl_private const Spectrum unlit_throughput,
-#  endif
-    ccl_private const Spectrum throughput)
+    ccl_private const Spectrum throughput,
+    ccl_private LightSample *ccl_restrict ls)
 {
  PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_DIRECT_LIGHT);

@@ -751,38 +728,23 @@ ccl_device_forceinline void integrate_volume_direct_light(
    return;
  }

-  /* Sample position on the same light again, now from the shading point where we scattered.
+  /* Sample position on the same light again, now from the shading
+   * point where we scattered.
   *
-   * Note that this means we sample the light tree twice when equiangular sampling is used.
-   * We could consider sampling the light tree just once and use the same light position again.
-   *
-   * This would make the PDFs for MIS weights more complicated due to having to account for
-   * both distance/equiangular and direct/indirect light sampling, but could be more accurate.
-   * Additionally we could end up behind the light or outside a spot light cone, which might
-   * waste a sample. Though on the other hand it would be possible to prevent that with
-   * equiangular sampling restricted to a smaller sub-segment where the light has influence. */
-  LightSample ls ccl_optional_struct_init;
+   * TODO: decorrelate random numbers and use light_sample_new_position to
+   * avoid resampling the CDF. */
  {
    const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
    const uint bounce = INTEGRATOR_STATE(state, path, bounce);
    const float2 rand_light = path_state_rng_2D(kg, rng_state, PRNG_LIGHT);

-    if (!light_sample_from_position(kg,
-                                    rng_state,
-                                    rand_light.x,
-                                    rand_light.y,
-                                    sd->time,
-                                    P,
-                                    zero_float3(),
-                                    SD_BSDF_HAS_TRANSMISSION,
-                                    bounce,
-                                    path_flag,
-                                    &ls)) {
+    if (!light_distribution_sample_from_position(
+            kg, rand_light.x, rand_light.y, sd->time, P, bounce, path_flag, ls)) {
      return;
    }
  }

-  if (ls.shader & SHADER_EXCLUDE_SCATTER) {
+  if (ls->shader & SHADER_EXCLUDE_SCATTER) {
    return;
  }

@@ -794,32 +756,32 @@ ccl_device_forceinline void integrate_volume_direct_light(
   * non-constant light sources. */
  ShaderDataTinyStorage emission_sd_storage;
  ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
-  const Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, sd->time);
+  const Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, ls, sd->time);
  if (is_zero(light_eval)) {
    return;
  }

  /* Evaluate BSDF. */
  BsdfEval phase_eval ccl_optional_struct_init;
-  float phase_pdf = volume_shader_phase_eval(kg, state, sd, phases, ls.D, &phase_eval);
+  float phase_pdf = volume_shader_phase_eval(kg, state, sd, phases, ls->D, &phase_eval);

-  if (ls.shader & SHADER_USE_MIS) {
-    float mis_weight = light_sample_mis_weight_nee(kg, ls.pdf, phase_pdf);
+  if (ls->shader & SHADER_USE_MIS) {
+    float mis_weight = light_sample_mis_weight_nee(kg, ls->pdf, phase_pdf);
    bsdf_eval_mul(&phase_eval, mis_weight);
  }

-  bsdf_eval_mul(&phase_eval, light_eval / ls.pdf);
+  bsdf_eval_mul(&phase_eval, light_eval / ls->pdf);

  /* Path termination. */
  const float terminate = path_state_rng_light_termination(kg, rng_state);
-  if (light_sample_terminate(kg, &ls, &phase_eval, terminate)) {
+  if (light_sample_terminate(kg, ls, &phase_eval, terminate)) {
    return;
  }

  /* Create shadow ray. */
  Ray ray ccl_optional_struct_init;
-  light_sample_to_volume_shadow_ray(kg, sd, &ls, P, &ray);
-  const bool is_light = light_sample_is_light(&ls);
+  light_sample_to_volume_shadow_ray(kg, sd, ls, P, &ray);
+  const bool is_light = light_sample_is_light(ls);

  /* Branch off shadow kernel. */
  IntegratorShadowState shadow_state = integrator_shadow_path_init(
@@ -878,14 +840,18 @@ ccl_device_forceinline void integrate_volume_direct_light(
      state, path, transmission_bounce);
  INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, throughput) = throughput_phase;

+  if (kernel_data.kernel_features & KERNEL_FEATURE_SHADOW_PASS) {
+    INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, unshadowed_throughput) = throughput;
+  }
+
  /* Write Lightgroup, +1 as lightgroup is int but we need to encode into a uint8_t. */
  INTEGRATOR_STATE_WRITE(
-      shadow_state, shadow_path, lightgroup) = (ls.type != LIGHT_BACKGROUND) ?
-                                                   ls.group + 1 :
+      shadow_state, shadow_path, lightgroup) = (ls->type != LIGHT_BACKGROUND) ?
+                                                   ls->group + 1 :
                                                   kernel_data.background.lightgroup + 1;

 #  ifdef __PATH_GUIDING__
-  INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, unlit_throughput) = unlit_throughput;
+  INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, unlit_throughput) = throughput;
  INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, path_segment) = INTEGRATOR_STATE(
      state, guiding, path_segment);
 #  endif
@@ -983,11 +949,10 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(

  /* Update path state */
  INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = phase_pdf;
-  INTEGRATOR_STATE_WRITE(state, path, mis_origin_n) = zero_float3();
  INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf(
      unguided_phase_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf));

-  path_state_next(kg, state, label, sd->flag);
+  path_state_next(kg, state, label);
  return true;
 }

@@ -1009,11 +974,12 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,

  /* Sample light ahead of volume stepping, for equiangular sampling. */
  /* TODO: distant lights are ignored now, but could instead use even distribution. */
+  LightSample ls ccl_optional_struct_init;
  const bool need_light_sample = !(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_TERMINATE);
-  float3 equiangular_P = zero_float3();
  const bool have_equiangular_sample = need_light_sample &&
-                                       integrate_volume_equiangular_sample_light(
-                                           kg, state, ray, &sd, &rng_state, &equiangular_P);
+                                       integrate_volume_sample_light(
+                                           kg, state, &sd, &rng_state, &ls) &&
+                                       (ls.t != FLT_MAX);

  VolumeSampleMethod direct_sample_method = (have_equiangular_sample) ?
                                                volume_stack_sample_method(kg, state) :
@@ -1024,13 +990,7 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
  const float step_size = volume_stack_step_size(kg, volume_read_lambda_pass);

 #  if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
-  /* The current path throughput which is used later to calculate per-segment throughput.*/
  const float3 initial_throughput = INTEGRATOR_STATE(state, path, throughput);
-  /* The path throughput used to calculate the throughput for direct light. */
-  float3 unlit_throughput = initial_throughput;
-  /* If a new path segment is generated at the direct scatter position.*/
-  bool guiding_generated_new_segment = false;
-  float rand_phase_guiding = 0.5f;
 #  endif

  /* TODO: expensive to zero closures? */
@@ -1043,7 +1003,7 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
                                 render_buffer,
                                 step_size,
                                 direct_sample_method,
-                                 equiangular_P,
+                                 ls.P,
                                 result);

  /* Perform path termination. The intersect_closest will have already marked this path
@@ -1058,48 +1018,41 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
    return VOLUME_PATH_MISSED;
  }

+#  if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
+  bool guiding_generated_new_segment = false;
+  if (kernel_data.integrator.use_guiding) {
+    /* Record transmittance using change in throughput. */
+    float3 transmittance_weight = spectrum_to_rgb(
+        safe_divide_color(result.indirect_throughput, initial_throughput));
+    guiding_record_volume_transmission(kg, state, transmittance_weight);
+
+    if (result.indirect_scatter) {
+      const float3 P = ray->P + result.indirect_t * ray->D;
+
+      /* Record volume segment up to direct scatter position.
+       * TODO: volume segment is wrong when direct_t and indirect_t differ. */
+      if (result.direct_scatter && (result.direct_t == result.indirect_t)) {
+        guiding_record_volume_segment(kg, state, P, sd.I);
+        guiding_generated_new_segment = true;
+      }
+
+#    if PATH_GUIDING_LEVEL >= 4
+      /* TODO: this position will be wrong for direct light pdf computation,
+       * since the direct light position may be different? */
+      volume_shader_prepare_guiding(
+          kg, state, &sd, &rng_state, P, ray->D, &result.direct_phases, direct_sample_method);
+#    endif
+    }
+    else {
+      /* No guiding if we don't scatter. */
+      state->guiding.use_volume_guiding = false;
+    }
+  }
+#  endif
+
  /* Direct light. */
  if (result.direct_scatter) {
    const float3 direct_P = ray->P + result.direct_t * ray->D;
-
-#  ifdef __PATH_GUIDING__
-    if (kernel_data.integrator.use_guiding) {
-#    if PATH_GUIDING_LEVEL >= 1
-      if (result.direct_sample_method == VOLUME_SAMPLE_DISTANCE) {
-        /* If the direct scatter event is generated using VOLUME_SAMPLE_DISTANCE the direct event
-         * will happen at the same position as the indirect event and the direct light contribution
-         * will contribute to the position of the next path segment.*/
-        float3 transmittance_weight = spectrum_to_rgb(
-            safe_divide_color(result.indirect_throughput, initial_throughput));
-        guiding_record_volume_transmission(kg, state, transmittance_weight);
-        guiding_record_volume_segment(kg, state, direct_P, sd.I);
-        guiding_generated_new_segment = true;
-        unlit_throughput = result.indirect_throughput / continuation_probability;
-        rand_phase_guiding = path_state_rng_1D(kg, &rng_state, PRNG_VOLUME_PHASE_GUIDING_DISTANCE);
-      }
-      else {
-        /* If the direct scatter event is generated using VOLUME_SAMPLE_EQUIANGULAR the direct
-         * event will happen at a separate position as the indirect event and the direct light
-         * contribution will contribute to the position of the current/previous path segment. The
-         * unlit_throughput has to be adjusted to include the scattering at the previous segment.*/
-        float3 scatterEval = one_float3();
-        if (state->guiding.path_segment) {
-          pgl_vec3f scatteringWeight = state->guiding.path_segment->scatteringWeight;
-          scatterEval = make_float3(scatteringWeight.x, scatteringWeight.y, scatteringWeight.z);
-        }
-        unlit_throughput /= scatterEval;
-        unlit_throughput *= continuation_probability;
-        rand_phase_guiding = path_state_rng_1D(
-            kg, &rng_state, PRNG_VOLUME_PHASE_GUIDING_EQUIANGULAR);
-      }
-#    endif
-#    if PATH_GUIDING_LEVEL >= 4
-      volume_shader_prepare_guiding(
-          kg, state, &sd, rand_phase_guiding, direct_P, ray->D, &result.direct_phases);
-#    endif
-    }
-#  endif
-
    result.direct_throughput /= continuation_probability;
    integrate_volume_direct_light(kg,
                                  state,
@@ -1107,10 +1060,8 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
                                  &rng_state,
                                  direct_P,
                                  &result.direct_phases,
-#  ifdef __PATH_GUIDING__
-                                  unlit_throughput,
-#  endif
-                                  result.direct_throughput);
+                                  result.direct_throughput,
+                                  &ls);
  }

  /* Indirect light.
@@ -1118,13 +1069,6 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
   * Only divide throughput by continuation_probability if we scatter. For the attenuation
   * case the next surface will already do this division. */
  if (result.indirect_scatter) {
-#  if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
-    if (!guiding_generated_new_segment) {
-      float3 transmittance_weight = spectrum_to_rgb(
-          safe_divide_color(result.indirect_throughput, initial_throughput));
-      guiding_record_volume_transmission(kg, state, transmittance_weight);
-    }
-#  endif
    result.indirect_throughput /= continuation_probability;
  }
  INTEGRATOR_STATE_WRITE(state, path, throughput) = result.indirect_throughput;
@@ -1132,21 +1076,10 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
  if (result.indirect_scatter) {
    sd.P = ray->P + result.indirect_t * ray->D;

-#  if defined(__PATH_GUIDING__)
-#    if PATH_GUIDING_LEVEL >= 1
+#  if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
    if (!guiding_generated_new_segment) {
      guiding_record_volume_segment(kg, state, sd.P, sd.I);
    }
-#    endif
-#    if PATH_GUIDING_LEVEL >= 4
-    /* If the direct scatter event was generated using VOLUME_SAMPLE_EQUIANGULAR we need to
-     * initialize the guiding distribution at the indirect scatter position. */
-    if (result.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR) {
-      rand_phase_guiding = path_state_rng_1D(kg, &rng_state, PRNG_VOLUME_PHASE_GUIDING_DISTANCE);
-      volume_shader_prepare_guiding(
-          kg, state, &sd, rand_phase_guiding, sd.P, ray->D, &result.indirect_phases);
-    }
-#    endif
 #  endif

    if (integrate_volume_phase_scatter(kg, state, &sd, &rng_state, &result.indirect_phases)) {
@@ -1157,10 +1090,6 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
    }
  }
  else {
-#  if defined(__PATH_GUIDING__)
-    /* No guiding if we don't scatter. */
-    state->guiding.use_volume_guiding = false;
-#  endif
    return VOLUME_PATH_ATTENUATED;
  }
 }
--- a/intern/cycles/kernel/integrator/shadow_state_template.h
+++ b/intern/cycles/kernel/integrator/shadow_state_template.h
@@ -32,7 +32,7 @@ KERNEL_STRUCT_MEMBER(shadow_path, PackedSpectrum, throughput, KERNEL_FEATURE_PAT
 KERNEL_STRUCT_MEMBER(shadow_path,
                     PackedSpectrum,
                     unshadowed_throughput,
-                     KERNEL_FEATURE_AO_ADDITIVE)
+                     KERNEL_FEATURE_SHADOW_PASS | KERNEL_FEATURE_AO_ADDITIVE)
 /* Ratio of throughput to distinguish diffuse / glossy / transmission render passes. */
 KERNEL_STRUCT_MEMBER(shadow_path, PackedSpectrum, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES)
 KERNEL_STRUCT_MEMBER(shadow_path, PackedSpectrum, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES)
--- a/intern/cycles/kernel/integrator/state_template.h
+++ b/intern/cycles/kernel/integrator/state_template.h
@@ -41,7 +41,6 @@ KERNEL_STRUCT_MEMBER(path, uint8_t, mnee, KERNEL_FEATURE_PATH_TRACING)
 * zero and distance. Note that transparency and volume attenuation increase
 * the ray tmin but keep P unmodified so that this works. */
 KERNEL_STRUCT_MEMBER(path, float, mis_ray_pdf, KERNEL_FEATURE_PATH_TRACING)
-KERNEL_STRUCT_MEMBER(path, packed_float3, mis_origin_n, KERNEL_FEATURE_PATH_TRACING)
 /* Filter glossy. */
 KERNEL_STRUCT_MEMBER(path, float, min_ray_pdf, KERNEL_FEATURE_PATH_TRACING)
 /* Continuation probability for path termination. */
--- a/intern/cycles/kernel/integrator/volume_shader.h
+++ b/intern/cycles/kernel/integrator/volume_shader.h
@@ -95,10 +95,11 @@ ccl_device_inline void volume_shader_copy_phases(ccl_private ShaderVolumePhases
 ccl_device_inline void volume_shader_prepare_guiding(KernelGlobals kg,
                                                     IntegratorState state,
                                                     ccl_private ShaderData *sd,
-                                                     float rand_phase_guiding,
+                                                     ccl_private const RNGState *rng_state,
                                                     const float3 P,
                                                     const float3 D,
-                                                     ccl_private ShaderVolumePhases *phases)
+                                                     ccl_private ShaderVolumePhases *phases,
+                                                     const VolumeSampleMethod direct_sample_method)
 {
  /* Have any phase functions to guide? */
  const int num_phases = phases->num_closure;
@@ -108,6 +109,7 @@ ccl_device_inline void volume_shader_prepare_guiding(KernelGlobals kg,
  }

  const float volume_guiding_probability = kernel_data.integrator.volume_guiding_probability;
+  float rand_phase_guiding = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_PHASE_GUIDING);

  /* If we have more than one phase function we select one random based on its
   * sample weight to calculate the product distribution for guiding. */
--- a/intern/cycles/kernel/light/area.h
+++ b/intern/cycles/kernel/light/area.h
@@ -1,387 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2011-2022 Blender Foundation */
-
-#pragma once
-
-#include "kernel/light/common.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* Importance sampling.
- *
- * An Area-Preserving Parametrization for Spherical Rectangles.
- * Carlos Urena et al.
- *
- * NOTE: light_p is modified when sample_coord is true. */
-ccl_device_inline float area_light_rect_sample(float3 P,
-                                               ccl_private float3 *light_p,
-                                               const float3 axis_u,
-                                               const float len_u,
-                                               const float3 axis_v,
-                                               const float len_v,
-                                               float randu,
-                                               float randv,
-                                               bool sample_coord)
-{
-  /* In our name system we're using P for the center, which is o in the paper. */
-  float3 corner = *light_p - axis_u * len_u * 0.5f - axis_v * len_v * 0.5f;
-  /* Compute local reference system R. */
-  float3 x = axis_u;
-  float3 y = axis_v;
-  float3 z = cross(x, y);
-  /* Compute rectangle coords in local reference system. */
-  float3 dir = corner - P;
-  float z0 = dot(dir, z);
-  /* Flip 'z' to make it point against Q. */
-  if (z0 > 0.0f) {
-    z *= -1.0f;
-    z0 *= -1.0f;
-  }
-  float x0 = dot(dir, x);
-  float y0 = dot(dir, y);
-  float x1 = x0 + len_u;
-  float y1 = y0 + len_v;
-  /* Compute internal angles (gamma_i). */
-  float4 diff = make_float4(x0, y1, x1, y0) - make_float4(x1, y0, x0, y1);
-  float4 nz = make_float4(y0, x1, y1, x0) * diff;
-  nz = nz / sqrt(z0 * z0 * diff * diff + nz * nz);
-  float g0 = safe_acosf(-nz.x * nz.y);
-  float g1 = safe_acosf(-nz.y * nz.z);
-  float g2 = safe_acosf(-nz.z * nz.w);
-  float g3 = safe_acosf(-nz.w * nz.x);
-  /* Compute predefined constants. */
-  float b0 = nz.x;
-  float b1 = nz.z;
-  float b0sq = b0 * b0;
-  float k = M_2PI_F - g2 - g3;
-  /* Compute solid angle from internal angles. */
-  float S = g0 + g1 - k;
-
-  if (sample_coord) {
-    /* Compute cu. */
-    float au = randu * S + k;
-    float fu = (cosf(au) * b0 - b1) / sinf(au);
-    float cu = 1.0f / sqrtf(fu * fu + b0sq) * (fu > 0.0f ? 1.0f : -1.0f);
-    cu = clamp(cu, -1.0f, 1.0f);
-    /* Compute xu. */
-    float xu = -(cu * z0) / max(sqrtf(1.0f - cu * cu), 1e-7f);
-    xu = clamp(xu, x0, x1);
-    /* Compute yv. */
-    float z0sq = z0 * z0;
-    float y0sq = y0 * y0;
-    float y1sq = y1 * y1;
-    float d = sqrtf(xu * xu + z0sq);
-    float h0 = y0 / sqrtf(d * d + y0sq);
-    float h1 = y1 / sqrtf(d * d + y1sq);
-    float hv = h0 + randv * (h1 - h0), hv2 = hv * hv;
-    float yv = (hv2 < 1.0f - 1e-6f) ? (hv * d) / sqrtf(1.0f - hv2) : y1;
-
-    /* Transform (xu, yv, z0) to world coords. */
-    *light_p = P + xu * x + yv * y + z0 * z;
-  }
-
-  /* return pdf */
-  if (S != 0.0f)
-    return 1.0f / S;
-  else
-    return 0.0f;
-}
-
-/* Light spread. */
-
-ccl_device float area_light_spread_attenuation(const float3 D,
-                                               const float3 lightNg,
-                                               const float cot_half_spread,
-                                               const float normalize_spread)
-{
-  /* Model a soft-box grid, computing the ratio of light not hidden by the
-   * slats of the grid at a given angle. (see D10594). */
-  const float cos_a = -dot(D, lightNg);
-  const float sin_a = safe_sqrtf(1.0f - sqr(cos_a));
-  const float tan_a = sin_a / cos_a;
-  return max((1.0f - (cot_half_spread * tan_a)) * normalize_spread, 0.0f);
-}
-
-/* Compute subset of area light that actually has an influence on the shading point, to
- * reduce noise with low spread. */
-ccl_device bool area_light_spread_clamp_area_light(const float3 P,
-                                                   const float3 lightNg,
-                                                   ccl_private float3 *lightP,
-                                                   const float3 axis_u,
-                                                   ccl_private float *len_u,
-                                                   const float3 axis_v,
-                                                   ccl_private float *len_v,
-                                                   const float cot_half_spread)
-{
-  /* Closest point in area light plane and distance to that plane. */
-  const float3 closest_P = P - dot(lightNg, P - *lightP) * lightNg;
-  const float t = len(closest_P - P);
-
-  /* Radius of circle on area light that actually affects the shading point. */
-  const float radius = t / cot_half_spread;
-
-  /* Local uv coordinates of closest point. */
-  const float closest_u = dot(axis_u, closest_P - *lightP);
-  const float closest_v = dot(axis_v, closest_P - *lightP);
-
-  /* Compute rectangle encompassing the circle that affects the shading point,
-   * clamped to the bounds of the area light. */
-  const float min_u = max(closest_u - radius, -*len_u * 0.5f);
-  const float max_u = min(closest_u + radius, *len_u * 0.5f);
-  const float min_v = max(closest_v - radius, -*len_v * 0.5f);
-  const float max_v = min(closest_v + radius, *len_v * 0.5f);
-
-  /* Skip if rectangle is empty. */
-  if (min_u >= max_u || min_v >= max_v) {
-    return false;
-  }
-
-  /* Compute new area light center position and axes from rectangle in local
-   * uv coordinates. */
-  const float new_center_u = 0.5f * (min_u + max_u);
-  const float new_center_v = 0.5f * (min_v + max_v);
-  *len_u = max_u - min_u;
-  *len_v = max_v - min_v;
-
-  *lightP = *lightP + new_center_u * axis_u + new_center_v * axis_v;
-
-  return true;
-}
-
-/* Common API. */
-
-template<bool in_volume_segment>
-ccl_device_inline bool area_light_sample(const ccl_global KernelLight *klight,
-                                         const float randu,
-                                         const float randv,
-                                         const float3 P,
-                                         ccl_private LightSample *ls)
-{
-  ls->P = klight->co;
-
-  const float3 axis_u = klight->area.axis_u;
-  const float3 axis_v = klight->area.axis_v;
-  const float len_u = klight->area.len_u;
-  const float len_v = klight->area.len_v;
-  float3 Ng = klight->area.dir;
-  float invarea = fabsf(klight->area.invarea);
-  bool is_round = (klight->area.invarea < 0.0f);
-
-  if (!in_volume_segment) {
-    if (dot(ls->P - P, Ng) > 0.0f) {
-      return false;
-    }
-  }
-
-  float3 inplane;
-
-  if (is_round || in_volume_segment) {
-    inplane = ellipse_sample(axis_u * len_u * 0.5f, axis_v * len_v * 0.5f, randu, randv);
-    ls->P += inplane;
-    ls->pdf = invarea;
-  }
-  else {
-    inplane = ls->P;
-
-    float sample_len_u = len_u;
-    float sample_len_v = len_v;
-
-    if (!in_volume_segment && klight->area.cot_half_spread > 0.0f) {
-      if (!area_light_spread_clamp_area_light(P,
-                                              Ng,
-                                              &ls->P,
-                                              axis_u,
-                                              &sample_len_u,
-                                              axis_v,
-                                              &sample_len_v,
-                                              klight->area.cot_half_spread)) {
-        return false;
-      }
-    }
-
-    ls->pdf = area_light_rect_sample(
-        P, &ls->P, axis_u, sample_len_u, axis_v, sample_len_v, randu, randv, true);
-    inplane = ls->P - inplane;
-  }
-
-  const float light_u = dot(inplane, axis_u) / len_u;
-  const float light_v = dot(inplane, axis_v) / len_v;
-
-  /* NOTE: Return barycentric coordinates in the same notation as Embree and OptiX. */
-  ls->u = light_v + 0.5f;
-  ls->v = -light_u - light_v;
-
-  ls->Ng = Ng;
-  ls->D = normalize_len(ls->P - P, &ls->t);
-
-  ls->eval_fac = 0.25f * invarea;
-
-  if (klight->area.cot_half_spread > 0.0f) {
-    /* Area Light spread angle attenuation */
-    ls->eval_fac *= area_light_spread_attenuation(
-        ls->D, ls->Ng, klight->area.cot_half_spread, klight->area.normalize_spread);
-  }
-
-  if (is_round) {
-    ls->pdf *= lamp_light_pdf(Ng, -ls->D, ls->t);
-  }
-
-  return true;
-}
-
-ccl_device_forceinline void area_light_update_position(const ccl_global KernelLight *klight,
-                                                       ccl_private LightSample *ls,
-                                                       const float3 P)
-{
-  const float invarea = fabsf(klight->area.invarea);
-  ls->D = normalize_len(ls->P - P, &ls->t);
-  ls->pdf = invarea;
-
-  if (klight->area.cot_half_spread > 0.f) {
-    ls->eval_fac = 0.25f * invarea;
-    ls->eval_fac *= area_light_spread_attenuation(
-        ls->D, ls->Ng, klight->area.cot_half_spread, klight->area.normalize_spread);
-  }
-}
-
-ccl_device_inline bool area_light_intersect(const ccl_global KernelLight *klight,
-                                            const ccl_private Ray *ccl_restrict ray,
-                                            ccl_private float *t,
-                                            ccl_private float *u,
-                                            ccl_private float *v)
-{
-  /* Area light. */
-  const float invarea = fabsf(klight->area.invarea);
-  const bool is_round = (klight->area.invarea < 0.0f);
-  if (invarea == 0.0f) {
-    return false;
-  }
-
-  const float3 inv_extent_u = klight->area.axis_u / klight->area.len_u;
-  const float3 inv_extent_v = klight->area.axis_v / klight->area.len_v;
-  const float3 Ng = klight->area.dir;
-
-  /* One sided. */
-  if (dot(ray->D, Ng) >= 0.0f) {
-    return false;
-  }
-
-  const float3 light_P = klight->co;
-
-  float3 P;
-  return ray_quad_intersect(ray->P,
-                            ray->D,
-                            ray->tmin,
-                            ray->tmax,
-                            light_P,
-                            inv_extent_u,
-                            inv_extent_v,
-                            Ng,
-                            &P,
-                            t,
-                            u,
-                            v,
-                            is_round);
-}
-
-ccl_device_inline bool area_light_sample_from_intersection(
-    const ccl_global KernelLight *klight,
-    ccl_private const Intersection *ccl_restrict isect,
-    const float3 ray_P,
-    const float3 ray_D,
-    ccl_private LightSample *ccl_restrict ls)
-{
-
-  /* area light */
-  float invarea = fabsf(klight->area.invarea);
-
-  float3 Ng = klight->area.dir;
-  float3 light_P = klight->co;
-
-  ls->u = isect->u;
-  ls->v = isect->v;
-  ls->D = ray_D;
-  ls->Ng = Ng;
-
-  const bool is_round = (klight->area.invarea < 0.0f);
-  if (is_round) {
-    ls->pdf = invarea * lamp_light_pdf(Ng, -ray_D, ls->t);
-  }
-  else {
-    const float3 axis_u = klight->area.axis_u;
-    const float3 axis_v = klight->area.axis_v;
-    float sample_len_u = klight->area.len_u;
-    float sample_len_v = klight->area.len_v;
-
-    if (klight->area.cot_half_spread > 0.0f) {
-      if (!area_light_spread_clamp_area_light(ray_P,
-                                              Ng,
-                                              &light_P,
-                                              axis_u,
-                                              &sample_len_u,
-                                              axis_v,
-                                              &sample_len_v,
-                                              klight->area.cot_half_spread)) {
-        return false;
-      }
-    }
-
-    ls->pdf = area_light_rect_sample(
-        ray_P, &light_P, axis_u, sample_len_u, axis_v, sample_len_v, 0, 0, false);
-  }
-  ls->eval_fac = 0.25f * invarea;
-
-  if (klight->area.cot_half_spread > 0.0f) {
-    /* Area Light spread angle attenuation */
-    ls->eval_fac *= area_light_spread_attenuation(
-        ls->D, ls->Ng, klight->area.cot_half_spread, klight->area.normalize_spread);
-    if (ls->eval_fac == 0.0f) {
-      return false;
-    }
-  }
-
-  return true;
-}
-
-template<bool in_volume_segment>
-ccl_device_forceinline bool area_light_tree_parameters(const ccl_global KernelLight *klight,
-                                                       const float3 centroid,
-                                                       const float3 P,
-                                                       const float3 N,
-                                                       const float3 bcone_axis,
-                                                       ccl_private float &cos_theta_u,
-                                                       ccl_private float2 &distance,
-                                                       ccl_private float3 &point_to_centroid)
-{
-  if (!in_volume_segment) {
-    /* TODO: a cheap substitute for minimal distance between point and primitive. Does it
-     * worth the overhead to compute the accurate minimal distance? */
-    float min_distance;
-    point_to_centroid = safe_normalize_len(centroid - P, &min_distance);
-    distance = make_float2(min_distance, min_distance);
-  }
-
-  cos_theta_u = FLT_MAX;
-
-  const float3 extentu = klight->area.axis_u * klight->area.len_u;
-  const float3 extentv = klight->area.axis_v * klight->area.len_v;
-  for (int i = 0; i < 4; i++) {
-    const float3 corner = ((i & 1) - 0.5f) * extentu + 0.5f * ((i & 2) - 1) * extentv + centroid;
-    float distance_point_to_corner;
-    const float3 point_to_corner = safe_normalize_len(corner - P, &distance_point_to_corner);
-    cos_theta_u = fminf(cos_theta_u, dot(point_to_centroid, point_to_corner));
-    if (!in_volume_segment) {
-      distance.x = fmaxf(distance.x, distance_point_to_corner);
-    }
-  }
-
-  const bool front_facing = dot(bcone_axis, point_to_centroid) < 0;
-  const bool shape_above_surface = dot(N, centroid - P) + fabsf(dot(N, extentu)) +
-                                       fabsf(dot(N, extentv)) >
-                                   0;
-  const bool in_volume = is_zero(N);
-
-  return (front_facing && shape_above_surface) || in_volume;
-}
-
-CCL_NAMESPACE_END
--- a/intern/cycles/kernel/light/background.h
+++ b/intern/cycles/kernel/light/background.h
@@ -3,7 +3,6 @@

 #pragma once

-#include "kernel/light/area.h"
 #include "kernel/light/common.h"

 CCL_NAMESPACE_BEGIN
@@ -131,11 +130,11 @@ ccl_device float background_map_pdf(KernelGlobals kg, float3 direction)
 ccl_device_inline bool background_portal_data_fetch_and_check_side(
    KernelGlobals kg, float3 P, int index, ccl_private float3 *lightpos, ccl_private float3 *dir)
 {
-  int portal = kernel_data.integrator.portal_offset + index;
+  int portal = kernel_data.background.portal_offset + index;
  const ccl_global KernelLight *klight = &kernel_data_fetch(lights, portal);

-  *lightpos = klight->co;
-  *dir = klight->area.dir;
+  *lightpos = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+  *dir = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]);

  /* Check whether portal is on the right side. */
  if (dot(*dir, P - *lightpos) > 1e-4f)
@@ -150,7 +149,7 @@ ccl_device_inline float background_portal_pdf(
  float portal_pdf = 0.0f;

  int num_possible = 0;
-  for (int p = 0; p < kernel_data.integrator.num_portals; p++) {
+  for (int p = 0; p < kernel_data.background.num_portals; p++) {
    if (p == ignore_portal)
      continue;

@@ -164,16 +163,12 @@ ccl_device_inline float background_portal_pdf(
    }
    num_possible++;

-    int portal = kernel_data.integrator.portal_offset + p;
+    int portal = kernel_data.background.portal_offset + p;
    const ccl_global KernelLight *klight = &kernel_data_fetch(lights, portal);
-
-    const float3 axis_u = klight->area.axis_u;
-    const float len_u = klight->area.len_u;
-    const float3 axis_v = klight->area.axis_v;
-    const float len_v = klight->area.len_v;
-    const float3 inv_extent_u = axis_u / len_u;
-    const float3 inv_extent_v = axis_v / len_v;
-
+    float3 axisu = make_float3(
+        klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
+    float3 axisv = make_float3(
+        klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
    bool is_round = (klight->area.invarea < 0.0f);

    if (!ray_quad_intersect(P,
@@ -181,8 +176,8 @@ ccl_device_inline float background_portal_pdf(
                            1e-4f,
                            FLT_MAX,
                            lightpos,
-                            inv_extent_u,
-                            inv_extent_v,
+                            axisu,
+                            axisv,
                            dir,
                            NULL,
                            NULL,
@@ -194,11 +189,10 @@ ccl_device_inline float background_portal_pdf(
    if (is_round) {
      float t;
      float3 D = normalize_len(lightpos - P, &t);
-      portal_pdf += fabsf(klight->area.invarea) * lamp_light_pdf(dir, -D, t);
+      portal_pdf += fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t);
    }
    else {
-      portal_pdf += area_light_rect_sample(
-          P, &lightpos, axis_u, len_u, axis_v, len_v, 0.0f, 0.0f, false);
+      portal_pdf += rect_light_sample(P, &lightpos, axisu, axisv, 0.0f, 0.0f, false);
    }
  }

@@ -213,7 +207,7 @@ ccl_device_inline float background_portal_pdf(
 ccl_device int background_num_possible_portals(KernelGlobals kg, float3 P)
 {
  int num_possible_portals = 0;
-  for (int p = 0; p < kernel_data.integrator.num_portals; p++) {
+  for (int p = 0; p < kernel_data.background.num_portals; p++) {
    float3 lightpos, dir;
    if (background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
      num_possible_portals++;
@@ -237,7 +231,7 @@ ccl_device float3 background_portal_sample(KernelGlobals kg,
  /* TODO(sergey): Some smarter way of finding portal to sample
   * is welcome.
   */
-  for (int p = 0; p < kernel_data.integrator.num_portals; p++) {
+  for (int p = 0; p < kernel_data.background.num_portals; p++) {
    /* Search for the sampled portal. */
    float3 lightpos, dir;
    if (!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
@@ -245,24 +239,23 @@ ccl_device float3 background_portal_sample(KernelGlobals kg,

    if (portal == 0) {
      /* p is the portal to be sampled. */
-      int portal = kernel_data.integrator.portal_offset + p;
+      int portal = kernel_data.background.portal_offset + p;
      const ccl_global KernelLight *klight = &kernel_data_fetch(lights, portal);
-      const float3 axis_u = klight->area.axis_u;
-      const float3 axis_v = klight->area.axis_v;
-      const float len_u = klight->area.len_u;
-      const float len_v = klight->area.len_v;
+      float3 axisu = make_float3(
+          klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
+      float3 axisv = make_float3(
+          klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
      bool is_round = (klight->area.invarea < 0.0f);

      float3 D;
      if (is_round) {
-        lightpos += ellipse_sample(axis_u * len_u * 0.5f, axis_v * len_v * 0.5f, randu, randv);
+        lightpos += ellipse_sample(axisu * 0.5f, axisv * 0.5f, randu, randv);
        float t;
        D = normalize_len(lightpos - P, &t);
-        *pdf = fabsf(klight->area.invarea) * lamp_light_pdf(dir, -D, t);
+        *pdf = fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t);
      }
      else {
-        *pdf = area_light_rect_sample(
-            P, &lightpos, axis_u, len_u, axis_v, len_v, randu, randv, true);
+        *pdf = rect_light_sample(P, &lightpos, axisu, axisv, randu, randv, true);
        D = normalize(lightpos - P);
      }

@@ -421,7 +414,7 @@ ccl_device float background_light_pdf(KernelGlobals kg, float3 P, float3 directi
  float pdf_fac = (portal_method_pdf + sun_method_pdf + map_method_pdf);
  if (pdf_fac == 0.0f) {
    /* Use uniform as a fallback if we can't use any strategy. */
-    return 1.0f / M_4PI_F;
+    return kernel_data.integrator.pdf_lights / M_4PI_F;
  }

  pdf_fac = 1.0f / pdf_fac;
@@ -437,21 +430,7 @@ ccl_device float background_light_pdf(KernelGlobals kg, float3 P, float3 directi
    pdf += background_map_pdf(kg, direction) * map_method_pdf;
  }

-  return pdf;
-}
-
-ccl_device_forceinline bool background_light_tree_parameters(const float3 centroid,
-                                                             ccl_private float &cos_theta_u,
-                                                             ccl_private float2 &distance,
-                                                             ccl_private float3 &point_to_centroid)
-{
-  /* Cover the whole sphere */
-  cos_theta_u = -1.0f;
-
-  distance = make_float2(1.0f, 1.0f);
-  point_to_centroid = -centroid;
-
-  return true;
+  return pdf * kernel_data.integrator.pdf_lights;
 }

 CCL_NAMESPACE_END
--- a/intern/cycles/kernel/light/common.h
+++ b/intern/cycles/kernel/light/common.h
@@ -7,26 +7,92 @@

 CCL_NAMESPACE_BEGIN

-/* Light Sample Result */
+/* Area light sampling */

-typedef struct LightSample {
-  float3 P;            /* position on light, or direction for distant light */
-  float3 Ng;           /* normal on light */
-  float3 D;            /* direction from shading point to light */
-  float t;             /* distance to light (FLT_MAX for distant light) */
-  float u, v;          /* parametric coordinate on primitive */
-  float pdf;           /* pdf for selecting light and point on light */
-  float pdf_selection; /* pdf for selecting light */
-  float eval_fac;      /* intensity multiplier */
-  int object;          /* object id for triangle/curve lights */
-  int prim;            /* primitive id for triangle/curve lights */
-  int shader;          /* shader id */
-  int lamp;            /* lamp id */
-  int group;           /* lightgroup */
-  LightType type;      /* type of light */
-} LightSample;
+/* Uses the following paper:
+ *
+ * Carlos Urena et al.
+ * An Area-Preserving Parametrization for Spherical Rectangles.
+ *
+ * https://www.solidangle.com/research/egsr2013_spherical_rectangle.pdf
+ *
+ * NOTE: light_p is modified when sample_coord is true.
+ */
+ccl_device_inline float rect_light_sample(float3 P,
+                                          ccl_private float3 *light_p,
+                                          float3 axisu,
+                                          float3 axisv,
+                                          float randu,
+                                          float randv,
+                                          bool sample_coord)
+{
+  /* In our name system we're using P for the center,
+   * which is o in the paper.
+   */

-/* Utilities */
+  float3 corner = *light_p - axisu * 0.5f - axisv * 0.5f;
+  float axisu_len, axisv_len;
+  /* Compute local reference system R. */
+  float3 x = normalize_len(axisu, &axisu_len);
+  float3 y = normalize_len(axisv, &axisv_len);
+  float3 z = cross(x, y);
+  /* Compute rectangle coords in local reference system. */
+  float3 dir = corner - P;
+  float z0 = dot(dir, z);
+  /* Flip 'z' to make it point against Q, which keeps z0 = dot(dir, z) non-positive. */
+  if (z0 > 0.0f) {
+    z *= -1.0f;
+    z0 *= -1.0f;
+  }
+  float x0 = dot(dir, x);
+  float y0 = dot(dir, y);
+  float x1 = x0 + axisu_len;
+  float y1 = y0 + axisv_len;
+  /* Compute internal angles (gamma_i). */
+  float4 diff = make_float4(x0, y1, x1, y0) - make_float4(x1, y0, x0, y1);
+  float4 nz = make_float4(y0, x1, y1, x0) * diff;
+  nz = nz / sqrt(z0 * z0 * diff * diff + nz * nz);
+  float g0 = safe_acosf(-nz.x * nz.y);
+  float g1 = safe_acosf(-nz.y * nz.z);
+  float g2 = safe_acosf(-nz.z * nz.w);
+  float g3 = safe_acosf(-nz.w * nz.x);
+  /* Compute predefined constants. */
+  float b0 = nz.x;
+  float b1 = nz.z;
+  float b0sq = b0 * b0;
+  float k = M_2PI_F - g2 - g3;
+  /* Compute solid angle from internal angles. */
+  float S = g0 + g1 - k;
+
+  if (sample_coord) {
+    /* Compute cu. */
+    float au = randu * S + k;
+    float fu = (cosf(au) * b0 - b1) / sinf(au);
+    float cu = 1.0f / sqrtf(fu * fu + b0sq) * (fu > 0.0f ? 1.0f : -1.0f);
+    cu = clamp(cu, -1.0f, 1.0f);
+    /* Compute xu. */
+    float xu = -(cu * z0) / max(sqrtf(1.0f - cu * cu), 1e-7f);
+    xu = clamp(xu, x0, x1);
+    /* Compute yv. */
+    float z0sq = z0 * z0;
+    float y0sq = y0 * y0;
+    float y1sq = y1 * y1;
+    float d = sqrtf(xu * xu + z0sq);
+    float h0 = y0 / sqrtf(d * d + y0sq);
+    float h1 = y1 / sqrtf(d * d + y1sq);
+    float hv = h0 + randv * (h1 - h0), hv2 = hv * hv;
+    float yv = (hv2 < 1.0f - 1e-6f) ? (hv * d) / sqrtf(1.0f - hv2) : y1;
+
+    /* Transform (xu, yv, z0) to world coords. */
+    *light_p = P + xu * x + yv * y + z0 * z;
+  }
+
+  /* Return the pdf: 1 / S for solid angle S, or zero when S is zero. */
+  if (S != 0.0f)
+    return 1.0f / S;
+  else
+    return 0.0f;
+}

 ccl_device_inline float3 ellipse_sample(float3 ru, float3 rv, float randu, float randv)
 {
@@ -43,7 +109,99 @@ ccl_device float3 disk_light_sample(float3 v, float randu, float randv)
  return ellipse_sample(ru, rv, randu, randv);
 }

-ccl_device float lamp_light_pdf(const float3 Ng, const float3 I, float t)
+ccl_device float3 distant_light_sample(float3 D, float radius, float randu, float randv)
+{
+  return normalize(D + disk_light_sample(D, randu, randv) * radius);
+}
+
+ccl_device float3
+sphere_light_sample(float3 P, float3 center, float radius, float randu, float randv)
+{
+  return disk_light_sample(normalize(P - center), randu, randv) * radius;
+}
+
+ccl_device float spot_light_attenuation(float3 dir, float spot_angle, float spot_smooth, float3 N)
+{
+  float attenuation = dot(dir, N);
+
+  if (attenuation <= spot_angle) {
+    attenuation = 0.0f;
+  }
+  else {
+    float t = attenuation - spot_angle;
+
+    if (t < spot_smooth && spot_smooth != 0.0f)
+      attenuation *= smoothstepf(t / spot_smooth);
+  }
+
+  return attenuation;
+}
+
+ccl_device float light_spread_attenuation(const float3 D,
+                                          const float3 lightNg,
+                                          const float tan_spread,
+                                          const float normalize_spread)
+{
+  /* Model a soft-box grid, computing the ratio of light not hidden by the
+   * slats of the grid at a given angle. (see D10594). */
+  const float cos_a = -dot(D, lightNg);
+  const float sin_a = safe_sqrtf(1.0f - sqr(cos_a));
+  const float tan_a = sin_a / cos_a;
+  return max((1.0f - (tan_spread * tan_a)) * normalize_spread, 0.0f);
+}
+
+/* Compute subset of area light that actually has an influence on the shading point, to
+ * reduce noise with low spread. */
+ccl_device bool light_spread_clamp_area_light(const float3 P,
+                                              const float3 lightNg,
+                                              ccl_private float3 *lightP,
+                                              ccl_private float3 *axisu,
+                                              ccl_private float3 *axisv,
+                                              const float tan_spread)
+{
+  /* Closest point in area light plane and distance to that plane. */
+  const float3 closest_P = P - dot(lightNg, P - *lightP) * lightNg;
+  const float t = len(closest_P - P);
+
+  /* Radius of circle on area light that actually affects the shading point. */
+  const float radius = t / tan_spread;
+
+  /* TODO: would be faster to store as normalized vector + length, also in rect_light_sample. */
+  float len_u, len_v;
+  const float3 u = normalize_len(*axisu, &len_u);
+  const float3 v = normalize_len(*axisv, &len_v);
+
+  /* Local uv coordinates of closest point. */
+  const float closest_u = dot(u, closest_P - *lightP);
+  const float closest_v = dot(v, closest_P - *lightP);
+
+  /* Compute rectangle encompassing the circle that affects the shading point,
+   * clamped to the bounds of the area light. */
+  const float min_u = max(closest_u - radius, -len_u * 0.5f);
+  const float max_u = min(closest_u + radius, len_u * 0.5f);
+  const float min_v = max(closest_v - radius, -len_v * 0.5f);
+  const float max_v = min(closest_v + radius, len_v * 0.5f);
+
+  /* Skip if rectangle is empty. */
+  if (min_u >= max_u || min_v >= max_v) {
+    return false;
+  }
+
+  /* Compute new area light center position and axes from rectangle in local
+   * uv coordinates. */
+  const float new_center_u = 0.5f * (min_u + max_u);
+  const float new_center_v = 0.5f * (min_v + max_v);
+  const float new_len_u = max_u - min_u;
+  const float new_len_v = max_v - min_v;
+
+  *lightP = *lightP + new_center_u * u + new_center_v * v;
+  *axisu = u * new_len_u;
+  *axisv = v * new_len_v;
+
+  return true;
+}
+
+ccl_device float lamp_light_pdf(KernelGlobals kg, const float3 Ng, const float3 I, float t)
 {
  float cos_pi = dot(Ng, I);

--- a/intern/cycles/kernel/light/distant.h
+++ b/intern/cycles/kernel/light/distant.h
@@ -1,127 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2011-2022 Blender Foundation */
-
-#pragma once
-
-#include "kernel/geom/geom.h"
-
-#include "kernel/light/common.h"
-
-CCL_NAMESPACE_BEGIN
-
-ccl_device_inline bool distant_light_sample(const ccl_global KernelLight *klight,
-                                            const float randu,
-                                            const float randv,
-                                            ccl_private LightSample *ls)
-{
-  /* distant light */
-  float3 lightD = klight->co;
-  float3 D = lightD;
-  float radius = klight->distant.radius;
-  float invarea = klight->distant.invarea;
-
-  if (radius > 0.0f) {
-    D = normalize(D + disk_light_sample(D, randu, randv) * radius);
-  }
-
-  ls->P = D;
-  ls->Ng = D;
-  ls->D = -D;
-  ls->t = FLT_MAX;
-
-  float costheta = dot(lightD, D);
-  ls->pdf = invarea / (costheta * costheta * costheta);
-  ls->eval_fac = ls->pdf;
-
-  return true;
-}
-
-ccl_device bool distant_light_sample_from_intersection(KernelGlobals kg,
-                                                       const float3 ray_D,
-                                                       const int lamp,
-                                                       ccl_private LightSample *ccl_restrict ls)
-{
-  ccl_global const KernelLight *klight = &kernel_data_fetch(lights, lamp);
-  const int shader = klight->shader_id;
-  const float radius = klight->distant.radius;
-  const LightType type = (LightType)klight->type;
-
-  if (type != LIGHT_DISTANT) {
-    return false;
-  }
-  if (!(shader & SHADER_USE_MIS)) {
-    return false;
-  }
-  if (radius == 0.0f) {
-    return false;
-  }
-
-  /* a distant light is infinitely far away, but equivalent to a disk
-   * shaped light exactly 1 unit away from the current shading point.
-   *
-   *     radius              t^2/cos(theta)
-   *  <---------->           t = sqrt(1^2 + tan(theta)^2)
-   *       tan(th)           area = radius*radius*pi
-   *       <----->
-   *        \    |           (1 + tan(theta)^2)/cos(theta)
-   *         \   |           (1 + tan(acos(cos(theta)))^2)/cos(theta)
-   *       t  \th| 1         simplifies to
-   *           \-|           1/(cos(theta)^3)
-   *            \|           magic!
-   *             P
-   */
-
-  float3 lightD = klight->co;
-  float costheta = dot(-lightD, ray_D);
-  float cosangle = klight->distant.cosangle;
-
-  /* Workaround to prevent a hang in the classroom scene with AMD HIP drivers 22.10,
-   * Remove when a compiler fix is available. */
-#ifdef __HIP__
-  ls->shader = klight->shader_id;
-#endif
-
-  if (costheta < cosangle)
-    return false;
-
-  ls->type = type;
-#ifndef __HIP__
-  ls->shader = klight->shader_id;
-#endif
-  ls->object = PRIM_NONE;
-  ls->prim = PRIM_NONE;
-  ls->lamp = lamp;
-  /* todo: missing texture coordinates */
-  ls->u = 0.0f;
-  ls->v = 0.0f;
-  ls->t = FLT_MAX;
-  ls->P = -ray_D;
-  ls->Ng = -ray_D;
-  ls->D = ray_D;
-  ls->group = lamp_lightgroup(kg, lamp);
-
-  /* compute pdf */
-  float invarea = klight->distant.invarea;
-  ls->pdf = invarea / (costheta * costheta * costheta);
-  ls->eval_fac = ls->pdf;
-
-  return true;
-}
-
-ccl_device_forceinline bool distant_light_tree_parameters(const float3 centroid,
-                                                          const float theta_e,
-                                                          ccl_private float &cos_theta_u,
-                                                          ccl_private float2 &distance,
-                                                          ccl_private float3 &point_to_centroid)
-{
-  /* Treating it as a disk light 1 unit away */
-  cos_theta_u = fast_cosf(theta_e);
-
-  distance = make_float2(1.0f / cos_theta_u, 1.0f);
-
-  point_to_centroid = -centroid;
-
-  return true;
-}
-
-CCL_NAMESPACE_END
--- a/intern/cycles/kernel/light/distribution.h
+++ b/intern/cycles/kernel/light/distribution.h
@@ -1,80 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2011-2022 Blender Foundation */
-
-#pragma once
-
-#include "kernel/light/light.h"
-#include "kernel/light/triangle.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* Simple CDF based sampling over all lights in the scene, without taking into
- * account shading position or normal. */
-
-ccl_device int light_distribution_sample(KernelGlobals kg, ccl_private float &randu)
-{
-  /* This is basically std::upper_bound as used by PBRT, to find a point light or
-   * triangle to emit from, proportional to area. a good improvement would be to
-   * also sample proportional to power, though it's not so well defined with
-   * arbitrary shaders. */
-  int first = 0;
-  int len = kernel_data.integrator.num_distribution + 1;
-  float r = randu;
-
-  do {
-    int half_len = len >> 1;
-    int middle = first + half_len;
-
-    if (r < kernel_data_fetch(light_distribution, middle).totarea) {
-      len = half_len;
-    }
-    else {
-      first = middle + 1;
-      len = len - half_len - 1;
-    }
-  } while (len > 0);
-
-  /* Clamping should not be needed but float rounding errors seem to
-   * make this fail on rare occasions. */
-  int index = clamp(first - 1, 0, kernel_data.integrator.num_distribution - 1);
-
-  /* Rescale to reuse random number. this helps the 2D samples within
-   * each area light be stratified as well. */
-  float distr_min = kernel_data_fetch(light_distribution, index).totarea;
-  float distr_max = kernel_data_fetch(light_distribution, index + 1).totarea;
-  randu = (r - distr_min) / (distr_max - distr_min);
-
-  return index;
-}
-
-ccl_device_noinline bool light_distribution_sample(KernelGlobals kg,
-                                                   ccl_private float &randu,
-                                                   const float randv,
-                                                   const float time,
-                                                   const float3 P,
-                                                   const int bounce,
-                                                   const uint32_t path_flag,
-                                                   ccl_private int &emitter_object,
-                                                   ccl_private int &emitter_prim,
-                                                   ccl_private int &emitter_shader_flag,
-                                                   ccl_private float &emitter_pdf_selection)
-{
-  /* Sample light index from distribution. */
-  const int index = light_distribution_sample(kg, randu);
-  ccl_global const KernelLightDistribution *kdistribution = &kernel_data_fetch(light_distribution,
-                                                                               index);
-
-  emitter_object = kdistribution->mesh_light.object_id;
-  emitter_prim = kdistribution->prim;
-  emitter_shader_flag = kdistribution->mesh_light.shader_flag;
-  emitter_pdf_selection = kernel_data.integrator.distribution_pdf_lights;
-
-  return true;
-}
-
-ccl_device_inline float light_distribution_pdf_lamp(KernelGlobals kg)
-{
-  return kernel_data.integrator.distribution_pdf_lights;
-}
-
-CCL_NAMESPACE_END
--- a/intern/cycles/kernel/light/light.h
+++ b/intern/cycles/kernel/light/light.h
@@ -3,18 +3,31 @@

 #pragma once

-#include "kernel/light/area.h"
+#include "kernel/geom/geom.h"
 #include "kernel/light/background.h"
-#include "kernel/light/distant.h"
-#include "kernel/light/point.h"
-#include "kernel/light/spot.h"
-#include "kernel/light/triangle.h"
-
 #include "kernel/sample/mapping.h"

 CCL_NAMESPACE_BEGIN

-/* Sample point on an individual light. */
+/* Light Sample result */
+
+typedef struct LightSample {
+  float3 P;       /* position on light, or direction for distant light */
+  float3 Ng;      /* normal on light */
+  float3 D;       /* direction from shading point to light */
+  float t;        /* distance to light (FLT_MAX for distant light) */
+  float u, v;     /* parametric coordinate on primitive */
+  float pdf;      /* light sampling probability density function */
+  float eval_fac; /* intensity multiplier */
+  int object;     /* object id for triangle/curve lights */
+  int prim;       /* primitive id for triangle/curve lights */
+  int shader;     /* shader id */
+  int lamp;       /* lamp id */
+  int group;      /* lightgroup */
+  LightType type; /* type of light */
+} LightSample;
+
+/* Regular Light */

 template<bool in_volume_segment>
 ccl_device_inline bool light_sample(KernelGlobals kg,
@@ -50,15 +63,28 @@ ccl_device_inline bool light_sample(KernelGlobals kg,
    ls->Ng = zero_float3();
    ls->D = zero_float3();
    ls->pdf = 1.0f;
-    ls->eval_fac = 0.0f;
    ls->t = FLT_MAX;
    return true;
  }

  if (type == LIGHT_DISTANT) {
-    if (!distant_light_sample(klight, randu, randv, ls)) {
-      return false;
-    }
+    /* distant light */
+    float3 lightD = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+    float3 D = lightD;
+    float radius = klight->distant.radius;
+    float invarea = klight->distant.invarea;
+
+    if (radius > 0.0f)
+      D = distant_light_sample(D, radius, randu, randv);
+
+    ls->P = D;
+    ls->Ng = D;
+    ls->D = -D;
+    ls->t = FLT_MAX;
+
+    float costheta = dot(lightD, D);
+    ls->pdf = invarea / (costheta * costheta * costheta);
+    ls->eval_fac = ls->pdf;
  }
  else if (type == LIGHT_BACKGROUND) {
    /* infinite area light (e.g. light dome or env light) */
@@ -70,28 +96,139 @@ ccl_device_inline bool light_sample(KernelGlobals kg,
    ls->t = FLT_MAX;
    ls->eval_fac = 1.0f;
  }
-  else if (type == LIGHT_SPOT) {
-    if (!spot_light_sample<in_volume_segment>(klight, randu, randv, P, ls)) {
-      return false;
-    }
-  }
-  else if (type == LIGHT_POINT) {
-    if (!point_light_sample<in_volume_segment>(klight, randu, randv, P, ls)) {
-      return false;
-    }
-  }
  else {
-    /* area light */
-    if (!area_light_sample<in_volume_segment>(klight, randu, randv, P, ls)) {
-      return false;
+    ls->P = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+
+    if (type == LIGHT_SPOT) {
+      const float3 center = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+      const float radius = klight->spot.radius;
+      const float3 dir = make_float3(
+          klight->spot.dir[0], klight->spot.dir[1], klight->spot.dir[2]);
+      /* disk oriented normal */
+      const float3 lightN = normalize(P - center);
+      ls->P = center;
+
+      if (radius > 0.0f)
+        /* disk light */
+        ls->P += disk_light_sample(lightN, randu, randv) * radius;
+
+      const float invarea = klight->spot.invarea;
+      ls->pdf = invarea;
+
+      ls->D = normalize_len(ls->P - P, &ls->t);
+      /* we set the light normal to the outgoing direction to support texturing */
+      ls->Ng = -ls->D;
+
+      ls->eval_fac = (0.25f * M_1_PI_F) * invarea;
+
+      /* spot light attenuation */
+      ls->eval_fac *= spot_light_attenuation(
+          dir, klight->spot.spot_angle, klight->spot.spot_smooth, -ls->D);
+      if (!in_volume_segment && ls->eval_fac == 0.0f) {
+        return false;
+      }
+
+      float2 uv = map_to_sphere(ls->Ng);
+      ls->u = uv.x;
+      ls->v = uv.y;
+
+      ls->pdf *= lamp_light_pdf(kg, lightN, -ls->D, ls->t);
+    }
+    else if (type == LIGHT_POINT) {
+      float3 center = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+      float radius = klight->spot.radius;
+      /* disk oriented normal */
+      const float3 lightN = normalize(P - center);
+      ls->P = center;
+
+      if (radius > 0.0f) {
+        ls->P += disk_light_sample(lightN, randu, randv) * radius;
+      }
+      ls->pdf = klight->spot.invarea;
+
+      ls->D = normalize_len(ls->P - P, &ls->t);
+      /* we set the light normal to the outgoing direction to support texturing */
+      ls->Ng = -ls->D;
+
+      ls->eval_fac = M_1_PI_F * 0.25f * klight->spot.invarea;
+      if (!in_volume_segment && ls->eval_fac == 0.0f) {
+        return false;
+      }
+
+      float2 uv = map_to_sphere(ls->Ng);
+      ls->u = uv.x;
+      ls->v = uv.y;
+      ls->pdf *= lamp_light_pdf(kg, lightN, -ls->D, ls->t);
+    }
+    else {
+      /* area light */
+      float3 axisu = make_float3(
+          klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
+      float3 axisv = make_float3(
+          klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
+      float3 Ng = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]);
+      float invarea = fabsf(klight->area.invarea);
+      bool is_round = (klight->area.invarea < 0.0f);
+
+      if (!in_volume_segment) {
+        if (dot(ls->P - P, Ng) > 0.0f) {
+          return false;
+        }
+      }
+
+      float3 inplane;
+
+      if (is_round || in_volume_segment) {
+        inplane = ellipse_sample(axisu * 0.5f, axisv * 0.5f, randu, randv);
+        ls->P += inplane;
+        ls->pdf = invarea;
+      }
+      else {
+        inplane = ls->P;
+
+        float3 sample_axisu = axisu;
+        float3 sample_axisv = axisv;
+
+        if (!in_volume_segment && klight->area.tan_spread > 0.0f) {
+          if (!light_spread_clamp_area_light(
+                  P, Ng, &ls->P, &sample_axisu, &sample_axisv, klight->area.tan_spread)) {
+            return false;
+          }
+        }
+
+        ls->pdf = rect_light_sample(P, &ls->P, sample_axisu, sample_axisv, randu, randv, true);
+        inplane = ls->P - inplane;
+      }
+
+      const float light_u = dot(inplane, axisu) * (1.0f / dot(axisu, axisu));
+      const float light_v = dot(inplane, axisv) * (1.0f / dot(axisv, axisv));
+
+      /* NOTE: Return barycentric coordinates in the same notation as Embree and OptiX. */
+      ls->u = light_v + 0.5f;
+      ls->v = -light_u - light_v;
+
+      ls->Ng = Ng;
+      ls->D = normalize_len(ls->P - P, &ls->t);
+
+      ls->eval_fac = 0.25f * invarea;
+
+      if (klight->area.tan_spread > 0.0f) {
+        /* Area Light spread angle attenuation */
+        ls->eval_fac *= light_spread_attenuation(
+            ls->D, ls->Ng, klight->area.tan_spread, klight->area.normalize_spread);
+      }
+
+      if (is_round) {
+        ls->pdf *= lamp_light_pdf(kg, Ng, -ls->D, ls->t);
+      }
    }
  }

+  ls->pdf *= kernel_data.integrator.pdf_lights;
+
  return in_volume_segment || (ls->pdf > 0.0f);
 }

-/* Intersect ray with individual light. */
-
 ccl_device bool lights_intersect(KernelGlobals kg,
                                 IntegratorState state,
                                 ccl_private const Ray *ccl_restrict ray,
@@ -101,7 +238,7 @@ ccl_device bool lights_intersect(KernelGlobals kg,
                                 const int last_type,
                                 const uint32_t path_flag)
 {
-  for (int lamp = 0; lamp < kernel_data.integrator.num_lights; lamp++) {
+  for (int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) {
    const ccl_global KernelLight *klight = &kernel_data_fetch(lights, lamp);

    if (path_flag & PATH_RAY_CAMERA) {
@@ -134,17 +271,76 @@ ccl_device bool lights_intersect(KernelGlobals kg,
    float t = 0.0f, u = 0.0f, v = 0.0f;

    if (type == LIGHT_SPOT) {
-      if (!spot_light_intersect(klight, ray, &t)) {
+      /* Spot/Disk light. */
+      const float3 lightP = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+      const float radius = klight->spot.radius;
+      if (radius == 0.0f) {
+        continue;
+      }
+      /* disk oriented normal */
+      const float3 lightN = normalize(ray->P - lightP);
+      /* One sided. */
+      if (dot(ray->D, lightN) >= 0.0f) {
+        continue;
+      }
+
+      float3 P;
+      if (!ray_disk_intersect(
+              ray->P, ray->D, ray->tmin, ray->tmax, lightP, lightN, radius, &P, &t)) {
        continue;
      }
    }
    else if (type == LIGHT_POINT) {
-      if (!point_light_intersect(klight, ray, &t)) {
+      /* Sphere light (aka, aligned disk light). */
+      const float3 lightP = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+      const float radius = klight->spot.radius;
+      if (radius == 0.0f) {
+        continue;
+      }
+
+      /* disk oriented normal */
+      const float3 lightN = normalize(ray->P - lightP);
+      float3 P;
+      if (!ray_disk_intersect(
+              ray->P, ray->D, ray->tmin, ray->tmax, lightP, lightN, radius, &P, &t)) {
        continue;
      }
    }
    else if (type == LIGHT_AREA) {
-      if (!area_light_intersect(klight, ray, &t, &u, &v)) {
+      /* Area light. */
+      const float invarea = fabsf(klight->area.invarea);
+      const bool is_round = (klight->area.invarea < 0.0f);
+      if (invarea == 0.0f) {
+        continue;
+      }
+
+      const float3 axisu = make_float3(
+          klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
+      const float3 axisv = make_float3(
+          klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
+      const float3 Ng = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]);
+
+      /* One sided. */
+      if (dot(ray->D, Ng) >= 0.0f) {
+        continue;
+      }
+
+      const float3 light_P = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+
+      float3 P;
+      if (!ray_quad_intersect(ray->P,
+                              ray->D,
+                              ray->tmin,
+                              ray->tmax,
+                              light_P,
+                              axisu,
+                              axisv,
+                              Ng,
+                              &P,
+                              &t,
+                              &u,
+                              &v,
+                              is_round)) {
        continue;
      }
    }
@@ -166,7 +362,78 @@ ccl_device bool lights_intersect(KernelGlobals kg,
  return isect->prim != PRIM_NONE;
 }

-/* Setup light sample from intersection. */
+ccl_device bool light_sample_from_distant_ray(KernelGlobals kg,
+                                              const float3 ray_D,
+                                              const int lamp,
+                                              ccl_private LightSample *ccl_restrict ls)
+{
+  ccl_global const KernelLight *klight = &kernel_data_fetch(lights, lamp);
+  const int shader = klight->shader_id;
+  const float radius = klight->distant.radius;
+  const LightType type = (LightType)klight->type;
+
+  if (type != LIGHT_DISTANT) {
+    return false;
+  }
+  if (!(shader & SHADER_USE_MIS)) {
+    return false;
+  }
+  if (radius == 0.0f) {
+    return false;
+  }
+
+  /* a distant light is infinitely far away, but equivalent to a disk
+   * shaped light exactly 1 unit away from the current shading point.
+   *
+   *     radius              t^2/cos(theta)
+   *  <---------->           t = sqrt(1^2 + tan(theta)^2)
+   *       tan(th)           area = radius*radius*pi
+   *       <----->
+   *        \    |           (1 + tan(theta)^2)/cos(theta)
+   *         \   |           (1 + tan(acos(cos(theta)))^2)/cos(theta)
+   *       t  \th| 1         simplifies to
+   *           \-|           1/(cos(theta)^3)
+   *            \|           magic!
+   *             P
+   */
+
+  float3 lightD = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+  float costheta = dot(-lightD, ray_D);
+  float cosangle = klight->distant.cosangle;
+
+  /* Workaround to prevent a hang in the classroom scene with AMD HIP drivers 22.10.
+   * Remove when a compiler fix is available. */
+#ifdef __HIP__
+  ls->shader = klight->shader_id;
+#endif
+
+  if (costheta < cosangle)
+    return false;
+
+  ls->type = type;
+#ifndef __HIP__
+  ls->shader = klight->shader_id;
+#endif
+  ls->object = PRIM_NONE;
+  ls->prim = PRIM_NONE;
+  ls->lamp = lamp;
+  /* TODO: missing texture coordinates, u and v are set to zero for now. */
+  ls->u = 0.0f;
+  ls->v = 0.0f;
+  ls->t = FLT_MAX;
+  ls->P = -ray_D;
+  ls->Ng = -ray_D;
+  ls->D = ray_D;
+  ls->group = lamp_lightgroup(kg, lamp);
+
+  /* compute pdf */
+  float invarea = klight->distant.invarea;
+  ls->pdf = invarea / (costheta * costheta * costheta);
+  ls->eval_fac = ls->pdf;
+  ls->pdf *= kernel_data.integrator.pdf_lights;
+
+  return true;
+}

 ccl_device bool light_sample_from_intersection(KernelGlobals kg,
                                               ccl_private const Intersection *ccl_restrict isect,
@@ -189,18 +456,102 @@ ccl_device bool light_sample_from_intersection(KernelGlobals kg,
  ls->group = lamp_lightgroup(kg, lamp);

  if (type == LIGHT_SPOT) {
-    if (!spot_light_sample_from_intersection(klight, isect, ray_P, ray_D, ls)) {
+    const float3 center = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+    const float3 dir = make_float3(klight->spot.dir[0], klight->spot.dir[1], klight->spot.dir[2]);
+    /* the normal of the oriented disk */
+    const float3 lightN = normalize(ray_P - center);
+    /* We set the light normal to the outgoing direction to support texturing. */
+    ls->Ng = -ls->D;
+
+    float invarea = klight->spot.invarea;
+    ls->eval_fac = (0.25f * M_1_PI_F) * invarea;
+    ls->pdf = invarea;
+
+    /* spot light attenuation */
+    ls->eval_fac *= spot_light_attenuation(
+        dir, klight->spot.spot_angle, klight->spot.spot_smooth, -ls->D);
+
+    if (ls->eval_fac == 0.0f) {
      return false;
    }
+
+    float2 uv = map_to_sphere(ls->Ng);
+    ls->u = uv.x;
+    ls->v = uv.y;
+
+    /* compute pdf */
+    if (ls->t != FLT_MAX)
+      ls->pdf *= lamp_light_pdf(kg, lightN, -ls->D, ls->t);
+    else
+      ls->pdf = 0.f;
  }
  else if (type == LIGHT_POINT) {
-    if (!point_light_sample_from_intersection(klight, isect, ray_P, ray_D, ls)) {
+    const float3 center = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+    const float3 lighN = normalize(ray_P - center);
+
+    /* We set the light normal to the outgoing direction to support texturing. */
+    ls->Ng = -ls->D;
+
+    float invarea = klight->spot.invarea;
+    ls->eval_fac = (0.25f * M_1_PI_F) * invarea;
+    ls->pdf = invarea;
+
+    if (ls->eval_fac == 0.0f) {
      return false;
    }
+
+    float2 uv = map_to_sphere(ls->Ng);
+    ls->u = uv.x;
+    ls->v = uv.y;
+
+    /* compute pdf */
+    if (ls->t != FLT_MAX)
+      ls->pdf *= lamp_light_pdf(kg, lighN, -ls->D, ls->t);
+    else
+      ls->pdf = 0.f;
  }
  else if (type == LIGHT_AREA) {
-    if (!area_light_sample_from_intersection(klight, isect, ray_P, ray_D, ls)) {
-      return false;
+    /* area light */
+    float invarea = fabsf(klight->area.invarea);
+
+    float3 axisu = make_float3(
+        klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
+    float3 axisv = make_float3(
+        klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
+    float3 Ng = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]);
+    float3 light_P = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+
+    ls->u = isect->u;
+    ls->v = isect->v;
+    ls->D = ray_D;
+    ls->Ng = Ng;
+
+    const bool is_round = (klight->area.invarea < 0.0f);
+    if (is_round) {
+      ls->pdf = invarea * lamp_light_pdf(kg, Ng, -ray_D, ls->t);
+    }
+    else {
+      float3 sample_axisu = axisu;
+      float3 sample_axisv = axisv;
+
+      if (klight->area.tan_spread > 0.0f) {
+        if (!light_spread_clamp_area_light(
+                ray_P, Ng, &light_P, &sample_axisu, &sample_axisv, klight->area.tan_spread)) {
+          return false;
+        }
+      }
+
+      ls->pdf = rect_light_sample(ray_P, &light_P, sample_axisu, sample_axisv, 0, 0, false);
+    }
+    ls->eval_fac = 0.25f * invarea;
+
+    if (klight->area.tan_spread > 0.0f) {
+      /* Area Light spread angle attenuation */
+      ls->eval_fac *= light_spread_attenuation(
+          ls->D, ls->Ng, klight->area.tan_spread, klight->area.normalize_spread);
+      if (ls->eval_fac == 0.0f) {
+        return false;
+      }
    }
  }
  else {
@@ -208,33 +559,411 @@ ccl_device bool light_sample_from_intersection(KernelGlobals kg,
    return false;
  }

+  ls->pdf *= kernel_data.integrator.pdf_lights;
+
  return true;
 }

-/* Update light sample for changed new position, for MNEE. */
+/* Triangle Light */

-ccl_device_forceinline void light_update_position(KernelGlobals kg,
-                                                  ccl_private LightSample *ls,
-                                                  const float3 P)
+/* Get world space vertices; returns true if vertex motion or an object transform was applied. */
+ccl_device_inline bool triangle_world_space_vertices(
+    KernelGlobals kg, int object, int prim, float time, float3 V[3])
 {
-  const ccl_global KernelLight *klight = &kernel_data_fetch(lights, ls->lamp);
+  bool has_motion = false;
+  const int object_flag = kernel_data_fetch(object_flag, object);

-  if (ls->type == LIGHT_POINT) {
-    point_light_update_position(klight, ls, P);
+  if (object_flag & SD_OBJECT_HAS_VERTEX_MOTION && time >= 0.0f) {
+    motion_triangle_vertices(kg, object, prim, time, V);
+    has_motion = true;
   }
-  else if (ls->type == LIGHT_SPOT) {
-    spot_light_update_position(klight, ls, P);
+  else {
+    triangle_vertices(kg, prim, V);
   }
-  else if (ls->type == LIGHT_AREA) {
-    area_light_update_position(klight, ls, P);
+
+  if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+#ifdef __OBJECT_MOTION__
+    float object_time = (time >= 0.0f) ? time : 0.5f; /* time < 0 falls back to 0.5 */
+    Transform tfm = object_fetch_transform_motion_test(kg, object, object_time, NULL);
+#else
+    Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+#endif
+    V[0] = transform_point(&tfm, V[0]);
+    V[1] = transform_point(&tfm, V[1]);
+    V[2] = transform_point(&tfm, V[2]);
+    has_motion = true;
+  }
+  return has_motion;
+}
+
+ccl_device_inline float triangle_light_pdf_area(KernelGlobals kg,
+                                                const float3 Ng,
+                                                const float3 I,
+                                                float t)
+{
+  float pdf = kernel_data.integrator.pdf_triangles;
+  float cos_pi = fabsf(dot(Ng, I)); /* absolute cosine between Ng and I */
+
+  if (cos_pi == 0.0f) /* avoid dividing by zero below */
+    return 0.0f;
+
+  return t * t * pdf / cos_pi;
+}
+
+ccl_device_forceinline float triangle_light_pdf(KernelGlobals kg,
+                                                ccl_private const ShaderData *sd,
+                                                float t)
+{
+  /* A naive heuristic to decide between costly solid angle sampling
+   * and simple area sampling, comparing the distance to the triangle plane
+   * to the length of the edges of the triangle. */
+
+  float3 V[3];
+  bool has_motion = triangle_world_space_vertices(kg, sd->object, sd->prim, sd->time, V);
+
+  const float3 e0 = V[1] - V[0];
+  const float3 e1 = V[2] - V[0];
+  const float3 e2 = V[2] - V[1];
+  const float longest_edge_squared = max(len_squared(e0), max(len_squared(e1), len_squared(e2)));
+  const float3 N = cross(e0, e1);
+  const float distance_to_plane = fabsf(dot(N, sd->I * t)) / dot(N, N);
+
+  if (longest_edge_squared > distance_to_plane * distance_to_plane) {
+    /* sd contains the point on the light source;
+     * calculate Px, the point that we're shading. */
+    const float3 Px = sd->P + sd->I * t;
+    const float3 v0_p = V[0] - Px;
+    const float3 v1_p = V[1] - Px;
+    const float3 v2_p = V[2] - Px;
+
+    const float3 u01 = safe_normalize(cross(v0_p, v1_p));
+    const float3 u02 = safe_normalize(cross(v0_p, v2_p));
+    const float3 u12 = safe_normalize(cross(v1_p, v2_p));
+
+    const float alpha = fast_acosf(dot(u02, u01));
+    const float beta = fast_acosf(-dot(u01, u12));
+    const float gamma = fast_acosf(dot(u02, u12));
+    const float solid_angle = alpha + beta + gamma - M_PI_F;
+
+    /* pdf_triangles is calculated over triangle area, but we're sampling over solid angle */
+    if (UNLIKELY(solid_angle == 0.0f)) {
+      return 0.0f;
+    }
+    else {
+      float area = 1.0f;
+      if (has_motion) {
+        /* get the center frame vertices, this is what the PDF was calculated from */
+        triangle_world_space_vertices(kg, sd->object, sd->prim, -1.0f, V);
+        area = triangle_area(V[0], V[1], V[2]);
+      }
+      else {
+        area = 0.5f * len(N);
+      }
+      const float pdf = area * kernel_data.integrator.pdf_triangles;
+      return pdf / solid_angle;
+    }
+  }
+  else {
+    float pdf = triangle_light_pdf_area(kg, sd->Ng, sd->I, t);
+    if (has_motion) {
+      const float area = 0.5f * len(N);
+      if (UNLIKELY(area == 0.0f)) {
+        return 0.0f;
+      }
+      /* Scale the PDF.
+       * area = the area the sample was taken from
+       * area_pre = the area from which pdf_triangles was calculated */
+      triangle_world_space_vertices(kg, sd->object, sd->prim, -1.0f, V);
+      const float area_pre = triangle_area(V[0], V[1], V[2]);
+      pdf = pdf * area_pre / area;
+    }
+    return pdf;
   }
 }

-/* Light info. */
+template<bool in_volume_segment>
+ccl_device_forceinline void triangle_light_sample(KernelGlobals kg,
+                                                  int prim,
+                                                  int object,
+                                                  float randu,
+                                                  float randv,
+                                                  float time,
+                                                  ccl_private LightSample *ls,
+                                                  const float3 P)
+{
+  /* A naive heuristic to decide between costly solid angle sampling
+   * and simple area sampling, comparing the distance to the triangle plane
+   * to the length of the edges of the triangle. */
+
+  float3 V[3];
+  bool has_motion = triangle_world_space_vertices(kg, object, prim, time, V);
+
+  const float3 e0 = V[1] - V[0];
+  const float3 e1 = V[2] - V[0];
+  const float3 e2 = V[2] - V[1];
+  const float longest_edge_squared = max(len_squared(e0), max(len_squared(e1), len_squared(e2)));
+  const float3 N0 = cross(e0, e1);
+  float Nl = 0.0f;
+  ls->Ng = safe_normalize_len(N0, &Nl);
+  float area = 0.5f * Nl;
+
+  /* Flip the normal if the object has a negative scale applied. */
+  const int object_flag = kernel_data_fetch(object_flag, object);
+  if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
+    ls->Ng = -ls->Ng;
+  }
+  ls->eval_fac = 1.0f;
+  ls->shader = kernel_data_fetch(tri_shader, prim);
+  ls->object = object;
+  ls->prim = prim;
+  ls->lamp = LAMP_NONE;
+  ls->shader |= SHADER_USE_MIS;
+  ls->type = LIGHT_TRIANGLE;
+  ls->group = object_lightgroup(kg, object);
+
+  float distance_to_plane = fabsf(dot(N0, V[0] - P) / dot(N0, N0));
+
+  if (!in_volume_segment && (longest_edge_squared > distance_to_plane * distance_to_plane)) {
+    /* see James Arvo, "Stratified Sampling of Spherical Triangles"
+     * http://www.graphics.cornell.edu/pubs/1995/Arv95c.pdf */
+
+    /* project the triangle to the unit sphere
+     * and calculate its edges and angles */
+    const float3 v0_p = V[0] - P;
+    const float3 v1_p = V[1] - P;
+    const float3 v2_p = V[2] - P;
+
+    const float3 u01 = safe_normalize(cross(v0_p, v1_p));
+    const float3 u02 = safe_normalize(cross(v0_p, v2_p));
+    const float3 u12 = safe_normalize(cross(v1_p, v2_p));
+
+    const float3 A = safe_normalize(v0_p);
+    const float3 B = safe_normalize(v1_p);
+    const float3 C = safe_normalize(v2_p);
+
+    const float cos_alpha = dot(u02, u01);
+    const float cos_beta = -dot(u01, u12);
+    const float cos_gamma = dot(u02, u12);
+
+    /* calculate dihedral angles */
+    const float alpha = fast_acosf(cos_alpha);
+    const float beta = fast_acosf(cos_beta);
+    const float gamma = fast_acosf(cos_gamma);
+    /* the area of the unit spherical triangle = solid angle */
+    const float solid_angle = alpha + beta + gamma - M_PI_F;
+
+    /* precompute a few things
+     * these could be re-used to take several samples
+     * as they are independent of randu/randv */
+    const float cos_c = dot(A, B);
+    const float sin_alpha = fast_sinf(alpha);
+    const float product = sin_alpha * cos_c;
+
+    /* Select a random sub-area of the spherical triangle
+     * and calculate the third vertex C_ of that new triangle */
+    const float phi = randu * solid_angle - alpha;
+    float s, t;
+    fast_sincosf(phi, &s, &t);
+    const float u = t - cos_alpha;
+    const float v = s + product;
+
+    const float3 U = safe_normalize(C - dot(C, A) * A);
+
+    float q = 1.0f;
+    const float det = ((v * s + u * t) * sin_alpha);
+    if (det != 0.0f) {
+      q = ((v * t - u * s) * cos_alpha - v) / det;
+    }
+    const float temp = max(1.0f - q * q, 0.0f);
+
+    const float3 C_ = safe_normalize(q * A + sqrtf(temp) * U);
+
+    /* Finally, select a random point along the edge of the new triangle.
+     * That point on the spherical triangle is the sampled ray direction. */
+    const float z = 1.0f - randv * (1.0f - dot(C_, B));
+    ls->D = z * B + safe_sqrtf(1.0f - z * z) * safe_normalize(C_ - dot(C_, B) * B);
+
+    /* calculate intersection with the planar triangle */
+    if (!ray_triangle_intersect(
+            P, ls->D, 0.0f, FLT_MAX, V[0], V[1], V[2], &ls->u, &ls->v, &ls->t)) {
+      ls->pdf = 0.0f;
+      return;
+    }
+
+    ls->P = P + ls->D * ls->t;
+
+    /* pdf_triangles is calculated over triangle area, but we're sampling over solid angle */
+    if (UNLIKELY(solid_angle == 0.0f)) {
+      ls->pdf = 0.0f;
+      return;
+    }
+    else {
+      if (has_motion) {
+        /* get the center frame vertices, this is what the PDF was calculated from */
+        triangle_world_space_vertices(kg, object, prim, -1.0f, V);
+        area = triangle_area(V[0], V[1], V[2]);
+      }
+      const float pdf = area * kernel_data.integrator.pdf_triangles;
+      ls->pdf = pdf / solid_angle;
+    }
+  }
+  else {
+    /* compute random point in triangle. From Eric Heitz's "A Low-Distortion Map Between Triangle
+     * and Square" */
+    float u = randu;
+    float v = randv;
+    if (v > u) {
+      u *= 0.5f;
+      v -= u;
+    }
+    else {
+      v *= 0.5f;
+      u -= v;
+    }
+
+    const float t = 1.0f - u - v;
+    ls->P = u * V[0] + v * V[1] + t * V[2];
+    /* compute incoming direction, distance and pdf */
+    ls->D = normalize_len(ls->P - P, &ls->t);
+    ls->pdf = triangle_light_pdf_area(kg, ls->Ng, -ls->D, ls->t);
+    if (has_motion && area != 0.0f) {
+      /* Scale the PDF.
+       * area = the area the sample was taken from
+       * area_pre = the area from which pdf_triangles was calculated */
+      triangle_world_space_vertices(kg, object, prim, -1.0f, V);
+      const float area_pre = triangle_area(V[0], V[1], V[2]);
+      ls->pdf = ls->pdf * area_pre / area;
+    }
+    ls->u = u;
+    ls->v = v;
+  }
+}
+
+/* Light Distribution */
+
+ccl_device int light_distribution_sample(KernelGlobals kg, ccl_private float *randu)
+{
+  /* This is basically std::upper_bound as used by PBRT, to find a point light or
+   * triangle to emit from, proportional to area. A good improvement would be to
+   * also sample proportional to power, though it's not so well defined with
+   * arbitrary shaders. */
+  int first = 0;
+  int len = kernel_data.integrator.num_distribution + 1;
+  float r = *randu;
+
+  do {
+    int half_len = len >> 1;
+    int middle = first + half_len;
+
+    if (r < kernel_data_fetch(light_distribution, middle).totarea) {
+      len = half_len;
+    }
+    else {
+      first = middle + 1;
+      len = len - half_len - 1;
+    }
+  } while (len > 0);
+
+  /* Clamping should not be needed but float rounding errors seem to
+   * make this fail on rare occasions. */
+  int index = clamp(first - 1, 0, kernel_data.integrator.num_distribution - 1);
+
+  /* Rescale to reuse the random number. This helps the 2D samples within
+   * each area light be stratified as well. */
+  float distr_min = kernel_data_fetch(light_distribution, index).totarea;
+  float distr_max = kernel_data_fetch(light_distribution, index + 1).totarea;
+  *randu = (r - distr_min) / (distr_max - distr_min);
+
+  return index;
+}
+
+/* Generic Light */

 ccl_device_inline bool light_select_reached_max_bounces(KernelGlobals kg, int index, int bounce)
 {
  return (bounce > kernel_data_fetch(lights, index).max_bounces);
 }

+template<bool in_volume_segment>
+ccl_device_noinline bool light_distribution_sample(KernelGlobals kg,
+                                                   float randu,
+                                                   const float randv,
+                                                   const float time,
+                                                   const float3 P,
+                                                   const int bounce,
+                                                   const uint32_t path_flag,
+                                                   ccl_private LightSample *ls)
+{
+  /* Sample light index from distribution; this also rescales randu for reuse below. */
+  const int index = light_distribution_sample(kg, &randu);
+  ccl_global const KernelLightDistribution *kdistribution = &kernel_data_fetch(light_distribution,
+                                                                               index);
+  const int prim = kdistribution->prim;
+
+  if (prim >= 0) {
+    /* Mesh light. */
+    const int object = kdistribution->mesh_light.object_id;
+
+    /* Exclude synthetic meshes from shadow catcher pass. */
+    if ((path_flag & PATH_RAY_SHADOW_CATCHER_PASS) &&
+        !(kernel_data_fetch(object_flag, object) & SD_OBJECT_SHADOW_CATCHER)) {
+      return false;
+    }
+
+    const int shader_flag = kdistribution->mesh_light.shader_flag;
+    triangle_light_sample<in_volume_segment>(kg, prim, object, randu, randv, time, ls, P);
+    ls->shader |= shader_flag;
+    return (ls->pdf > 0.0f);
+  }
+
+  const int lamp = -prim - 1; /* negative prim encodes a lamp index */
+
+  if (UNLIKELY(light_select_reached_max_bounces(kg, lamp, bounce))) {
+    return false;
+  }
+
+  return light_sample<in_volume_segment>(kg, lamp, randu, randv, P, path_flag, ls);
+}
+
+ccl_device_inline bool light_distribution_sample_from_volume_segment(KernelGlobals kg,
+                                                                     float randu,
+                                                                     const float randv,
+                                                                     const float time,
+                                                                     const float3 P,
+                                                                     const int bounce,
+                                                                     const uint32_t path_flag,
+                                                                     ccl_private LightSample *ls)
+{ /* Forwards to light_distribution_sample with in_volume_segment = true. */
+  return light_distribution_sample<true>(kg, randu, randv, time, P, bounce, path_flag, ls);
+}
+
+ccl_device_inline bool light_distribution_sample_from_position(KernelGlobals kg,
+                                                               float randu,
+                                                               const float randv,
+                                                               const float time,
+                                                               const float3 P,
+                                                               const int bounce,
+                                                               const uint32_t path_flag,
+                                                               ccl_private LightSample *ls)
+{ /* Forwards to light_distribution_sample with in_volume_segment = false. */
+  return light_distribution_sample<false>(kg, randu, randv, time, P, bounce, path_flag, ls);
+}
+
+ccl_device_inline bool light_distribution_sample_new_position(KernelGlobals kg,
+                                                              const float randu,
+                                                              const float randv,
+                                                              const float time,
+                                                              const float3 P,
+                                                              ccl_private LightSample *ls)
+{
+  /* Sample a new position on the light already stored in ls, for volume sampling. */
+  if (ls->type == LIGHT_TRIANGLE) {
+    triangle_light_sample<false>(kg, ls->prim, ls->object, randu, randv, time, ls, P);
+    return (ls->pdf > 0.0f);
+  }
+  else {
+    return light_sample<false>(kg, ls->lamp, randu, randv, P, 0, ls); /* path_flag = 0 */
+  }
+}
+
 CCL_NAMESPACE_END
--- a/intern/cycles/kernel/light/point.h
+++ b/intern/cycles/kernel/light/point.h
@@ -1,136 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2011-2022 Blender Foundation */
-
-#pragma once
-
-#include "kernel/light/common.h"
-
-CCL_NAMESPACE_BEGIN
-
-template<bool in_volume_segment>
-ccl_device_inline bool point_light_sample(const ccl_global KernelLight *klight,
-                                          const float randu,
-                                          const float randv,
-                                          const float3 P,
-                                          ccl_private LightSample *ls)
-{
-  float3 center = klight->co;
-  float radius = klight->spot.radius;
-  /* disk oriented normal */
-  const float3 lightN = normalize(P - center);
-  ls->P = center;
-
-  if (radius > 0.0f) {
-    ls->P += disk_light_sample(lightN, randu, randv) * radius;
-  }
-  ls->pdf = klight->spot.invarea;
-
-  ls->D = normalize_len(ls->P - P, &ls->t);
-  /* we set the light normal to the outgoing direction to support texturing */
-  ls->Ng = -ls->D;
-
-  ls->eval_fac = M_1_PI_F * 0.25f * klight->spot.invarea;
-  if (!in_volume_segment && ls->eval_fac == 0.0f) {
-    return false;
-  }
-
-  float2 uv = map_to_sphere(ls->Ng);
-  ls->u = uv.x;
-  ls->v = uv.y;
-  ls->pdf *= lamp_light_pdf(lightN, -ls->D, ls->t);
-  return true;
-}
-
-ccl_device_forceinline void point_light_update_position(const ccl_global KernelLight *klight,
-                                                        ccl_private LightSample *ls,
-                                                        const float3 P)
-{
-  ls->D = normalize_len(ls->P - P, &ls->t);
-  ls->Ng = -ls->D;
-
-  float2 uv = map_to_sphere(ls->Ng);
-  ls->u = uv.x;
-  ls->v = uv.y;
-
-  float invarea = klight->spot.invarea;
-  ls->eval_fac = (0.25f * M_1_PI_F) * invarea;
-  ls->pdf = invarea;
-}
-
-ccl_device_inline bool point_light_intersect(const ccl_global KernelLight *klight,
-                                             const ccl_private Ray *ccl_restrict ray,
-                                             ccl_private float *t)
-{
-  /* Sphere light (aka, aligned disk light). */
-  const float3 lightP = klight->co;
-  const float radius = klight->spot.radius;
-  if (radius == 0.0f) {
-    return false;
-  }
-
-  /* disk oriented normal */
-  const float3 lightN = normalize(ray->P - lightP);
-  float3 P;
-  return ray_disk_intersect(ray->P, ray->D, ray->tmin, ray->tmax, lightP, lightN, radius, &P, t);
-}
-
-ccl_device_inline bool point_light_sample_from_intersection(
-    const ccl_global KernelLight *klight,
-    ccl_private const Intersection *ccl_restrict isect,
-    const float3 ray_P,
-    const float3 ray_D,
-    ccl_private LightSample *ccl_restrict ls)
-{
-  const float3 lighN = normalize(ray_P - klight->co);
-
-  /* We set the light normal to the outgoing direction to support texturing. */
-  ls->Ng = -ls->D;
-
-  float invarea = klight->spot.invarea;
-  ls->eval_fac = (0.25f * M_1_PI_F) * invarea;
-  ls->pdf = invarea;
-
-  if (ls->eval_fac == 0.0f) {
-    return false;
-  }
-
-  float2 uv = map_to_sphere(ls->Ng);
-  ls->u = uv.x;
-  ls->v = uv.y;
-
-  /* compute pdf */
-  if (ls->t != FLT_MAX) {
-    ls->pdf *= lamp_light_pdf(lighN, -ls->D, ls->t);
-  }
-  else {
-    ls->pdf = 0.f;
-  }
-
-  return true;
-}
-
-template<bool in_volume_segment>
-ccl_device_forceinline bool point_light_tree_parameters(const ccl_global KernelLight *klight,
-                                                        const float3 centroid,
-                                                        const float3 P,
-                                                        ccl_private float &cos_theta_u,
-                                                        ccl_private float2 &distance,
-                                                        ccl_private float3 &point_to_centroid)
-{
-  if (in_volume_segment) {
-    cos_theta_u = 1.0f; /* Any value in [-1, 1], irrelevant since theta = 0 */
-    return true;
-  }
-  float min_distance;
-  point_to_centroid = safe_normalize_len(centroid - P, &min_distance);
-
-  const float radius = klight->spot.radius;
-  const float hypotenus = sqrtf(sqr(radius) + sqr(min_distance));
-  cos_theta_u = min_distance / hypotenus;
-
-  distance = make_float2(hypotenus, min_distance);
-
-  return true;
-}
-
-CCL_NAMESPACE_END
--- a/intern/cycles/kernel/light/sample.h
+++ b/intern/cycles/kernel/light/sample.h
@@ -6,13 +6,8 @@
 #include "kernel/integrator/path_state.h"
 #include "kernel/integrator/surface_shader.h"

-#include "kernel/light/distribution.h"
 #include "kernel/light/light.h"

-#ifdef __LIGHT_TREE__
-#  include "kernel/light/tree.h"
-#endif
-
 #include "kernel/sample/mapping.h"
 #include "kernel/sample/mis.h"

@@ -282,8 +277,6 @@ ccl_device_inline void light_sample_to_volume_shadow_ray(
  shadow_ray_setup(sd, ls, P, ray, false);
 }

-/* Multiple importance sampling weights. */
-
 ccl_device_inline float light_sample_mis_weight_forward(KernelGlobals kg,
                                                        const float forward_pdf,
                                                        const float nee_pdf)
@@ -316,333 +309,4 @@ ccl_device_inline float light_sample_mis_weight_nee(KernelGlobals kg,
    return power_heuristic(nee_pdf, forward_pdf);
 }

-/* Next event estimation sampling.
- *
- * Sample a position on a light in the scene, from a position on a surface or
- * from a volume segment.
- *
- * Uses either a flat distribution or light tree. */
-
-ccl_device_inline bool light_sample_from_volume_segment(KernelGlobals kg,
-                                                        float randu,
-                                                        float randv,
-                                                        const float time,
-                                                        const float3 P,
-                                                        const float3 D,
-                                                        const float t,
-                                                        const int bounce,
-                                                        const uint32_t path_flag,
-                                                        ccl_private LightSample *ls)
-{
-  /* Select an emitter. */
-  int emitter_object = 0;
-  int emitter_prim = 0;
-  int emitter_shader_flag = 0;
-  float emitter_pdf_selection = 0.0f;
-
-#ifdef __LIGHT_TREE__
-  if (kernel_data.integrator.use_light_tree) {
-    if (!light_tree_sample<true>(kg,
-                                 randu,
-                                 randv,
-                                 time,
-                                 P,
-                                 D,
-                                 t,
-                                 SD_BSDF_HAS_TRANSMISSION,
-                                 bounce,
-                                 path_flag,
-                                 emitter_object,
-                                 emitter_prim,
-                                 emitter_shader_flag,
-                                 emitter_pdf_selection)) {
-      return false;
-    }
-  }
-  else
-#endif
-  {
-    if (!light_distribution_sample(kg,
-                                   randu,
-                                   randv,
-                                   time,
-                                   P,
-                                   bounce,
-                                   path_flag,
-                                   emitter_object,
-                                   emitter_prim,
-                                   emitter_shader_flag,
-                                   emitter_pdf_selection)) {
-      return false;
-    }
-  }
-
-  /* Set first, triangle light sampling from flat distribution will override. */
-  ls->pdf_selection = emitter_pdf_selection;
-
-  /* Sample a point on the chosen emitter. */
-  if (emitter_prim >= 0) {
-    /* Mesh light. */
-    /* Exclude synthetic meshes from shadow catcher pass. */
-    if ((path_flag & PATH_RAY_SHADOW_CATCHER_PASS) &&
-        !(kernel_data_fetch(object_flag, emitter_object) & SD_OBJECT_SHADOW_CATCHER)) {
-      return false;
-    }
-
-    if (!triangle_light_sample<true>(
-            kg, emitter_prim, emitter_object, randu, randv, time, ls, P)) {
-      return false;
-    }
-  }
-  else {
-    /* Light object. */
-    const int lamp = ~emitter_prim;
-
-    if (UNLIKELY(light_select_reached_max_bounces(kg, lamp, bounce))) {
-      return false;
-    }
-
-    if (!light_sample<true>(kg, lamp, randu, randv, P, path_flag, ls)) {
-      return false;
-    }
-  }
-
-  ls->pdf *= ls->pdf_selection;
-  ls->shader |= emitter_shader_flag;
-
-  return (ls->pdf > 0);
-}
-
-ccl_device bool light_sample_from_position(KernelGlobals kg,
-                                           ccl_private const RNGState *rng_state,
-                                           float randu,
-                                           float randv,
-                                           const float time,
-                                           const float3 P,
-                                           const float3 N,
-                                           const int shader_flags,
-                                           const int bounce,
-                                           const uint32_t path_flag,
-                                           ccl_private LightSample *ls)
-{
-  /* Select an emitter. */
-  int emitter_object = 0;
-  int emitter_prim = 0;
-  int emitter_shader_flag = 0;
-  float emitter_pdf_selection = 0.0f;
-
-#ifdef __LIGHT_TREE__
-  if (kernel_data.integrator.use_light_tree) {
-    if (!light_tree_sample<false>(kg,
-                                  randu,
-                                  randv,
-                                  time,
-                                  P,
-                                  N,
-                                  0,
-                                  shader_flags,
-                                  bounce,
-                                  path_flag,
-                                  emitter_object,
-                                  emitter_prim,
-                                  emitter_shader_flag,
-                                  emitter_pdf_selection)) {
-      return false;
-    }
-  }
-  else
-#endif
-  {
-    if (!light_distribution_sample(kg,
-                                   randu,
-                                   randv,
-                                   time,
-                                   P,
-                                   bounce,
-                                   path_flag,
-                                   emitter_object,
-                                   emitter_prim,
-                                   emitter_shader_flag,
-                                   emitter_pdf_selection)) {
-      return false;
-    }
-  }
-
-  /* Set first, triangle light sampling from flat distribution will override. */
-  ls->pdf_selection = emitter_pdf_selection;
-
-  /* Sample a point on the chosen emitter.
-   * TODO: deduplicate code with light_sample_from_volume_segment? */
-  if (emitter_prim >= 0) {
-    /* Mesh light. */
-    /* Exclude synthetic meshes from shadow catcher pass. */
-    if ((path_flag & PATH_RAY_SHADOW_CATCHER_PASS) &&
-        !(kernel_data_fetch(object_flag, emitter_object) & SD_OBJECT_SHADOW_CATCHER)) {
-      return false;
-    }
-
-    if (!triangle_light_sample<false>(
-            kg, emitter_prim, emitter_object, randu, randv, time, ls, P)) {
-      return false;
-    }
-  }
-  else {
-    /* Light object. */
-    const int lamp = ~emitter_prim;
-
-    if (UNLIKELY(light_select_reached_max_bounces(kg, lamp, bounce))) {
-      return false;
-    }
-
-    if (!light_sample<false>(kg, lamp, randu, randv, P, path_flag, ls)) {
-      return false;
-    }
-  }
-
-  ls->pdf *= ls->pdf_selection;
-  ls->shader |= emitter_shader_flag;
-
-  return (ls->pdf > 0);
-}
-
-ccl_device_inline bool light_sample_new_position(KernelGlobals kg,
-                                                 const float randu,
-                                                 const float randv,
-                                                 const float time,
-                                                 const float3 P,
-                                                 ccl_private LightSample *ls)
-{
-  /* Sample a new position on the same light, for volume sampling. */
-  if (ls->type == LIGHT_TRIANGLE) {
-    if (!triangle_light_sample<false>(kg, ls->prim, ls->object, randu, randv, time, ls, P)) {
-      return false;
-    }
-
-#ifdef __LIGHT_TREE__
-    if (kernel_data.integrator.use_light_tree) {
-      ls->pdf *= ls->pdf_selection;
-    }
-    else
-#endif
-    {
-      /* Handled in triangle_light_sample for efficiency. */
-    }
-    return true;
-  }
-  else {
-    if (!light_sample<false>(kg, ls->lamp, randu, randv, P, 0, ls)) {
-      return false;
-    }
-    ls->pdf *= ls->pdf_selection;
-    return true;
-  }
-}
-
-ccl_device_forceinline void light_sample_update_position(KernelGlobals kg,
-                                                         ccl_private LightSample *ls,
-                                                         const float3 P)
-{
-  /* Update light sample for new shading point position, while keeping
-   * position on the light fixed. */
-
-  /* NOTE : preserve pdf in area measure. */
-  light_update_position(kg, ls, P);
-
-  /* Re-apply already computed selection pdf. */
-  ls->pdf *= ls->pdf_selection;
-}
-
-/* Forward sampling.
- *
- * Multiple importance sampling weights for hitting surface, light or background
- * through indirect light ray.
- *
- * The BSDF or phase pdf from the previous bounce was stored in mis_ray_pdf and
- * is used for balancing with the light sampling pdf. */
-
-ccl_device_inline float light_sample_mis_weight_forward_surface(KernelGlobals kg,
-                                                                IntegratorState state,
-                                                                const uint32_t path_flag,
-                                                                const ccl_private ShaderData *sd)
-{
-  const float bsdf_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
-  const float t = sd->ray_length;
-  float pdf = triangle_light_pdf(kg, sd, t);
-
-  /* Light selection pdf. */
-#ifdef __LIGHT_TREE__
-  if (kernel_data.integrator.use_light_tree) {
-    float3 ray_P = INTEGRATOR_STATE(state, ray, P);
-    const float3 N = INTEGRATOR_STATE(state, path, mis_origin_n);
-    uint lookup_offset = kernel_data_fetch(object_lookup_offset, sd->object);
-    uint prim_offset = kernel_data_fetch(object_prim_offset, sd->object);
-    pdf *= light_tree_pdf(kg, ray_P, N, path_flag, sd->prim - prim_offset + lookup_offset);
-  }
-  else
-#endif
-  {
-    /* Handled in triangle_light_pdf for efficiency. */
-  }
-
-  return light_sample_mis_weight_forward(kg, bsdf_pdf, pdf);
-}
-
-ccl_device_inline float light_sample_mis_weight_forward_lamp(KernelGlobals kg,
-                                                             IntegratorState state,
-                                                             const uint32_t path_flag,
-                                                             const ccl_private LightSample *ls,
-                                                             const float3 P)
-{
-  const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
-  float pdf = ls->pdf;
-
-  /* Light selection pdf. */
-#ifdef __LIGHT_TREE__
-  if (kernel_data.integrator.use_light_tree) {
-    const float3 N = INTEGRATOR_STATE(state, path, mis_origin_n);
-    pdf *= light_tree_pdf(kg, P, N, path_flag, ~ls->lamp);
-  }
-  else
-#endif
-  {
-    pdf *= light_distribution_pdf_lamp(kg);
-  }
-
-  return light_sample_mis_weight_forward(kg, mis_ray_pdf, pdf);
-}
-
-ccl_device_inline float light_sample_mis_weight_forward_distant(KernelGlobals kg,
-                                                                IntegratorState state,
-                                                                const uint32_t path_flag,
-                                                                const ccl_private LightSample *ls)
-{
-  const float3 ray_P = INTEGRATOR_STATE(state, ray, P);
-  return light_sample_mis_weight_forward_lamp(kg, state, path_flag, ls, ray_P);
-}
-
-ccl_device_inline float light_sample_mis_weight_forward_background(KernelGlobals kg,
-                                                                   IntegratorState state,
-                                                                   const uint32_t path_flag)
-{
-  const float3 ray_P = INTEGRATOR_STATE(state, ray, P);
-  const float3 ray_D = INTEGRATOR_STATE(state, ray, D);
-  const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
-
-  float pdf = background_light_pdf(kg, ray_P, ray_D);
-
-  /* Light selection pdf. */
-#ifdef __LIGHT_TREE__
-  if (kernel_data.integrator.use_light_tree) {
-    const float3 N = INTEGRATOR_STATE(state, path, mis_origin_n);
-    pdf *= light_tree_pdf(kg, ray_P, N, path_flag, ~kernel_data.background.light_index);
-  }
-  else
-#endif
-  {
-    pdf *= light_distribution_pdf_lamp(kg);
-  }
-
-  return light_sample_mis_weight_forward(kg, mis_ray_pdf, pdf);
-}
-
 CCL_NAMESPACE_END
--- a/intern/cycles/kernel/light/spot.h
+++ b/intern/cycles/kernel/light/spot.h
@@ -1,179 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2011-2022 Blender Foundation */
-
-#pragma once
-
-#include "kernel/light/common.h"
-
-CCL_NAMESPACE_BEGIN
-
-ccl_device float spot_light_attenuation(float3 dir,
-                                        float cos_half_spot_angle,
-                                        float spot_smooth,
-                                        float3 N)
-{
-  float attenuation = dot(dir, N);
-
-  if (attenuation <= cos_half_spot_angle) {
-    attenuation = 0.0f;
-  }
-  else {
-    float t = attenuation - cos_half_spot_angle;
-
-    if (t < spot_smooth && spot_smooth != 0.0f)
-      attenuation *= smoothstepf(t / spot_smooth);
-  }
-
-  return attenuation;
-}
-
-template<bool in_volume_segment>
-ccl_device_inline bool spot_light_sample(const ccl_global KernelLight *klight,
-                                         const float randu,
-                                         const float randv,
-                                         const float3 P,
-                                         ccl_private LightSample *ls)
-{
-  ls->P = klight->co;
-
-  const float3 center = klight->co;
-  const float radius = klight->spot.radius;
-  /* disk oriented normal */
-  const float3 lightN = normalize(P - center);
-  ls->P = center;
-
-  if (radius > 0.0f) {
-    /* disk light */
-    ls->P += disk_light_sample(lightN, randu, randv) * radius;
-  }
-
-  const float invarea = klight->spot.invarea;
-  ls->pdf = invarea;
-
-  ls->D = normalize_len(ls->P - P, &ls->t);
-  /* we set the light normal to the outgoing direction to support texturing */
-  ls->Ng = -ls->D;
-
-  ls->eval_fac = (0.25f * M_1_PI_F) * invarea;
-
-  /* spot light attenuation */
-  ls->eval_fac *= spot_light_attenuation(
-      klight->spot.dir, klight->spot.cos_half_spot_angle, klight->spot.spot_smooth, -ls->D);
-  if (!in_volume_segment && ls->eval_fac == 0.0f) {
-    return false;
-  }
-
-  float2 uv = map_to_sphere(ls->Ng);
-  ls->u = uv.x;
-  ls->v = uv.y;
-
-  ls->pdf *= lamp_light_pdf(lightN, -ls->D, ls->t);
-  return true;
-}
-
-ccl_device_forceinline void spot_light_update_position(const ccl_global KernelLight *klight,
-                                                       ccl_private LightSample *ls,
-                                                       const float3 P)
-{
-  ls->D = normalize_len(ls->P - P, &ls->t);
-  ls->Ng = -ls->D;
-
-  float2 uv = map_to_sphere(ls->Ng);
-  ls->u = uv.x;
-  ls->v = uv.y;
-
-  float invarea = klight->spot.invarea;
-  ls->eval_fac = (0.25f * M_1_PI_F) * invarea;
-  ls->pdf = invarea;
-
-  /* spot light attenuation */
-  ls->eval_fac *= spot_light_attenuation(
-      klight->spot.dir, klight->spot.cos_half_spot_angle, klight->spot.spot_smooth, ls->Ng);
-}
-
-ccl_device_inline bool spot_light_intersect(const ccl_global KernelLight *klight,
-                                            const ccl_private Ray *ccl_restrict ray,
-                                            ccl_private float *t)
-{
-  /* Spot/Disk light. */
-  const float3 lightP = klight->co;
-  const float radius = klight->spot.radius;
-  if (radius == 0.0f) {
-    return false;
-  }
-  /* disk oriented normal */
-  const float3 lightN = normalize(ray->P - lightP);
-  /* One sided. */
-  if (dot(ray->D, lightN) >= 0.0f) {
-    return false;
-  }
-
-  float3 P;
-  return ray_disk_intersect(ray->P, ray->D, ray->tmin, ray->tmax, lightP, lightN, radius, &P, t);
-}
-
-ccl_device_inline bool spot_light_sample_from_intersection(
-    const ccl_global KernelLight *klight,
-    ccl_private const Intersection *ccl_restrict isect,
-    const float3 ray_P,
-    const float3 ray_D,
-    ccl_private LightSample *ccl_restrict ls)
-{
-  /* the normal of the oriented disk */
-  const float3 lightN = normalize(ray_P - klight->co);
-  /* We set the light normal to the outgoing direction to support texturing. */
-  ls->Ng = -ls->D;
-
-  float invarea = klight->spot.invarea;
-  ls->eval_fac = (0.25f * M_1_PI_F) * invarea;
-  ls->pdf = invarea;
-
-  /* spot light attenuation */
-  ls->eval_fac *= spot_light_attenuation(
-      klight->spot.dir, klight->spot.cos_half_spot_angle, klight->spot.spot_smooth, -ls->D);
-
-  if (ls->eval_fac == 0.0f) {
-    return false;
-  }
-
-  float2 uv = map_to_sphere(ls->Ng);
-  ls->u = uv.x;
-  ls->v = uv.y;
-
-  /* compute pdf */
-  if (ls->t != FLT_MAX) {
-    ls->pdf *= lamp_light_pdf(lightN, -ls->D, ls->t);
-  }
-  else {
-    ls->pdf = 0.f;
-  }
-
-  return true;
-}
-
-template<bool in_volume_segment>
-ccl_device_forceinline bool spot_light_tree_parameters(const ccl_global KernelLight *klight,
-                                                       const float3 centroid,
-                                                       const float3 P,
-                                                       ccl_private float &cos_theta_u,
-                                                       ccl_private float2 &distance,
-                                                       ccl_private float3 &point_to_centroid)
-{
-  float min_distance;
-  const float3 point_to_centroid_ = safe_normalize_len(centroid - P, &min_distance);
-
-  const float radius = klight->spot.radius;
-  const float hypotenus = sqrtf(sqr(radius) + sqr(min_distance));
-  cos_theta_u = min_distance / hypotenus;
-
-  if (in_volume_segment) {
-    return true;
-  }
-
-  distance = make_float2(hypotenus, min_distance);
-  point_to_centroid = point_to_centroid_;
-
-  return true;
-}
-
-CCL_NAMESPACE_END
--- a/intern/cycles/kernel/light/tree.h
+++ b/intern/cycles/kernel/light/tree.h
@@ -1,691 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2011-2022 Blender Foundation */
-
-#pragma once
-
-#include "kernel/light/area.h"
-#include "kernel/light/common.h"
-#include "kernel/light/light.h"
-#include "kernel/light/spot.h"
-#include "kernel/light/triangle.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* TODO: this seems like a relative expensive computation, and we can make it a lot cheaper
- * by using a bounding sphere instead of a bounding box. This will be more inaccurate, but it
- * might be fine when used along with the adaptive splitting. */
-ccl_device float light_tree_cos_bounding_box_angle(const BoundingBox bbox,
-                                                   const float3 P,
-                                                   const float3 point_to_centroid)
-{
-  if (P.x > bbox.min.x && P.y > bbox.min.y && P.z > bbox.min.z && P.x < bbox.max.x &&
-      P.y < bbox.max.y && P.z < bbox.max.z) {
-    /* If P is inside the bbox, `theta_u` covers the whole sphere */
-    return -1.0f;
-  }
-  float cos_theta_u = 1.0f;
-  /* Iterate through all 8 possible points of the bounding box. */
-  for (int i = 0; i < 8; ++i) {
-    const float3 corner = make_float3((i & 1) ? bbox.max.x : bbox.min.x,
-                                      (i & 2) ? bbox.max.y : bbox.min.y,
-                                      (i & 4) ? bbox.max.z : bbox.min.z);
-
-    /* Caculate the bounding box angle. */
-    float3 point_to_corner = normalize(corner - P);
-    cos_theta_u = fminf(cos_theta_u, dot(point_to_centroid, point_to_corner));
-  }
-  return cos_theta_u;
-}
-
-ccl_device_forceinline float sin_from_cos(const float c)
-{
-  return safe_sqrtf(1.0f - sqr(c));
-}
-
-/* Compute vector v as in Fig .8. P_v is the corresponding point along the ray ccl_device float3 */
-ccl_device float3 compute_v(
-    const float3 centroid, const float3 P, const float3 D, const float3 bcone_axis, const float t)
-{
-  const float3 unnormalized_v0 = P - centroid;
-  float len_v0;
-  const float3 unnormalized_v1 = unnormalized_v0 + D * fminf(t, 1e12f);
-  const float3 v0 = normalize_len(unnormalized_v0, &len_v0);
-  const float3 v1 = normalize(unnormalized_v1);
-
-  const float3 o0 = v0;
-  float3 o1, o2;
-  make_orthonormals_tangent(o0, v1, &o1, &o2);
-
-  const float dot_o0_a = dot(o0, bcone_axis);
-  const float dot_o1_a = dot(o1, bcone_axis);
-  const float cos_phi0 = dot_o0_a / sqrtf(sqr(dot_o0_a) + sqr(dot_o1_a));
-
-  return (dot_o1_a < 0 || dot(v0, v1) > cos_phi0) ? (dot_o0_a > dot(v1, bcone_axis) ? v0 : v1) :
-                                                    cos_phi0 * o0 + sin_from_cos(cos_phi0) * o1;
-}
-
-/* This is the general function for calculating the importance of either a cluster or an emitter.
- * Both of the specialized functions obtain the necessary data before calling this function. */
-template<bool in_volume_segment>
-ccl_device void light_tree_importance(const float3 N_or_D,
-                                      const bool has_transmission,
-                                      const float3 point_to_centroid,
-                                      const float cos_theta_u,
-                                      const BoundingCone bcone,
-                                      const float max_distance,
-                                      const float min_distance,
-                                      const float t,
-                                      const float energy,
-                                      ccl_private float &max_importance,
-                                      ccl_private float &min_importance)
-{
-  max_importance = 0.0f;
-  min_importance = 0.0f;
-
-  const float sin_theta_u = sin_from_cos(cos_theta_u);
-
-  /* cos(theta_i') in the paper, omitted for volume */
-  float cos_min_incidence_angle = 1.0f;
-  float cos_max_incidence_angle = 1.0f;
-
-  /* when sampling the light tree for the second time in `shade_volume.h` and when query the pdf in
-   * `sample.h` */
-  const bool in_volume = is_zero(N_or_D);
-  if (!in_volume_segment && !in_volume) {
-    const float3 N = N_or_D;
-    const float cos_theta_i = has_transmission ? fabsf(dot(point_to_centroid, N)) :
-                                                 dot(point_to_centroid, N);
-    const float sin_theta_i = sin_from_cos(cos_theta_i);
-
-    /* cos_min_incidence_angle = cos(max{theta_i - theta_u, 0}) = cos(theta_i') in the paper */
-    cos_min_incidence_angle = cos_theta_i >= cos_theta_u ?
-                                  1.0f :
-                                  cos_theta_i * cos_theta_u + sin_theta_i * sin_theta_u;
-
-    /* If the node is guaranteed to be behind the surface we're sampling, and the surface is
-     * opaque, then we can give the node an importance of 0 as it contributes nothing to the
-     * surface. This is more accurate than the bbox test if we are calculating the importance of
-     * an emitter with radius */
-    if (!has_transmission && cos_min_incidence_angle < 0) {
-      return;
-    }
-
-    /* cos_max_incidence_angle = cos(min{theta_i + theta_u, pi}) */
-    cos_max_incidence_angle = fmaxf(cos_theta_i * cos_theta_u - sin_theta_i * sin_theta_u, 0.0f);
-  }
-
-  /* cos(theta - theta_u) */
-  const float cos_theta = dot(bcone.axis, -point_to_centroid);
-  const float sin_theta = sin_from_cos(cos_theta);
-  const float cos_theta_minus_theta_u = cos_theta * cos_theta_u + sin_theta * sin_theta_u;
-
-  float cos_theta_o, sin_theta_o;
-  fast_sincosf(bcone.theta_o, &sin_theta_o, &cos_theta_o);
-
-  /* minimum angle an emitter’s axis would form with the direction to the shading point,
-   * cos(theta') in the paper */
-  float cos_min_outgoing_angle;
-  if ((cos_theta >= cos_theta_u) || (cos_theta_minus_theta_u >= cos_theta_o)) {
-    /* theta - theta_o - theta_u <= 0 */
-    kernel_assert((fast_acosf(cos_theta) - bcone.theta_o - fast_acosf(cos_theta_u)) < 5e-4f);
-    cos_min_outgoing_angle = 1.0f;
-  }
-  else if ((bcone.theta_o + bcone.theta_e > M_PI_F) ||
-           (cos_theta_minus_theta_u > cos(bcone.theta_o + bcone.theta_e))) {
-    /* theta' = theta - theta_o - theta_u < theta_e */
-    kernel_assert(
-        (fast_acosf(cos_theta) - bcone.theta_o - fast_acosf(cos_theta_u) - bcone.theta_e) < 5e-4f);
-    const float sin_theta_minus_theta_u = sin_from_cos(cos_theta_minus_theta_u);
-    cos_min_outgoing_angle = cos_theta_minus_theta_u * cos_theta_o +
-                             sin_theta_minus_theta_u * sin_theta_o;
-  }
-  else {
-    /* cluster invisible */
-    return;
-  }
-
-  /* TODO: find a good approximation for f_a. */
-  const float f_a = 1.0f;
-  /* TODO: also consider t (or theta_a, theta_b) for volume */
-  max_importance = fabsf(f_a * cos_min_incidence_angle * energy * cos_min_outgoing_angle /
-                         (in_volume_segment ? min_distance : sqr(min_distance)));
-
-  /* TODO: also min importance for volume? */
-  if (in_volume_segment) {
-    min_importance = max_importance;
-    return;
-  }
-
-  /* cos(theta + theta_o + theta_u) if theta + theta_o + theta_u < theta_e, 0 otherwise */
-  float cos_max_outgoing_angle;
-  const float cos_theta_plus_theta_u = cos_theta * cos_theta_u - sin_theta * sin_theta_u;
-  if (bcone.theta_e - bcone.theta_o < 0 || cos_theta < 0 || cos_theta_u < 0 ||
-      cos_theta_plus_theta_u < cos(bcone.theta_e - bcone.theta_o)) {
-    min_importance = 0.0f;
-  }
-  else {
-    const float sin_theta_plus_theta_u = sin_from_cos(cos_theta_plus_theta_u);
-    cos_max_outgoing_angle = cos_theta_plus_theta_u * cos_theta_o -
-                             sin_theta_plus_theta_u * sin_theta_o;
-    min_importance = fabsf(f_a * cos_max_incidence_angle * energy * cos_max_outgoing_angle /
-                           sqr(max_distance));
-  }
-}
-
-template<bool in_volume_segment>
-ccl_device bool compute_emitter_centroid_and_dir(KernelGlobals kg,
-                                                 ccl_global const KernelLightTreeEmitter *kemitter,
-                                                 const float3 P,
-                                                 ccl_private float3 &centroid,
-                                                 ccl_private packed_float3 &dir)
-{
-  const int prim_id = kemitter->prim_id;
-  if (prim_id < 0) {
-    const ccl_global KernelLight *klight = &kernel_data_fetch(lights, ~prim_id);
-    centroid = klight->co;
-
-    switch (klight->type) {
-      case LIGHT_SPOT:
-        dir = klight->spot.dir;
-        break;
-      case LIGHT_POINT:
-        /* Disk-oriented normal */
-        dir = safe_normalize(P - centroid);
-        break;
-      case LIGHT_AREA:
-        dir = klight->area.dir;
-        break;
-      case LIGHT_BACKGROUND:
-        /* Aarbitrary centroid and direction */
-        centroid = make_float3(0.0f, 0.0f, 1.0f);
-        dir = make_float3(0.0f, 0.0f, -1.0f);
-        return !in_volume_segment;
-      case LIGHT_DISTANT:
-        dir = centroid;
-        return !in_volume_segment;
-      default:
-        return false;
-    }
-  }
-  else {
-    const int object = kemitter->mesh_light.object_id;
-    float3 vertices[3];
-    triangle_world_space_vertices(kg, object, prim_id, -1.0f, vertices);
-    centroid = (vertices[0] + vertices[1] + vertices[2]) / 3.0f;
-
-    if (kemitter->mesh_light.emission_sampling == EMISSION_SAMPLING_FRONT) {
-      dir = safe_normalize(cross(vertices[1] - vertices[0], vertices[2] - vertices[0]));
-    }
-    else if (kemitter->mesh_light.emission_sampling == EMISSION_SAMPLING_BACK) {
-      dir = -safe_normalize(cross(vertices[1] - vertices[0], vertices[2] - vertices[0]));
-    }
-    else {
-      /* Double sided: any vector in the plane. */
-      dir = safe_normalize(vertices[0] - vertices[1]);
-    }
-  }
-  return true;
-}
-
-template<bool in_volume_segment>
-ccl_device void light_tree_emitter_importance(KernelGlobals kg,
-                                              const float3 P,
-                                              const float3 N_or_D,
-                                              const float t,
-                                              const bool has_transmission,
-                                              int emitter_index,
-                                              ccl_private float &max_importance,
-                                              ccl_private float &min_importance)
-{
-  const ccl_global KernelLightTreeEmitter *kemitter = &kernel_data_fetch(light_tree_emitters,
-                                                                         emitter_index);
-
-  max_importance = 0.0f;
-  min_importance = 0.0f;
-  BoundingCone bcone;
-  bcone.theta_o = kemitter->theta_o;
-  bcone.theta_e = kemitter->theta_e;
-  float cos_theta_u;
-  float2 distance; /* distance.x = max_distance, distance.y = mix_distance */
-  float3 centroid, point_to_centroid, P_c;
-
-  if (!compute_emitter_centroid_and_dir<in_volume_segment>(
-          kg, kemitter, P, centroid, bcone.axis)) {
-    return;
-  }
-
-  const int prim_id = kemitter->prim_id;
-
-  if (in_volume_segment) {
-    const float3 D = N_or_D;
-    /* Closest point */
-    P_c = P + dot(centroid - P, D) * D;
-    /* minimal distance of the ray to the cluster */
-    distance.x = len(centroid - P_c);
-    distance.y = distance.x;
-    point_to_centroid = -compute_v(centroid, P, D, bcone.axis, t);
-  }
-  else {
-    P_c = P;
-  }
-
-  bool is_visible;
-  if (prim_id < 0) {
-    const ccl_global KernelLight *klight = &kernel_data_fetch(lights, ~prim_id);
-    switch (klight->type) {
-      /* Function templates only modifies cos_theta_u when in_volume_segment = true */
-      case LIGHT_SPOT:
-        is_visible = spot_light_tree_parameters<in_volume_segment>(
-            klight, centroid, P_c, cos_theta_u, distance, point_to_centroid);
-        break;
-      case LIGHT_POINT:
-        is_visible = point_light_tree_parameters<in_volume_segment>(
-            klight, centroid, P_c, cos_theta_u, distance, point_to_centroid);
-        bcone.theta_o = 0.0f;
-        break;
-      case LIGHT_AREA:
-        is_visible = area_light_tree_parameters<in_volume_segment>(
-            klight, centroid, P_c, N_or_D, bcone.axis, cos_theta_u, distance, point_to_centroid);
-        break;
-      case LIGHT_BACKGROUND:
-        is_visible = background_light_tree_parameters(
-            centroid, cos_theta_u, distance, point_to_centroid);
-        break;
-      case LIGHT_DISTANT:
-        is_visible = distant_light_tree_parameters(
-            centroid, bcone.theta_e, cos_theta_u, distance, point_to_centroid);
-        break;
-      default:
-        return;
-    }
-  }
-  else { /* mesh light */
-    is_visible = triangle_light_tree_parameters<in_volume_segment>(
-        kg, kemitter, centroid, P_c, N_or_D, bcone, cos_theta_u, distance, point_to_centroid);
-  }
-
-  is_visible |= has_transmission;
-  if (!is_visible) {
-    return;
-  }
-
-  light_tree_importance<in_volume_segment>(N_or_D,
-                                           has_transmission,
-                                           point_to_centroid,
-                                           cos_theta_u,
-                                           bcone,
-                                           distance.x,
-                                           distance.y,
-                                           t,
-                                           kemitter->energy,
-                                           max_importance,
-                                           min_importance);
-}
-
-template<bool in_volume_segment>
-ccl_device void light_tree_node_importance(KernelGlobals kg,
-                                           const float3 P,
-                                           const float3 N_or_D,
-                                           const float t,
-                                           const bool has_transmission,
-                                           const ccl_global KernelLightTreeNode *knode,
-                                           ccl_private float &max_importance,
-                                           ccl_private float &min_importance)
-{
-  max_importance = 0.0f;
-  min_importance = 0.0f;
-  if (knode->num_prims == 1) {
-    /* At a leaf node with only one emitter */
-    light_tree_emitter_importance<in_volume_segment>(
-        kg, P, N_or_D, t, has_transmission, -knode->child_index, max_importance, min_importance);
-  }
-  else if (knode->num_prims != 0) {
-    const BoundingCone bcone = knode->bcone;
-    const BoundingBox bbox = knode->bbox;
-
-    float3 point_to_centroid;
-    float cos_theta_u;
-    float distance;
-    if (knode->bit_trail == 1) {
-      /* distant light node */
-      if (in_volume_segment) {
-        return;
-      }
-      point_to_centroid = -bcone.axis;
-      cos_theta_u = fast_cosf(bcone.theta_o);
-      distance = 1.0f;
-    }
-    else {
-      const float3 centroid = 0.5f * (bbox.min + bbox.max);
-
-      if (in_volume_segment) {
-        const float3 D = N_or_D;
-        const float3 closest_point = P + dot(centroid - P, D) * D;
-        /* minimal distance of the ray to the cluster */
-        distance = len(centroid - closest_point);
-        point_to_centroid = -compute_v(centroid, P, D, bcone.axis, t);
-        cos_theta_u = light_tree_cos_bounding_box_angle(bbox, closest_point, point_to_centroid);
-      }
-      else {
-        const float3 N = N_or_D;
-        const float3 bbox_extent = bbox.max - centroid;
-        const bool bbox_is_visible = has_transmission |
-                                     (dot(N, centroid - P) + dot(fabs(N), fabs(bbox_extent)) > 0);
-
-        /* If the node is guaranteed to be behind the surface we're sampling, and the surface is
-         * opaque, then we can give the node an importance of 0 as it contributes nothing to the
-         * surface. */
-        if (!bbox_is_visible) {
-          return;
-        }
-
-        point_to_centroid = normalize_len(centroid - P, &distance);
-        cos_theta_u = light_tree_cos_bounding_box_angle(bbox, P, point_to_centroid);
-      }
-      /* clamp distance to half the radius of the cluster when splitting is disabled */
-      distance = fmaxf(0.5f * len(centroid - bbox.max), distance);
-    }
-    /* TODO: currently max_distance = min_distance, max_importance = min_importance for the
-     * nodes. Do we need better weights for complex scenes? */
-    light_tree_importance<in_volume_segment>(N_or_D,
-                                             has_transmission,
-                                             point_to_centroid,
-                                             cos_theta_u,
-                                             bcone,
-                                             distance,
-                                             distance,
-                                             t,
-                                             knode->energy,
-                                             max_importance,
-                                             min_importance);
-  }
-}
-
-ccl_device void sample_resevoir(const int current_index,
-                                const float current_weight,
-                                ccl_private int &selected_index,
-                                ccl_private float &selected_weight,
-                                ccl_private float &total_weight,
-                                ccl_private float &rand)
-{
-  if (current_weight == 0.0f) {
-    return;
-  }
-  total_weight += current_weight;
-  float thresh = current_weight / total_weight;
-  if (rand <= thresh) {
-    selected_index = current_index;
-    selected_weight = current_weight;
-    rand = rand / thresh;
-  }
-  else {
-    rand = (rand - thresh) / (1.0f - thresh);
-  }
-  kernel_assert(rand >= 0.0f && rand <= 1.0f);
-  return;
-}
-
-/* pick an emitter from a leaf node using resevoir sampling, keep two reservoirs for upper and
- * lower bounds */
-template<bool in_volume_segment>
-ccl_device int light_tree_cluster_select_emitter(KernelGlobals kg,
-                                                 ccl_private float &rand,
-                                                 const float3 P,
-                                                 const float3 N_or_D,
-                                                 const float t,
-                                                 const bool has_transmission,
-                                                 const ccl_global KernelLightTreeNode *knode,
-                                                 ccl_private float *pdf_factor)
-{
-  float selected_importance[2] = {0.0f, 0.0f};
-  float total_importance[2] = {0.0f, 0.0f};
-  int selected_index = -1;
-
-  /* Mark emitters with zero importance. Used for resevoir when total minimum importance = 0 */
-  kernel_assert(knode->num_prims <= sizeof(uint) * 8);
-  uint has_importance = 0;
-
-  const bool sample_max = (rand > 0.5f); /* sampling using the maximum importance */
-  rand = rand * 2.0f - float(sample_max);
-
-  for (int i = 0; i < knode->num_prims; i++) {
-    int current_index = -knode->child_index + i;
-    /* maximum importance = importance[0], mininum importance = importance[1] */
-    float importance[2];
-    light_tree_emitter_importance<in_volume_segment>(
-        kg, P, N_or_D, t, has_transmission, current_index, importance[0], importance[1]);
-
-    sample_resevoir(current_index,
-                    importance[!sample_max],
-                    selected_index,
-                    selected_importance[!sample_max],
-                    total_importance[!sample_max],
-                    rand);
-    if (selected_index == current_index) {
-      selected_importance[sample_max] = importance[sample_max];
-    }
-    total_importance[sample_max] += importance[sample_max];
-
-    has_importance |= ((importance[0] > 0) << i);
-  }
-
-  if (total_importance[0] == 0.0f) {
-    return -1;
-  }
-
-  if (total_importance[1] == 0.0f) {
-    /* uniformly sample emitters with positive maximum importance */
-    if (sample_max) {
-      selected_importance[1] = 1.0f;
-      total_importance[1] = float(popcount(has_importance));
-    }
-    else {
-      selected_index = -1;
-      for (int i = 0; i < knode->num_prims; i++) {
-        int current_index = -knode->child_index + i;
-        sample_resevoir(current_index,
-                        float(has_importance & 1),
-                        selected_index,
-                        selected_importance[1],
-                        total_importance[1],
-                        rand);
-        has_importance >>= 1;
-      }
-
-      float discard;
-      light_tree_emitter_importance<in_volume_segment>(
-          kg, P, N_or_D, t, has_transmission, selected_index, selected_importance[0], discard);
-    }
-  }
-
-  *pdf_factor = 0.5f * (selected_importance[0] / total_importance[0] +
-                        selected_importance[1] / total_importance[1]);
-
-  return selected_index;
-}
-
-template<bool in_volume_segment>
-ccl_device bool get_left_probability(KernelGlobals kg,
-                                     const float3 P,
-                                     const float3 N_or_D,
-                                     const float t,
-                                     const bool has_transmission,
-                                     const int left_index,
-                                     const int right_index,
-                                     ccl_private float &left_probability)
-{
-  const ccl_global KernelLightTreeNode *left = &kernel_data_fetch(light_tree_nodes, left_index);
-  const ccl_global KernelLightTreeNode *right = &kernel_data_fetch(light_tree_nodes, right_index);
-
-  float min_left_importance, max_left_importance, min_right_importance, max_right_importance;
-  light_tree_node_importance<in_volume_segment>(
-      kg, P, N_or_D, t, has_transmission, left, max_left_importance, min_left_importance);
-  light_tree_node_importance<in_volume_segment>(
-      kg, P, N_or_D, t, has_transmission, right, max_right_importance, min_right_importance);
-
-  const float total_max_importance = max_left_importance + max_right_importance;
-  if (total_max_importance == 0.0f) {
-    return false;
-  }
-  const float total_min_importance = min_left_importance + min_right_importance;
-
-  /* average two probabilities of picking the left child node using lower and upper bounds */
-  const float probability_max = max_left_importance / total_max_importance;
-  const float probability_min = total_min_importance > 0 ?
-                                    min_left_importance / total_min_importance :
-                                    0.5f * (float(max_left_importance > 0) +
-                                            float(max_right_importance == 0.0f));
-  left_probability = 0.5f * (probability_max + probability_min);
-  return true;
-}
-
-template<bool in_volume_segment>
-ccl_device_noinline bool light_tree_sample(KernelGlobals kg,
-                                           ccl_private float &randu,
-                                           ccl_private float &randv,
-                                           const float time,
-                                           const float3 P,
-                                           const float3 N_or_D,
-                                           const float t,
-                                           const int shader_flags,
-                                           const int bounce,
-                                           const uint32_t path_flag,
-                                           ccl_private int &emitter_object,
-                                           ccl_private int &emitter_prim,
-                                           ccl_private int &emitter_shader_flag,
-                                           ccl_private float &emitter_pdf_selection)
-{
-  if (!kernel_data.integrator.use_direct_light) {
-    return false;
-  }
-
-  const bool has_transmission = (shader_flags & SD_BSDF_HAS_TRANSMISSION);
-  float pdf_leaf = 1.0f;
-  float pdf_emitter_from_leaf = 1.0f;
-  int selected_light = -1;
-
-  int node_index = 0; /* root node */
-
-  /* Traverse the light tree until a leaf node is reached. */
-  while (true) {
-    const ccl_global KernelLightTreeNode *knode = &kernel_data_fetch(light_tree_nodes, node_index);
-
-    if (knode->child_index <= 0) {
-      /* At a leaf node, we pick an emitter */
-      selected_light = light_tree_cluster_select_emitter<in_volume_segment>(
-          kg, randv, P, N_or_D, t, has_transmission, knode, &pdf_emitter_from_leaf);
-      break;
-    }
-
-    /* At an interior node, the left child is directly after the parent,
-     * while the right child is stored as the child index. */
-    const int left_index = node_index + 1;
-    const int right_index = knode->child_index;
-
-    float left_prob;
-    if (!get_left_probability<in_volume_segment>(
-            kg, P, N_or_D, t, has_transmission, left_index, right_index, left_prob)) {
-      return false; /* both child nodes have zero importance */
-    }
-
-    float discard;
-    float total_prob = left_prob;
-    node_index = left_index;
-    sample_resevoir(right_index, 1.0f - left_prob, node_index, discard, total_prob, randu);
-    pdf_leaf *= (node_index == left_index) ? left_prob : (1.0f - left_prob);
-  }
-
-  if (selected_light < 0) {
-    return false;
-  }
-
-  /* Return info about chosen emitter. */
-  ccl_global const KernelLightTreeEmitter *kemitter = &kernel_data_fetch(light_tree_emitters,
-                                                                         selected_light);
-
-  emitter_object = kemitter->mesh_light.object_id;
-  emitter_prim = kemitter->prim_id;
-  emitter_shader_flag = kemitter->mesh_light.shader_flag;
-  emitter_pdf_selection = pdf_leaf * pdf_emitter_from_leaf;
-
-  return true;
-}
-
-/* We need to be able to find the probability of selecting a given light for MIS. */
-ccl_device float light_tree_pdf(
-    KernelGlobals kg, const float3 P, const float3 N, const int path_flag, const int prim)
-{
-  const bool has_transmission = (path_flag & PATH_RAY_MIS_HAD_TRANSMISSION);
-  /* Target emitter info */
-  const int target_emitter = (prim >= 0) ? kernel_data_fetch(triangle_to_tree, prim) :
-                                           kernel_data_fetch(light_to_tree, ~prim);
-  ccl_global const KernelLightTreeEmitter *kemitter = &kernel_data_fetch(light_tree_emitters,
-                                                                         target_emitter);
-  const int target_leaf = kemitter->parent_index;
-  ccl_global const KernelLightTreeNode *kleaf = &kernel_data_fetch(light_tree_nodes, target_leaf);
-  uint bit_trail = kleaf->bit_trail;
-
-  int node_index = 0; /* root node */
-
-  float pdf = 1.0f;
-
-  /* Traverse the light tree until we reach the target leaf node */
-  while (true) {
-    const ccl_global KernelLightTreeNode *knode = &kernel_data_fetch(light_tree_nodes, node_index);
-
-    if (knode->child_index <= 0) {
-      break;
-    }
-
-    /* Interior node */
-    const int left_index = node_index + 1;
-    const int right_index = knode->child_index;
-
-    float left_prob;
-    if (!get_left_probability<false>(
-            kg, P, N, 0, has_transmission, left_index, right_index, left_prob)) {
-      return 0.0f;
-    }
-
-    const bool go_left = (bit_trail & 1) == 0;
-    bit_trail >>= 1;
-    pdf *= go_left ? left_prob : (1.0f - left_prob);
-    node_index = go_left ? left_index : right_index;
-
-    if (pdf == 0) {
-      return 0.0f;
-    }
-  }
-
-  kernel_assert(node_index == target_leaf);
-
-  /* Iterate through leaf node to find the probability of sampling the target emitter. */
-  float target_max_importance = 0.0f;
-  float target_min_importance = 0.0f;
-  float total_max_importance = 0.0f;
-  float total_min_importance = 0.0f;
-  int num_has_importance = 0;
-  for (int i = 0; i < kleaf->num_prims; i++) {
-    const int emitter = -kleaf->child_index + i;
-    float max_importance, min_importance;
-    light_tree_emitter_importance<false>(
-        kg, P, N, 0, has_transmission, emitter, max_importance, min_importance);
-    num_has_importance += (max_importance > 0);
-    if (emitter == target_emitter) {
-      target_max_importance = max_importance;
-      target_min_importance = min_importance;
-    }
-    total_max_importance += max_importance;
-    total_min_importance += min_importance;
-  }
-
-  if (target_max_importance > 0.0f) {
-    return pdf * 0.5f *
-           (target_max_importance / total_max_importance +
-            (total_min_importance > 0 ? target_min_importance / total_min_importance :
-                                        1.0f / num_has_importance));
-  }
-  return 0.0f;
-}
-
-CCL_NAMESPACE_END
--- a/intern/cycles/kernel/light/triangle.h
+++ b/intern/cycles/kernel/light/triangle.h
@@ -1,329 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2011-2022 Blender Foundation */
-
-#pragma once
-
-#include "kernel/geom/geom.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* returns true if the triangle is has motion blur or an instancing transform applied */
-ccl_device_inline bool triangle_world_space_vertices(
-    KernelGlobals kg, int object, int prim, float time, float3 V[3])
-{
-  bool has_motion = false;
-  const int object_flag = kernel_data_fetch(object_flag, object);
-
-  if (object_flag & SD_OBJECT_HAS_VERTEX_MOTION && time >= 0.0f) {
-    motion_triangle_vertices(kg, object, prim, time, V);
-    has_motion = true;
-  }
-  else {
-    triangle_vertices(kg, prim, V);
-  }
-
-  if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
-#ifdef __OBJECT_MOTION__
-    float object_time = (time >= 0.0f) ? time : 0.5f;
-    Transform tfm = object_fetch_transform_motion_test(kg, object, object_time, NULL);
-#else
-    Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
-#endif
-    V[0] = transform_point(&tfm, V[0]);
-    V[1] = transform_point(&tfm, V[1]);
-    V[2] = transform_point(&tfm, V[2]);
-    has_motion = true;
-  }
-  return has_motion;
-}
-
-ccl_device_inline float triangle_light_pdf_area_sampling(const float3 Ng, const float3 I, float t)
-{
-  float cos_pi = fabsf(dot(Ng, I));
-
-  if (cos_pi == 0.0f)
-    return 0.0f;
-
-  return t * t / cos_pi;
-}
-
-ccl_device_forceinline float triangle_light_pdf(KernelGlobals kg,
-                                                ccl_private const ShaderData *sd,
-                                                float t)
-{
-  /* A naive heuristic to decide between costly solid angle sampling
-   * and simple area sampling, comparing the distance to the triangle plane
-   * to the length of the edges of the triangle. */
-
-  float3 V[3];
-  bool has_motion = triangle_world_space_vertices(kg, sd->object, sd->prim, sd->time, V);
-
-  const float3 e0 = V[1] - V[0];
-  const float3 e1 = V[2] - V[0];
-  const float3 e2 = V[2] - V[1];
-  const float longest_edge_squared = max(len_squared(e0), max(len_squared(e1), len_squared(e2)));
-  const float3 N = cross(e0, e1);
-  const float distance_to_plane = fabsf(dot(N, sd->I * t)) / dot(N, N);
-  const float area = 0.5f * len(N);
-
-  float pdf;
-
-  if (longest_edge_squared > distance_to_plane * distance_to_plane) {
-    /* sd contains the point on the light source
-     * calculate Px, the point that we're shading */
-    const float3 Px = sd->P + sd->I * t;
-    const float3 v0_p = V[0] - Px;
-    const float3 v1_p = V[1] - Px;
-    const float3 v2_p = V[2] - Px;
-
-    const float3 u01 = safe_normalize(cross(v0_p, v1_p));
-    const float3 u02 = safe_normalize(cross(v0_p, v2_p));
-    const float3 u12 = safe_normalize(cross(v1_p, v2_p));
-
-    const float alpha = fast_acosf(dot(u02, u01));
-    const float beta = fast_acosf(-dot(u01, u12));
-    const float gamma = fast_acosf(dot(u02, u12));
-    const float solid_angle = alpha + beta + gamma - M_PI_F;
-
-    /* distribution_pdf_triangles is calculated over triangle area, but we're not sampling over
-     * its area */
-    if (UNLIKELY(solid_angle == 0.0f)) {
-      return 0.0f;
-    }
-    else {
-      pdf = 1.0f / solid_angle;
-    }
-  }
-  else {
-    if (UNLIKELY(area == 0.0f)) {
-      return 0.0f;
-    }
-
-    pdf = triangle_light_pdf_area_sampling(sd->Ng, sd->I, t) / area;
-  }
-
-  /* Belongs in distribution.h but can reuse computations here. */
-  if (!kernel_data.integrator.use_light_tree) {
-    float distribution_area = area;
-
-    if (has_motion && area != 0.0f) {
-      /* For motion blur need area of triangle at fixed time as used in the CDF. */
-      triangle_world_space_vertices(kg, sd->object, sd->prim, -1.0f, V);
-      distribution_area = triangle_area(V[0], V[1], V[2]);
-    }
-
-    pdf *= distribution_area * kernel_data.integrator.distribution_pdf_triangles;
-  }
-
-  return pdf;
-}
-
-template<bool in_volume_segment>
-ccl_device_forceinline bool triangle_light_sample(KernelGlobals kg,
-                                                  int prim,
-                                                  int object,
-                                                  float randu,
-                                                  float randv,
-                                                  float time,
-                                                  ccl_private LightSample *ls,
-                                                  const float3 P)
-{
-  /* A naive heuristic to decide between costly solid angle sampling
-   * and simple area sampling, comparing the distance to the triangle plane
-   * to the length of the edges of the triangle. */
-
-  float3 V[3];
-  bool has_motion = triangle_world_space_vertices(kg, object, prim, time, V);
-
-  const float3 e0 = V[1] - V[0];
-  const float3 e1 = V[2] - V[0];
-  const float3 e2 = V[2] - V[1];
-  const float longest_edge_squared = max(len_squared(e0), max(len_squared(e1), len_squared(e2)));
-  const float3 N0 = cross(e0, e1);
-  float Nl = 0.0f;
-  ls->Ng = safe_normalize_len(N0, &Nl);
-  const float area = 0.5f * Nl;
-
-  /* flip normal if necessary */
-  const int object_flag = kernel_data_fetch(object_flag, object);
-  if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
-    ls->Ng = -ls->Ng;
-  }
-  ls->eval_fac = 1.0f;
-  ls->shader = kernel_data_fetch(tri_shader, prim);
-  ls->object = object;
-  ls->prim = prim;
-  ls->lamp = LAMP_NONE;
-  ls->shader |= SHADER_USE_MIS;
-  ls->type = LIGHT_TRIANGLE;
-  ls->group = object_lightgroup(kg, object);
-
-  float distance_to_plane = fabsf(dot(N0, V[0] - P) / dot(N0, N0));
-
-  if (!in_volume_segment && (longest_edge_squared > distance_to_plane * distance_to_plane)) {
-    /* see James Arvo, "Stratified Sampling of Spherical Triangles"
-     * http://www.graphics.cornell.edu/pubs/1995/Arv95c.pdf */
-
-    /* project the triangle to the unit sphere
-     * and calculate its edges and angles */
-    const float3 v0_p = V[0] - P;
-    const float3 v1_p = V[1] - P;
-    const float3 v2_p = V[2] - P;
-
-    const float3 u01 = safe_normalize(cross(v0_p, v1_p));
-    const float3 u02 = safe_normalize(cross(v0_p, v2_p));
-    const float3 u12 = safe_normalize(cross(v1_p, v2_p));
-
-    const float3 A = safe_normalize(v0_p);
-    const float3 B = safe_normalize(v1_p);
-    const float3 C = safe_normalize(v2_p);
-
-    const float cos_alpha = dot(u02, u01);
-    const float cos_beta = -dot(u01, u12);
-    const float cos_gamma = dot(u02, u12);
-
-    /* calculate dihedral angles */
-    const float alpha = fast_acosf(cos_alpha);
-    const float beta = fast_acosf(cos_beta);
-    const float gamma = fast_acosf(cos_gamma);
-    /* the area of the unit spherical triangle = solid angle */
-    const float solid_angle = alpha + beta + gamma - M_PI_F;
-
-    /* precompute a few things
-     * these could be re-used to take several samples
-     * as they are independent of randu/randv */
-    const float cos_c = dot(A, B);
-    const float sin_alpha = fast_sinf(alpha);
-    const float product = sin_alpha * cos_c;
-
-    /* Select a random sub-area of the spherical triangle
-     * and calculate the third vertex C_ of that new triangle */
-    const float phi = randu * solid_angle - alpha;
-    float s, t;
-    fast_sincosf(phi, &s, &t);
-    const float u = t - cos_alpha;
-    const float v = s + product;
-
-    const float3 U = safe_normalize(C - dot(C, A) * A);
-
-    float q = 1.0f;
-    const float det = ((v * s + u * t) * sin_alpha);
-    if (det != 0.0f) {
-      q = ((v * t - u * s) * cos_alpha - v) / det;
-    }
-    const float temp = max(1.0f - q * q, 0.0f);
-
-    const float3 C_ = safe_normalize(q * A + sqrtf(temp) * U);
-
-    /* Finally, select a random point along the edge of the new triangle
-     * That point on the spherical triangle is the sampled ray direction */
-    const float z = 1.0f - randv * (1.0f - dot(C_, B));
-    ls->D = z * B + safe_sqrtf(1.0f - z * z) * safe_normalize(C_ - dot(C_, B) * B);
-
-    /* calculate intersection with the planar triangle */
-    if (!ray_triangle_intersect(
-            P, ls->D, 0.0f, FLT_MAX, V[0], V[1], V[2], &ls->u, &ls->v, &ls->t)) {
-      ls->pdf = 0.0f;
-      return false;
-    }
-
-    ls->P = P + ls->D * ls->t;
-
-    /* distribution_pdf_triangles is calculated over triangle area, but we're sampling over solid
-     * angle */
-    if (UNLIKELY(solid_angle == 0.0f)) {
-      ls->pdf = 0.0f;
-      return false;
-    }
-    else {
-      ls->pdf = 1.0f / solid_angle;
-    }
-  }
-  else {
-    if (UNLIKELY(area == 0.0f)) {
-      return 0.0f;
-    }
-
-    /* compute random point in triangle. From Eric Heitz's "A Low-Distortion Map Between Triangle
-     * and Square" */
-    float u = randu;
-    float v = randv;
-    if (v > u) {
-      u *= 0.5f;
-      v -= u;
-    }
-    else {
-      v *= 0.5f;
-      u -= v;
-    }
-
-    const float t = 1.0f - u - v;
-    ls->P = u * V[0] + v * V[1] + t * V[2];
-    /* compute incoming direction, distance and pdf */
-    ls->D = normalize_len(ls->P - P, &ls->t);
-    ls->pdf = triangle_light_pdf_area_sampling(ls->Ng, -ls->D, ls->t) / area;
-    ls->u = u;
-    ls->v = v;
-  }
-
-  /* Belongs in distribution.h but can reuse computations here. */
-  if (!kernel_data.integrator.use_light_tree) {
-    float distribution_area = area;
-
-    if (has_motion && area != 0.0f) {
-      /* For motion blur need area of triangle at fixed time as used in the CDF. */
-      triangle_world_space_vertices(kg, object, prim, -1.0f, V);
-      distribution_area = triangle_area(V[0], V[1], V[2]);
-    }
-
-    ls->pdf_selection = distribution_area * kernel_data.integrator.distribution_pdf_triangles;
-  }
-
-  return (ls->pdf > 0.0f);
-}
-
-template<bool in_volume_segment>
-ccl_device_forceinline bool triangle_light_tree_parameters(
-    KernelGlobals kg,
-    const ccl_global KernelLightTreeEmitter *kemitter,
-    const float3 centroid,
-    const float3 P,
-    const float3 N,
-    const BoundingCone bcone,
-    ccl_private float &cos_theta_u,
-    ccl_private float2 &distance,
-    ccl_private float3 &point_to_centroid)
-{
-  if (!in_volume_segment) {
-    /* TODO: a cheap substitute for minimal distance between point and primitive. Does it
-     * worth the overhead to compute the accurate minimal distance? */
-    float min_distance;
-    point_to_centroid = safe_normalize_len(centroid - P, &min_distance);
-    distance = make_float2(min_distance, min_distance);
-  }
-
-  cos_theta_u = FLT_MAX;
-
-  const int object = kemitter->mesh_light.object_id;
-  float3 vertices[3];
-  triangle_world_space_vertices(kg, object, kemitter->prim_id, -1.0f, vertices);
-
-  bool shape_above_surface = false;
-  for (int i = 0; i < 3; i++) {
-    const float3 corner = vertices[i];
-    float distance_point_to_corner;
-    const float3 point_to_corner = safe_normalize_len(corner - P, &distance_point_to_corner);
-    cos_theta_u = fminf(cos_theta_u, dot(point_to_centroid, point_to_corner));
-    shape_above_surface |= dot(point_to_corner, N) > 0;
-    if (!in_volume_segment) {
-      distance.x = fmaxf(distance.x, distance_point_to_corner);
-    }
-  }
-
-  const bool front_facing = bcone.theta_o != 0.0f || dot(bcone.axis, point_to_centroid) < 0;
-  const bool in_volume = is_zero(N);
-
-  return (front_facing && shape_above_surface) || in_volume;
-}
-
-CCL_NAMESPACE_END
--- a/intern/cycles/kernel/sample/jitter.h
+++ b/intern/cycles/kernel/sample/jitter.h
@@ -7,25 +7,6 @@
 #pragma once
 CCL_NAMESPACE_BEGIN

-ccl_device uint pmj_shuffled_sample_index(KernelGlobals kg, uint sample, uint dimension, uint seed)
-{
-  const uint sample_count = kernel_data.integrator.pmj_sequence_size;
-
-  /* Shuffle the pattern order and sample index to better decorrelate
-   * dimensions and make the most of the finite patterns we have.
-   * The funky sample mask stuff is to ensure that we only shuffle
-   * *within* the current sample pattern, which is necessary to avoid
-   * early repeat pattern use. */
-  const uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed);
-  /* sample_count should always be a power of two, so this results in a mask. */
-  const uint sample_mask = sample_count - 1;
-  const uint sample_shuffled = nested_uniform_scramble(sample,
-                                                       hash_wang_seeded_uint(dimension, seed));
-  sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask);
-
-  return ((pattern_i * sample_count) + sample) % (sample_count * NUM_PMJ_PATTERNS);
-}
-
 ccl_device float pmj_sample_1D(KernelGlobals kg,
                               uint sample,
                               const uint rng_hash,
@@ -39,9 +20,22 @@ ccl_device float pmj_sample_1D(KernelGlobals kg,
    seed = kernel_data.integrator.seed;
  }

+  /* Shuffle the pattern order and sample index to better decorrelate
+   * dimensions and make the most of the finite patterns we have.
+   * The funky sample mask stuff is to ensure that we only shuffle
+   * *within* the current sample pattern, which is necessary to avoid
+   * early repeat pattern use. */
+  const uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed);
+  /* NUM_PMJ_SAMPLES should be a power of two, so this results in a mask. */
+  const uint sample_mask = NUM_PMJ_SAMPLES - 1;
+  const uint sample_shuffled = nested_uniform_scramble(sample,
+                                                       hash_wang_seeded_uint(dimension, seed));
+  sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask);
+
  /* Fetch the sample. */
-  const uint index = pmj_shuffled_sample_index(kg, sample, dimension, seed);
-  float x = kernel_data_fetch(sample_pattern_lut, index * NUM_PMJ_DIMENSIONS);
+  const uint index = ((pattern_i * NUM_PMJ_SAMPLES) + sample) %
+                     (NUM_PMJ_SAMPLES * NUM_PMJ_PATTERNS);
+  float x = kernel_data_fetch(sample_pattern_lut, index * 2);

  /* Do limited Cranley-Patterson rotation when using scrambling distance. */
  if (kernel_data.integrator.scrambling_distance < 1.0f) {
@@ -67,10 +61,23 @@ ccl_device float2 pmj_sample_2D(KernelGlobals kg,
    seed = kernel_data.integrator.seed;
  }

+  /* Shuffle the pattern order and sample index to better decorrelate
+   * dimensions and make the most of the finite patterns we have.
+   * The funky sample mask stuff is to ensure that we only shuffle
+   * *within* the current sample pattern, which is necessary to avoid
+   * early repeat pattern use. */
+  const uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed);
+  /* NUM_PMJ_SAMPLES should be a power of two, so this results in a mask. */
+  const uint sample_mask = NUM_PMJ_SAMPLES - 1;
+  const uint sample_shuffled = nested_uniform_scramble(sample,
+                                                       hash_wang_seeded_uint(dimension, seed));
+  sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask);
+
  /* Fetch the sample. */
-  const uint index = pmj_shuffled_sample_index(kg, sample, dimension, seed);
-  float x = kernel_data_fetch(sample_pattern_lut, index * NUM_PMJ_DIMENSIONS);
-  float y = kernel_data_fetch(sample_pattern_lut, index * NUM_PMJ_DIMENSIONS + 1);
+  const uint index = ((pattern_i * NUM_PMJ_SAMPLES) + sample) %
+                     (NUM_PMJ_SAMPLES * NUM_PMJ_PATTERNS);
+  float x = kernel_data_fetch(sample_pattern_lut, index * 2);
+  float y = kernel_data_fetch(sample_pattern_lut, index * 2 + 1);

  /* Do limited Cranley-Patterson rotation when using scrambling distance. */
  if (kernel_data.integrator.scrambling_distance < 1.0f) {
--- a/intern/cycles/kernel/types.h
+++ b/intern/cycles/kernel/types.h
@@ -60,7 +60,6 @@ CCL_NAMESPACE_BEGIN
 #define __DENOISING_FEATURES__
 #define __DPDU__
 #define __HAIR__
-#define __LIGHT_TREE__
 #define __OBJECT_MOTION__
 #define __PASSES__
 #define __PATCH_EVAL__
@@ -75,11 +74,6 @@ CCL_NAMESPACE_BEGIN
 #define __VISIBILITY_FLAG__
 #define __VOLUME__

-/* TODO: solve internal compiler errors and enable light tree on HIP. */
-#ifdef __KERNEL_HIP__
-#  undef __LIGHT_TREE__
-#endif
-
 /* Device specific features */
 #ifdef WITH_OSL
 #  define __OSL__
@@ -166,8 +160,7 @@ enum PathTraceDimension {
  PRNG_VOLUME_SCATTER_DISTANCE = 5,
  PRNG_VOLUME_OFFSET = 6,
  PRNG_VOLUME_SHADE_OFFSET = 7,
-  PRNG_VOLUME_PHASE_GUIDING_DISTANCE = 8,
-  PRNG_VOLUME_PHASE_GUIDING_EQUIANGULAR = 9,
+  PRNG_VOLUME_PHASE_GUIDING = 8,

  /* Subsurface random walk bounces */
  PRNG_SUBSURFACE_BSDF = 0,
@@ -215,26 +208,21 @@ enum PathRayFlag : uint32_t {
  PATH_RAY_SHADOW_TRANSPARENT = (1U << 9U),
  PATH_RAY_SHADOW = (PATH_RAY_SHADOW_OPAQUE | PATH_RAY_SHADOW_TRANSPARENT),

+  /* Special flag to tag unaligned BVH nodes.
+   * Only set and used in BVH nodes to distinguish how to interpret bounding box information stored
+   * in the node (either it should be intersected as AABB or as OBBU). */
+  PATH_RAY_NODE_UNALIGNED = (1U << 10U),
+
  /* Subset of flags used for ray visibility for intersection.
   *
   * NOTE: SHADOW_CATCHER macros below assume there are no more than
   * 16 visibility bits. */
-  PATH_RAY_ALL_VISIBILITY = ((1U << 10U) - 1U),
-
-  /* Special flag to tag unaligned BVH nodes.
-   * Only set and used in BVH nodes to distinguish how to interpret bounding box information stored
-   * in the node (either it should be intersected as AABB or as OBBU).
-   * So this can overlap with path flags. */
-  PATH_RAY_NODE_UNALIGNED = (1U << 10U),
+  PATH_RAY_ALL_VISIBILITY = ((1U << 11U) - 1U),

  /* --------------------------------------------------------------------
   * Path flags.
   */

-  /* Surface had transmission component at previous bounce. Used for light tree
-   * traversal and culling to be consistent with MIS PDF at the next bounce. */
-  PATH_RAY_MIS_HAD_TRANSMISSION = (1U << 10U),
-
  /* Don't apply multiple importance sampling weights to emission from
   * lamp or surface hits, because they were not direct light sampled. */
  PATH_RAY_MIS_SKIP = (1U << 11U),
@@ -354,6 +342,7 @@ typedef enum PassType {
  PASS_EMISSION,
  PASS_BACKGROUND,
  PASS_AO,
+  PASS_SHADOW,
  PASS_DIFFUSE,
  PASS_DIFFUSE_DIRECT,
  PASS_DIFFUSE_INDIRECT,
@@ -472,16 +461,6 @@ typedef enum ShaderFlag {
                  SHADER_EXCLUDE_ANY)
 } ShaderFlag;

-enum EmissionSampling {
-  EMISSION_SAMPLING_NONE = 0,
-  EMISSION_SAMPLING_AUTO = 1,
-  EMISSION_SAMPLING_FRONT = 2,
-  EMISSION_SAMPLING_BACK = 3,
-  EMISSION_SAMPLING_FRONT_BACK = 4,
-
-  EMISSION_SAMPLING_NUM
-};
-
 /* Light Type */

 typedef enum LightType {
@@ -795,16 +774,14 @@ enum ShaderDataFlag {
  SD_TRANSPARENT = (1 << 9),
  /* BSDF requires LCG for evaluation. */
  SD_BSDF_NEEDS_LCG = (1 << 10),
-  /* BSDF has a transmissive component. */
-  SD_BSDF_HAS_TRANSMISSION = (1 << 11),

  SD_CLOSURE_FLAGS = (SD_EMISSION | SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSSRDF | SD_HOLDOUT |
-                      SD_EXTINCTION | SD_SCATTER | SD_BSDF_NEEDS_LCG | SD_BSDF_HAS_TRANSMISSION),
+                      SD_EXTINCTION | SD_SCATTER | SD_BSDF_NEEDS_LCG),

  /* Shader flags. */

-  /* Use front side for direct light sampling. */
-  SD_MIS_FRONT = (1 << 16),
+  /* direct light sample */
+  SD_USE_MIS = (1 << 16),
  /* Has transparent shadow. */
  SD_HAS_TRANSPARENT_SHADOW = (1 << 17),
  /* Has volume shader. */
@@ -833,14 +810,12 @@ enum ShaderDataFlag {
  SD_HAS_EMISSION = (1 << 29),
  /* Shader has raytracing */
  SD_HAS_RAYTRACE = (1 << 30),
-  /* Use back side for direct light sampling. */
-  SD_MIS_BACK = (1 << 31),

-  SD_SHADER_FLAGS = (SD_MIS_FRONT | SD_HAS_TRANSPARENT_SHADOW | SD_HAS_VOLUME |
-                     SD_HAS_ONLY_VOLUME | SD_HETEROGENEOUS_VOLUME | SD_HAS_BSSRDF_BUMP |
-                     SD_VOLUME_EQUIANGULAR | SD_VOLUME_MIS | SD_VOLUME_CUBIC | SD_HAS_BUMP |
-                     SD_HAS_DISPLACEMENT | SD_HAS_CONSTANT_EMISSION | SD_NEED_VOLUME_ATTRIBUTES |
-                     SD_HAS_EMISSION | SD_HAS_RAYTRACE | SD_MIS_BACK)
+  SD_SHADER_FLAGS = (SD_USE_MIS | SD_HAS_TRANSPARENT_SHADOW | SD_HAS_VOLUME | SD_HAS_ONLY_VOLUME |
+                     SD_HETEROGENEOUS_VOLUME | SD_HAS_BSSRDF_BUMP | SD_VOLUME_EQUIANGULAR |
+                     SD_VOLUME_MIS | SD_VOLUME_CUBIC | SD_HAS_BUMP | SD_HAS_DISPLACEMENT |
+                     SD_HAS_CONSTANT_EMISSION | SD_NEED_VOLUME_ATTRIBUTES | SD_HAS_EMISSION |
+                     SD_HAS_RAYTRACE)
 };

 /* Object flags. */
@@ -1292,24 +1267,21 @@ static_assert_align(KernelCurveSegment, 8);
 typedef struct KernelSpotLight {
  float radius;
  float invarea;
-  float cos_half_spot_angle;
+  float spot_angle;
  float spot_smooth;
-  packed_float3 dir;
+  float dir[3];
  float pad;
 } KernelSpotLight;

 /* PointLight is SpotLight with only radius and invarea being used. */

 typedef struct KernelAreaLight {
-  packed_float3 axis_u;
-  float len_u;
-  packed_float3 axis_v;
-  float len_v;
-  packed_float3 dir;
+  float axisu[3];
  float invarea;
-  float cot_half_spread;
+  float axisv[3];
+  float tan_spread;
+  float dir[3];
  float normalize_spread;
-  float pad[2];
 } KernelAreaLight;

 typedef struct KernelDistantLight {
@@ -1321,7 +1293,7 @@ typedef struct KernelDistantLight {

 typedef struct KernelLight {
  int type;
-  packed_float3 co;
+  float co[3];
  int shader_id;
  float max_bounces;
  float random;
@@ -1341,70 +1313,19 @@ static_assert_align(KernelLight, 16);
 typedef struct KernelLightDistribution {
  float totarea;
  int prim;
-  struct {
-    int shader_flag;
-    int object_id;
-  } mesh_light;
+  union {
+    struct {
+      int shader_flag;
+      int object_id;
+    } mesh_light;
+    struct {
+      float pad;
+      float size;
+    } lamp;
+  };
 } KernelLightDistribution;
 static_assert_align(KernelLightDistribution, 16);

-/* Bounding box. */
-using BoundingBox = struct BoundingBox {
-  packed_float3 min;
-  packed_float3 max;
-};
-
-using BoundingCone = struct BoundingCone {
-  packed_float3 axis;
-  float theta_o;
-  float theta_e;
-};
-
-typedef struct KernelLightTreeNode {
-  /* Bounding box. */
-  BoundingBox bbox;
-
-  /* Bounding cone. */
-  BoundingCone bcone;
-
-  /* Energy. */
-  float energy;
-
-  /* If this is 0 or less, we're at a leaf node
-   * and the negative value indexes into the first child of the light array.
-   * Otherwise, it's an index to the node's second child. */
-  int child_index;
-  int num_prims; /* leaf nodes need to know the number of primitives stored. */
-
-  /* Bit trail. */
-  uint bit_trail;
-
-  /* Padding. */
-  int pad;
-} KernelLightTreeNode;
-static_assert_align(KernelLightTreeNode, 16);
-
-typedef struct KernelLightTreeEmitter {
-  /* Bounding cone. */
-  float theta_o;
-  float theta_e;
-
-  /* Energy. */
-  float energy;
-
-  /* prim_id denotes the location in the lights or triangles array. */
-  int prim_id;
-  struct {
-    int shader_flag;
-    int object_id;
-    EmissionSampling emission_sampling;
-  } mesh_light;
-
-  /* Parent. */
-  int parent_index;
-} KernelLightTreeEmitter;
-static_assert_align(KernelLightTreeEmitter, 16);
-
 typedef struct KernelParticle {
  int index;
  float age;
@@ -1469,13 +1390,12 @@ static_assert_align(KernelShaderEvalInput, 16);

 /* Pre-computed sample table sizes for PMJ02 sampler.
 *
- * NOTE: min and max samples *must* be a power of two, and patterns
+ * NOTE: divisions *must* be a power of two, and patterns
 * ideally should be as well.
 */
-#define MIN_PMJ_SAMPLES 256
-#define MAX_PMJ_SAMPLES 8192
-#define NUM_PMJ_DIMENSIONS 2
-#define NUM_PMJ_PATTERNS 256
+#define NUM_PMJ_DIVISIONS 32
+#define NUM_PMJ_SAMPLES ((NUM_PMJ_DIVISIONS) * (NUM_PMJ_DIVISIONS))
+#define NUM_PMJ_PATTERNS 64

 /* Device kernels.
 *
@@ -1604,19 +1524,22 @@ enum KernelFeatureFlag : uint32_t {
  /* Light render passes. */
  KERNEL_FEATURE_LIGHT_PASSES = (1U << 21U),

+  /* Shadow render pass. */
+  KERNEL_FEATURE_SHADOW_PASS = (1U << 22U),
+
  /* AO. */
-  KERNEL_FEATURE_AO_PASS = (1U << 22U),
-  KERNEL_FEATURE_AO_ADDITIVE = (1U << 23U),
+  KERNEL_FEATURE_AO_PASS = (1U << 23U),
+  KERNEL_FEATURE_AO_ADDITIVE = (1U << 24U),
  KERNEL_FEATURE_AO = (KERNEL_FEATURE_AO_PASS | KERNEL_FEATURE_AO_ADDITIVE),

  /* MNEE. */
-  KERNEL_FEATURE_MNEE = (1U << 24U),
+  KERNEL_FEATURE_MNEE = (1U << 25U),

  /* Path guiding. */
-  KERNEL_FEATURE_PATH_GUIDING = (1U << 25U),
+  KERNEL_FEATURE_PATH_GUIDING = (1U << 26U),

  /* OSL. */
-  KERNEL_FEATURE_OSL = (1U << 26U),
+  KERNEL_FEATURE_OSL = (1U << 27U),
 };

 /* Shader node feature mask, to specialize shader evaluation for kernels. */
--- a/intern/cycles/scene/CMakeLists.txt
+++ b/intern/cycles/scene/CMakeLists.txt
@@ -25,7 +25,6 @@ set(SRC
  integrator.cpp
  jitter.cpp
  light.cpp
-  light_tree.cpp
  mesh.cpp
  mesh_displace.cpp
  mesh_subdivision.cpp
@@ -64,7 +63,6 @@ set(SRC_HEADERS
  image_vdb.h
  integrator.h
  light.h
-  light_tree.h
  jitter.h
  mesh.h
  object.h
--- a/intern/cycles/scene/background.cpp
+++ b/intern/cycles/scene/background.cpp
@@ -4,7 +4,6 @@
 #include "scene/background.h"
 #include "device/device.h"
 #include "scene/integrator.h"
-#include "scene/light.h"
 #include "scene/scene.h"
 #include "scene/shader.h"
 #include "scene/shader_graph.h"
--- a/intern/cycles/scene/film.cpp
+++ b/intern/cycles/scene/film.cpp
@@ -187,6 +187,7 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
  kfilm->pass_transmission_indirect = PASS_UNUSED;
  kfilm->pass_volume_direct = PASS_UNUSED;
  kfilm->pass_volume_indirect = PASS_UNUSED;
+  kfilm->pass_shadow = PASS_UNUSED;
  kfilm->pass_lightgroup = PASS_UNUSED;

  /* Mark passes as unused so that the kernel knows the pass is inaccessible. */
@@ -294,6 +295,9 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
      case PASS_AO:
        kfilm->pass_ao = kfilm->pass_stride;
        break;
+      case PASS_SHADOW:
+        kfilm->pass_shadow = kfilm->pass_stride;
+        break;

      case PASS_DIFFUSE_COLOR:
        kfilm->pass_diffuse_color = kfilm->pass_stride;
@@ -723,6 +727,10 @@ uint Film::get_kernel_features(const Scene *scene) const
      kernel_features |= KERNEL_FEATURE_LIGHT_PASSES;
    }

+    if (pass_type == PASS_SHADOW) {
+      kernel_features |= KERNEL_FEATURE_SHADOW_PASS;
+    }
+
    if (pass_type == PASS_AO) {
      kernel_features |= KERNEL_FEATURE_AO_PASS;
    }
--- a/intern/cycles/scene/geometry.cpp
+++ b/intern/cycles/scene/geometry.cpp
@@ -271,7 +271,7 @@ void Geometry::tag_update(Scene *scene, bool rebuild)
  else {
    foreach (Node *node, used_shaders) {
      Shader *shader = static_cast<Shader *>(node);
-      if (shader->emission_sampling != EMISSION_SAMPLING_NONE) {
+      if (shader->has_surface_emission) {
        scene->light_manager->tag_update(scene, LightManager::EMISSIVE_MESH_MODIFIED);
        break;
      }
--- a/intern/cycles/scene/integrator.cpp
+++ b/intern/cycles/scene/integrator.cpp
@@ -102,8 +102,7 @@ NODE_DEFINE(Integrator)
  SOCKET_FLOAT(adaptive_threshold, "Adaptive Threshold", 0.01f);
  SOCKET_INT(adaptive_min_samples, "Adaptive Min Samples", 0);

-  SOCKET_BOOLEAN(use_light_tree, "Use light tree to optimize many light sampling", true);
-  SOCKET_FLOAT(light_sampling_threshold, "Light Sampling Threshold", 0.0f);
+  SOCKET_FLOAT(light_sampling_threshold, "Light Sampling Threshold", 0.01f);

  static NodeEnum sampling_pattern_enum;
  sampling_pattern_enum.insert("sobol_burley", SAMPLING_PATTERN_SOBOL_BURLEY);
@@ -251,7 +250,6 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
  kintegrator->sampling_pattern = sampling_pattern;
  kintegrator->scrambling_distance = scrambling_distance;

-  kintegrator->use_light_tree = scene->integrator->use_light_tree;
  if (light_sampling_threshold > 0.0f) {
    kintegrator->light_inv_rr_threshold = 1.0f / light_sampling_threshold;
  }
@@ -259,18 +257,12 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
    kintegrator->light_inv_rr_threshold = 0.0f;
  }

-  constexpr int num_sequences = NUM_PMJ_PATTERNS;
-  int sequence_size = clamp(next_power_of_two(aa_samples - 1), MIN_PMJ_SAMPLES, MAX_PMJ_SAMPLES);
  if (kintegrator->sampling_pattern == SAMPLING_PATTERN_PMJ &&
-      dscene->sample_pattern_lut.size() !=
-          (sequence_size * NUM_PMJ_DIMENSIONS * NUM_PMJ_PATTERNS)) {
-    kintegrator->pmj_sequence_size = sequence_size;
-
-    if (dscene->sample_pattern_lut.size() != 0) {
-      dscene->sample_pattern_lut.free();
-    }
+      dscene->sample_pattern_lut.size() == 0) {
+    constexpr int sequence_size = NUM_PMJ_SAMPLES;
+    constexpr int num_sequences = NUM_PMJ_PATTERNS;
    float2 *directions = (float2 *)dscene->sample_pattern_lut.alloc(sequence_size * num_sequences *
-                                                                    NUM_PMJ_DIMENSIONS);
+                                                                    2);
    TaskPool pool;
    for (int j = 0; j < num_sequences; ++j) {
      float2 *sequence = directions + j * sequence_size;
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Jeroen Bakker	f7cdba506b	Fix compilation error after rebase.	2022-11-15 14:20:06 +01:00
Jeroen Bakker	37b374d9ba	Merge branch 'master' into temp-xr-virtual-camera-experiment	2022-11-15 14:06:05 +01:00
Jeroen Bakker	80f0d9e29d	Add support for XrSession (untested).	2022-11-11 16:06:09 +01:00
Jeroen Bakker	3a3947bcf8	Revert incorrect change.	2022-11-11 15:07:23 +01:00
Jeroen Bakker	e37d254170	Rename to virtual monitor.	2022-11-11 14:53:21 +01:00
Jeroen Bakker	3da5748787	Fix corrupt rv3d.	2022-11-11 14:09:19 +01:00
Jeroen Bakker	ca134581a2	First setup of rv3d.	2022-11-09 14:32:56 +01:00
Jeroen Bakker	0a01bec400	Fixed using correct texture.	2022-11-09 13:18:12 +01:00
Jeroen Bakker	d63d2c8b9e	Use Material flag to identify virtual camera.	2022-11-09 12:18:22 +01:00
Jeroen Bakker	027ca9b91e	Add GPU debugging group.	2022-11-09 11:17:07 +01:00
Jeroen Bakker	935cabdb6a	Initial implementation of 2 stage drawing.	2022-11-02 15:47:53 +01:00
Jeroen Bakker	a083b23ceb	First iteration of a virtual camera node.	2022-11-02 13:17:35 +01:00