Compare commits

7 Commits

2881 changed files with 63419 additions and 100991 deletions


@@ -12,8 +12,6 @@ Checks: >
-readability-avoid-const-params-in-decls,
-readability-simplify-boolean-expr,
-readability-make-member-function-const,
-readability-suspicious-call-argument,
-readability-redundant-member-init,
-readability-misleading-indentation,
@@ -27,8 +25,6 @@ Checks: >
-bugprone-branch-clone,
-bugprone-macro-parentheses,
-bugprone-reserved-identifier,
-bugprone-easily-swappable-parameters,
-bugprone-implicit-widening-of-multiplication-result,
-bugprone-sizeof-expression,
-bugprone-integer-division,
@@ -44,8 +40,7 @@ Checks: >
-modernize-pass-by-value,
# Cannot be enabled yet, because using raw string literals in tests breaks
# the windows compiler currently.
-modernize-raw-string-literal,
-modernize-return-braced-init-list
-modernize-raw-string-literal
CheckOptions:
- key: modernize-use-default-member-init.UseAssignment


@@ -187,13 +187,6 @@ mark_as_advanced(CPACK_OVERRIDE_PACKAGENAME)
mark_as_advanced(BUILDINFO_OVERRIDE_DATE)
mark_as_advanced(BUILDINFO_OVERRIDE_TIME)
if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.16")
option(WITH_UNITY_BUILD "Enable unity build for modules that support it to improve compile times" ON)
mark_as_advanced(WITH_UNITY_BUILD)
else()
set(WITH_UNITY_BUILD OFF)
endif()
option(WITH_IK_ITASC "Enable ITASC IK solver (only disable for development & for incompatible C++ compilers)" ON)
option(WITH_IK_SOLVER "Enable Legacy IK solver (only disable for development)" ON)
option(WITH_FFTW3 "Enable FFTW3 support (Used for smoke, ocean sim, and audio effects)" ON)
@@ -433,40 +426,30 @@ mark_as_advanced(WITH_CYCLES_DEBUG_NAN)
mark_as_advanced(WITH_CYCLES_NATIVE_ONLY)
# NVIDIA CUDA & OptiX
if(NOT APPLE)
option(WITH_CYCLES_DEVICE_CUDA "Enable Cycles NVIDIA CUDA compute support" ON)
option(WITH_CYCLES_DEVICE_OPTIX "Enable Cycles NVIDIA OptiX support" ON)
mark_as_advanced(WITH_CYCLES_DEVICE_CUDA)
option(WITH_CYCLES_DEVICE_CUDA "Enable Cycles NVIDIA CUDA compute support" ON)
option(WITH_CYCLES_DEVICE_OPTIX "Enable Cycles NVIDIA OptiX support" ON)
mark_as_advanced(WITH_CYCLES_DEVICE_CUDA)
option(WITH_CYCLES_CUDA_BINARIES "Build Cycles NVIDIA CUDA binaries" OFF)
set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 compute_75 CACHE STRING "CUDA architectures to build binaries for")
option(WITH_CYCLES_CUBIN_COMPILER "Build cubins with nvrtc based compiler instead of nvcc" OFF)
option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful on machines with limited RAM)" OFF)
option(WITH_CUDA_DYNLOAD "Dynamically load CUDA libraries at runtime (for developers, makes cuda-gdb work)" ON)
mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
mark_as_advanced(WITH_CYCLES_CUBIN_COMPILER)
mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
mark_as_advanced(WITH_CUDA_DYNLOAD)
endif()
option(WITH_CYCLES_CUDA_BINARIES "Build Cycles NVIDIA CUDA binaries" OFF)
set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 compute_75 CACHE STRING "CUDA architectures to build binaries for")
option(WITH_CYCLES_CUBIN_COMPILER "Build cubins with nvrtc based compiler instead of nvcc" OFF)
option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful on machines with limited RAM)" OFF)
option(WITH_CUDA_DYNLOAD "Dynamically load CUDA libraries at runtime (for developers, makes cuda-gdb work)" ON)
mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
mark_as_advanced(WITH_CYCLES_CUBIN_COMPILER)
mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
mark_as_advanced(WITH_CUDA_DYNLOAD)
# AMD HIP
if(NOT APPLE)
if(WIN32)
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
else()
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" OFF)
endif()
option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 CACHE STRING "AMD HIP architectures to build binaries for")
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
endif()
# Apple Metal
if(APPLE)
option(WITH_CYCLES_DEVICE_METAL "Enable Cycles Apple Metal compute support" ON)
if(WIN32)
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
else()
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" OFF)
endif()
option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 CACHE STRING "AMD HIP architectures to build binaries for")
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
# Draw Manager
option(WITH_DRAW_DEBUG "Add extra debug capabilities to Draw Manager" OFF)
@@ -511,10 +494,11 @@ if(WIN32)
endif()
# This should be turned off when Blender enter beta/rc/release
if("${BLENDER_VERSION_CYCLE}" STREQUAL "alpha")
set(WITH_EXPERIMENTAL_FEATURES ON)
else()
if("${BLENDER_VERSION_CYCLE}" STREQUAL "release" OR
"${BLENDER_VERSION_CYCLE}" STREQUAL "rc")
set(WITH_EXPERIMENTAL_FEATURES OFF)
else()
set(WITH_EXPERIMENTAL_FEATURES ON)
endif()
# Unit testsing
@@ -856,7 +840,7 @@ if(WITH_AUDASPACE)
endif()
# Auto-enable CUDA dynload if toolkit is not found.
if(WITH_CYCLES AND WITH_CYCLES_DEVICE_CUDA AND NOT WITH_CUDA_DYNLOAD)
if(NOT WITH_CUDA_DYNLOAD)
find_package(CUDA)
if(NOT CUDA_FOUND)
message(STATUS "CUDA toolkit not found, using dynamic runtime loading of libraries (WITH_CUDA_DYNLOAD) instead")


@@ -2083,9 +2083,9 @@ compile_OIIO() {
cmake_d="$cmake_d -D OPENEXR_VERSION=$OPENEXR_VERSION"
if [ "$_with_built_openexr" = true ]; then
cmake_d="$cmake_d -D ILMBASE_ROOT=$INST/openexr"
cmake_d="$cmake_d -D OPENEXR_ROOT=$INST/openexr"
INFO "Ilmbase_ROOT=$INST/openexr"
cmake_d="$cmake_d -D ILMBASE_HOME=$INST/openexr"
cmake_d="$cmake_d -D OPENEXR_HOME=$INST/openexr"
INFO "ILMBASE_HOME=$INST/openexr"
fi
# ptex is only needed when nicholas bishop is ready
@@ -2374,9 +2374,9 @@ compile_OSL() {
#~ cmake_d="$cmake_d -D ILMBASE_VERSION=$ILMBASE_VERSION"
if [ "$_with_built_openexr" = true ]; then
cmake_d="$cmake_d -D ILMBASE_ROOT=$INST/openexr"
cmake_d="$cmake_d -D OPENEXR_ROOT=$INST/openexr"
INFO "Ilmbase_ROOT=$INST/openexr"
INFO "ILMBASE_HOME=$INST/openexr"
cmake_d="$cmake_d -D OPENEXR_ROOT_DIR=$INST/openexr"
cmake_d="$cmake_d -D ILMBASE_ROOT_DIR=$INST/openexr"
# XXX Temp workaround... sigh, ILMBase really messed the things up by defining their custom names ON by default :(
fi
@@ -5801,7 +5801,7 @@ print_info() {
PRINT "If you're using CMake add this to your configuration flags:"
_buildargs="-U *SNDFILE* -U PYTHON* -U *BOOST* -U *Boost* -U *TBB*"
_buildargs="$_buildargs -U *OPENCOLORIO* -U *OPENEXR* -U *OPENIMAGEIO* -U *LLVM* -U *CLANG* -U *CYCLES*"
_buildargs="$_buildargs -U *OPENCOLORIO* -U *OPENEXR* -U *OPENIMAGEIO* -U *LLVM* -U *CYCLES*"
_buildargs="$_buildargs -U *OPENSUBDIV* -U *OPENVDB* -U *BLOSC* -U *COLLADA* -U *FFMPEG* -U *ALEMBIC* -U *USD*"
_buildargs="$_buildargs -U *EMBREE* -U *OPENIMAGEDENOISE* -U *OPENXR*"


@@ -197,38 +197,3 @@ index 67ec0d15f..6dc3e85a0 100644
#else
#error Unknown architecture.
#endif
diff --git a/pxr/base/arch/demangle.cpp b/pxr/base/arch/demangle.cpp
index 67ec0d15f..6dc3e85a0 100644
--- a/pxr/base/arch/demangle.cpp
+++ b/pxr/base/arch/demangle.cpp
@@ -36,6 +36,7 @@
#if (ARCH_COMPILER_GCC_MAJOR == 3 && ARCH_COMPILER_GCC_MINOR >= 1) || \
ARCH_COMPILER_GCC_MAJOR > 3 || defined(ARCH_COMPILER_CLANG)
#define _AT_LEAST_GCC_THREE_ONE_OR_CLANG
+#include <cxxabi.h>
#endif
PXR_NAMESPACE_OPEN_SCOPE
@@ -138,7 +139,6 @@
#endif
#if defined(_AT_LEAST_GCC_THREE_ONE_OR_CLANG)
-#include <cxxabi.h>
/*
* This routine doesn't work when you get to gcc3.4.
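For context on the demangle hunk above: cxxabi.h declares abi::__cxa_demangle, and the patch moves that include up next to where the _AT_LEAST_GCC_THREE_ONE_OR_CLANG guard macro is defined. A minimal standalone sketch of that API (GCC/Clang only; my own illustration, not part of the patch):

    #include <cxxabi.h>
    #include <cstdio>
    #include <cstdlib>
    #include <typeinfo>
    #include <vector>

    int main() {
      int status = 0;
      const char *mangled = typeid(std::vector<int>).name();
      /* Returns a malloc'd buffer on success (status == 0); caller frees it. */
      char *demangled = abi::__cxa_demangle(mangled, nullptr, nullptr, &status);
      std::printf("%s -> %s\n", mangled, status == 0 ? demangled : "(failed)");
      std::free(demangled);
      return 0;
    }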
diff --git a/pxr/base/work/singularTask.h b/pxr/base/work/singularTask.h
index 67ec0d15f..6dc3e85a0 100644
--- a/pxr/base/work/singularTask.h
+++ b/pxr/base/work/singularTask.h
@@ -120,7 +120,7 @@
// case we go again to ensure the task can do whatever it
// was awakened to do. Once we successfully take the count
// to zero, we stop.
- size_t old = count;
+ std::size_t old = count;
do { _fn(); } while (
!count.compare_exchange_strong(old, 0));
});
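The singularTask.h hunk above only qualifies size_t as std::size_t, but the surrounding wake-counter pattern is worth spelling out: the task body re-runs until the atomic counter can be swapped to zero, so wake-ups that arrive mid-run are never lost. A self-contained sketch of that drain loop (my own minimal example, not USD code):

    #include <atomic>
    #include <cstddef>
    #include <cstdio>

    int main() {
      std::atomic<std::size_t> count{3}; /* three pending wake-ups */
      auto fn = [] { std::puts("task body ran"); };

      std::size_t old = count; /* snapshot; a failed CAS reloads it */
      /* Run, then try to swap the counter to zero. If another wake-up bumped
       * the counter since the snapshot, the CAS fails and the body runs again. */
      do { fn(); } while (!count.compare_exchange_strong(old, 0));
      return 0;
    }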


@@ -21,7 +21,7 @@ ENDIF()
SET(_optix_SEARCH_DIRS
${OPTIX_ROOT_DIR}
"$ENV{PROGRAMDATA}/NVIDIA Corporation/OptiX SDK 7.3.0"
"$ENV{PROGRAMDATA}/NVIDIA Corporation/OptiX SDK 7.0.0"
)
FIND_PATH(OPTIX_INCLUDE_DIR


@@ -114,7 +114,7 @@ def is_c_header(filename: str) -> bool:
def is_c(filename: str) -> bool:
ext = splitext(filename)[1]
return (ext in {".c", ".cpp", ".cxx", ".m", ".mm", ".rc", ".cc", ".inl", ".metal"})
return (ext in {".c", ".cpp", ".cxx", ".m", ".mm", ".rc", ".cc", ".inl"})
def is_c_any(filename: str) -> bool:


@@ -19,6 +19,9 @@ set(WITH_CODEC_SNDFILE OFF CACHE BOOL "" FORCE)
set(WITH_COMPOSITOR OFF CACHE BOOL "" FORCE)
set(WITH_COREAUDIO OFF CACHE BOOL "" FORCE)
set(WITH_CYCLES OFF CACHE BOOL "" FORCE)
set(WITH_CYCLES_DEVICE_OPTIX OFF CACHE BOOL "" FORCE)
set(WITH_CYCLES_EMBREE OFF CACHE BOOL "" FORCE)
set(WITH_CYCLES_OSL OFF CACHE BOOL "" FORCE)
set(WITH_DRACO OFF CACHE BOOL "" FORCE)
set(WITH_FFTW3 OFF CACHE BOOL "" FORCE)
set(WITH_FREESTYLE OFF CACHE BOOL "" FORCE)


@@ -61,7 +61,6 @@ set(WITH_MEM_JEMALLOC ON CACHE BOOL "" FORCE)
# platform dependent options
if(APPLE)
set(WITH_COREAUDIO ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_DEVICE_METAL ON CACHE BOOL "" FORCE)
endif()
if(NOT WIN32)
set(WITH_JACK ON CACHE BOOL "" FORCE)


@@ -257,6 +257,9 @@ if(WITH_BOOST)
if(WITH_INTERNATIONAL)
list(APPEND _boost_FIND_COMPONENTS locale)
endif()
if(WITH_CYCLES_NETWORK)
list(APPEND _boost_FIND_COMPONENTS serialization)
endif()
if(WITH_OPENVDB)
list(APPEND _boost_FIND_COMPONENTS iostreams)
endif()
@@ -336,7 +339,7 @@ if(WITH_LLVM)
endif()
if(WITH_CYCLES AND WITH_CYCLES_OSL)
if(WITH_CYCLES_OSL)
set(CYCLES_OSL ${LIBDIR}/osl)
find_library(OSL_LIB_EXEC NAMES oslexec PATHS ${CYCLES_OSL}/lib)
@@ -356,7 +359,7 @@ if(WITH_CYCLES AND WITH_CYCLES_OSL)
endif()
endif()
if(WITH_CYCLES AND WITH_CYCLES_EMBREE)
if(WITH_CYCLES_EMBREE)
find_package(Embree 3.8.0 REQUIRED)
# Increase stack size for Embree, only works for executables.
if(NOT WITH_PYTHON_MODULE)


@@ -96,7 +96,7 @@ else()
# Detect SDK version to use.
if(NOT DEFINED OSX_SYSTEM)
execute_process(
COMMAND xcrun --sdk macosx --show-sdk-version
COMMAND xcrun --show-sdk-version
OUTPUT_VARIABLE OSX_SYSTEM
OUTPUT_STRIP_TRAILING_WHITESPACE)
endif()


@@ -241,7 +241,7 @@ if(WITH_INPUT_NDOF)
endif()
endif()
if(WITH_CYCLES AND WITH_CYCLES_OSL)
if(WITH_CYCLES_OSL)
set(CYCLES_OSL ${LIBDIR}/osl CACHE PATH "Path to OpenShadingLanguage installation")
if(EXISTS ${CYCLES_OSL} AND NOT OSL_ROOT)
set(OSL_ROOT ${CYCLES_OSL})
@@ -314,7 +314,7 @@ if(WITH_BOOST)
endif()
set(Boost_USE_MULTITHREADED ON)
set(__boost_packages filesystem regex thread date_time)
if(WITH_CYCLES AND WITH_CYCLES_OSL)
if(WITH_CYCLES_OSL)
if(NOT (${OSL_LIBRARY_VERSION_MAJOR} EQUAL "1" AND ${OSL_LIBRARY_VERSION_MINOR} LESS "6"))
list(APPEND __boost_packages wave)
else()
@@ -323,6 +323,9 @@ if(WITH_BOOST)
if(WITH_INTERNATIONAL)
list(APPEND __boost_packages locale)
endif()
if(WITH_CYCLES_NETWORK)
list(APPEND __boost_packages serialization)
endif()
if(WITH_OPENVDB)
list(APPEND __boost_packages iostreams)
endif()
@@ -400,7 +403,7 @@ if(WITH_OPENCOLORIO)
endif()
endif()
if(WITH_CYCLES AND WITH_CYCLES_EMBREE)
if(WITH_CYCLES_EMBREE)
find_package(Embree 3.8.0 REQUIRED)
endif()


@@ -477,7 +477,7 @@ if(WITH_PYTHON)
endif()
if(WITH_BOOST)
if(WITH_CYCLES AND WITH_CYCLES_OSL)
if(WITH_CYCLES_OSL)
set(boost_extra_libs wave)
endif()
if(WITH_INTERNATIONAL)
@@ -520,7 +520,7 @@ if(WITH_BOOST)
debug ${BOOST_LIBPATH}/libboost_thread-${BOOST_DEBUG_POSTFIX}
debug ${BOOST_LIBPATH}/libboost_chrono-${BOOST_DEBUG_POSTFIX}
)
if(WITH_CYCLES AND WITH_CYCLES_OSL)
if(WITH_CYCLES_OSL)
set(BOOST_LIBRARIES ${BOOST_LIBRARIES}
optimized ${BOOST_LIBPATH}/libboost_wave-${BOOST_POSTFIX}
debug ${BOOST_LIBPATH}/libboost_wave-${BOOST_DEBUG_POSTFIX})
@@ -708,7 +708,7 @@ if(WITH_CODEC_SNDFILE)
set(LIBSNDFILE_LIBRARIES ${LIBSNDFILE_LIBPATH}/libsndfile-1.lib)
endif()
if(WITH_CYCLES AND WITH_CYCLES_OSL)
if(WITH_CYCLES_OSL)
set(CYCLES_OSL ${LIBDIR}/osl CACHE PATH "Path to OpenShadingLanguage installation")
set(OSL_SHADER_DIR ${CYCLES_OSL}/shaders)
# Shaders have moved around a bit between OSL versions, check multiple locations
@@ -741,7 +741,7 @@ if(WITH_CYCLES AND WITH_CYCLES_OSL)
endif()
endif()
if(WITH_CYCLES AND WITH_CYCLES_EMBREE)
if(WITH_CYCLES_EMBREE)
windows_find_package(Embree)
if(NOT EMBREE_FOUND)
set(EMBREE_INCLUDE_DIRS ${LIBDIR}/embree/include)


@@ -6,87 +6,91 @@
* as part of the normal development process.
*/
/* TODO: other modules.
* - `libmv`
* - `cycles`
* - `opencolorio`
* - `opensubdiv`
* - `openvdb`
* - `quadriflow`
/** \defgroup MEM Guarded memory (de)allocation
* \ingroup intern
*/
/** \defgroup intern_atomic Atomic Operations
* \ingroup intern */
/** \defgroup clog C-Logging (CLOG)
* \ingroup intern
*/
/** \defgroup intern_clog C-Logging (CLOG)
* \ingroup intern */
/** \defgroup ctr container
* \ingroup intern
*/
/** \defgroup intern_eigen Eigen
* \ingroup intern */
/** \defgroup iksolver iksolver
* \ingroup intern
*/
/** \defgroup intern_glew-mx GLEW with Multiple Rendering Context's
* \ingroup intern */
/** \defgroup itasc itasc
* \ingroup intern
*/
/** \defgroup intern_iksolver Inverse Kinematics (Solver)
* \ingroup intern */
/** \defgroup memutil memutil
* \ingroup intern
*/
/** \defgroup intern_itasc Inverse Kinematics (ITASC)
* \ingroup intern */
/** \defgroup mikktspace mikktspace
* \ingroup intern
*/
/** \defgroup intern_libc_compat libc Compatibility For Linux
* \ingroup intern */
/** \defgroup moto moto
* \ingroup intern
*/
/** \defgroup intern_locale Locale
* \ingroup intern */
/** \defgroup eigen eigen
* \ingroup intern
*/
/** \defgroup intern_mantaflow Manta-Flow Fluid Simulation
* \ingroup intern */
/** \defgroup smoke smoke
* \ingroup intern
*/
/** \defgroup intern_mem Guarded Memory (de)allocation
* \ingroup intern */
/** \defgroup intern_memutil Memory Utilities (memutil)
* \ingroup intern */
/** \defgroup intern_mikktspace MikktSpace
* \ingroup intern */
/** \defgroup intern_rigidbody Rigid-Body C-API
* \ingroup intern */
/** \defgroup intern_sky_model Sky Model
* \ingroup intern */
/** \defgroup intern_utf_conv UTF-8/16 Conversion (utfconv)
* \ingroup intern */
/** \defgroup string string
* \ingroup intern
*/
/** \defgroup audaspace Audaspace
* \ingroup intern undoc
* \todo add to doxygen */
* \todo add to doxygen
*/
/** \defgroup audcoreaudio Audaspace CoreAudio
* \ingroup audaspace */
* \ingroup audaspace
*/
/** \defgroup audfx Audaspace FX
* \ingroup audaspace */
* \ingroup audaspace
*/
/** \defgroup audopenal Audaspace OpenAL
* \ingroup audaspace */
* \ingroup audaspace
*/
/** \defgroup audpulseaudio Audaspace PulseAudio
* \ingroup audaspace */
* \ingroup audaspace
*/
/** \defgroup audwasapi Audaspace WASAPI
* \ingroup audaspace */
* \ingroup audaspace
*/
/** \defgroup audpython Audaspace Python
* \ingroup audaspace */
* \ingroup audaspace
*/
/** \defgroup audsdl Audaspace SDL
* \ingroup audaspace */
* \ingroup audaspace
*/
/** \defgroup audsrc Audaspace SRC
* \ingroup audaspace */
*
* \ingroup audaspace
*/
/** \defgroup audffmpeg Audaspace FFMpeg
* \ingroup audaspace */
* \ingroup audaspace
*/
/** \defgroup audfftw Audaspace FFTW
* \ingroup audaspace */
* \ingroup audaspace
*/
/** \defgroup audjack Audaspace Jack
* \ingroup audaspace */
* \ingroup audaspace
*/
/** \defgroup audsndfile Audaspace sndfile
* \ingroup audaspace */
* \ingroup audaspace
*/
/** \defgroup GHOST GHOST API
* \ingroup intern GUI


@@ -5,8 +5,7 @@
/** \defgroup bmesh BMesh
* \ingroup blender
*/
/** \defgroup compositor Compositing
* \ingroup blender */
/** \defgroup compositor Compositing */
/** \defgroup python Python
* \ingroup blender
@@ -79,8 +78,7 @@
* \ingroup blender
*/
/** \defgroup data DNA, RNA and .blend access
* \ingroup blender */
/** \defgroup data DNA, RNA and .blend access*/
/** \defgroup gpu GPU
* \ingroup blender
@@ -103,12 +101,11 @@
* merged in docs.
*/
/**
* \defgroup gui GUI
* \ingroup blender */
/** \defgroup gui GUI */
/** \defgroup wm Window Manager
* \ingroup gui */
* \ingroup blender gui
*/
/* ================================ */
@@ -282,8 +279,7 @@
* \ingroup gui
*/
/** \defgroup externformats External Formats
* \ingroup blender */
/** \defgroup externformats External Formats */
/** \defgroup collada COLLADA
* \ingroup externformats
@@ -312,7 +308,4 @@
/* ================================ */
/** \defgroup undoc Undocumented
*
* \brief Modules and libraries that are still undocumented,
* or lacking proper integration into the doxygen system, are marked in this group.
*/
* \brief Modules and libraries that are still undocumented, or lacking proper integration into the doxygen system, are marked in this group. */


@@ -61,7 +61,7 @@ def blender_extract_info(blender_bin: str) -> Dict[str, str]:
stdout=subprocess.PIPE,
).stdout.decode(encoding="utf-8")
blender_version_output = subprocess.run(
blender_version_ouput = subprocess.run(
[blender_bin, "--version"],
env=blender_env,
check=True,
@@ -73,7 +73,7 @@ def blender_extract_info(blender_bin: str) -> Dict[str, str]:
# check for each lines prefix to ensure these aren't included.
blender_version = ""
blender_date = ""
for l in blender_version_output.split("\n"):
for l in blender_version_ouput.split("\n"):
if l.startswith("Blender "):
# Remove 'Blender' prefix.
blender_version = l.split(" ", 1)[1].strip()


@@ -1103,7 +1103,6 @@ context_type_map = {
"selectable_objects": ("Object", True),
"selected_asset_files": ("FileSelectEntry", True),
"selected_bones": ("EditBone", True),
"selected_editable_actions": ("Action", True),
"selected_editable_bones": ("EditBone", True),
"selected_editable_fcurves": ("FCurve", True),
"selected_editable_keyframes": ("Keyframe", True),
@@ -1119,13 +1118,12 @@ context_type_map = {
"selected_pose_bones": ("PoseBone", True),
"selected_pose_bones_from_active_object": ("PoseBone", True),
"selected_sequences": ("Sequence", True),
"selected_visible_actions": ("Action", True),
"selected_visible_fcurves": ("FCurve", True),
"sequences": ("Sequence", True),
"soft_body": ("SoftBodyModifier", False),
"speaker": ("Speaker", False),
"texture": ("Texture", False),
"texture_slot": ("TextureSlot", False),
"texture_slot": ("MaterialTextureSlot", False),
"texture_user": ("ID", False),
"texture_user_property": ("Property", False),
"ui_list": ("UIList", False),

extern/hipew/README

@@ -1,12 +0,0 @@
The HIP Extension Wrangler Library (HIPEW) is a cross-platform open-source
C/C++ library to dynamically load the HIP library.
HIP (Heterogeneous-Compute Interface for Portability) is an API for C++
programming on AMD GPUs.
It is maintained as part of the Blender project, but included in extern/
for consistency with CUEW and CLEW libraries.
LICENSE
HIPEW is released under the Apache 2.0 license.
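The loading scheme the README describes is the usual wrangler pattern: resolve the vendor library and its symbols at runtime instead of linking against them. A rough sketch under assumed simplifications (libamdhip64.so and hipGetDeviceCount are the real Linux library and symbol names, but the int-based signature below is a stand-in for HIPEW's generated typedefs):

    #include <dlfcn.h>
    #include <cstdio>

    /* Stand-in for HIPEW's generated typedef; hipError_t is modeled as int. */
    typedef int (*hipGetDeviceCount_t)(int *);

    int main() {
      void *lib = dlopen("libamdhip64.so", RTLD_NOW | RTLD_LOCAL);
      if (!lib) { std::puts("HIP runtime not found"); return 1; }
      hipGetDeviceCount_t get_count =
          (hipGetDeviceCount_t)dlsym(lib, "hipGetDeviceCount");
      int count = 0;
      if (get_count && get_count(&count) == 0) {
        std::printf("HIP devices: %d\n", count);
      }
      dlclose(lib);
      return 0;
    }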


@@ -1,5 +0,0 @@
Project: Blender
URL: https://git.blender.org/blender.git
License: Apache 2.0
Upstream version: N/A
Local modifications: None


@@ -219,17 +219,17 @@ static int hipewHasOldDriver(const char *hip_path) {
DWORD verHandle = 0;
DWORD verSize = GetFileVersionInfoSize(hip_path, &verHandle);
int old_driver = 0;
if (verSize != 0) {
if(verSize != 0) {
LPSTR verData = (LPSTR)malloc(verSize);
if (GetFileVersionInfo(hip_path, verHandle, verSize, verData)) {
if(GetFileVersionInfo(hip_path, verHandle, verSize, verData)) {
LPBYTE lpBuffer = NULL;
UINT size = 0;
if (VerQueryValue(verData, "\\", (VOID FAR * FAR *)&lpBuffer, &size)) {
if (size) {
if(VerQueryValue(verData, "\\", (VOID FAR * FAR *)&lpBuffer, &size)) {
if(size) {
VS_FIXEDFILEINFO *verInfo = (VS_FIXEDFILEINFO *)lpBuffer;
/* Magic value from
* https://docs.microsoft.com/en-us/windows/win32/api/verrsrc/ns-verrsrc-vs_fixedfileinfo */
if (verInfo->dwSignature == 0xfeef04bd) {
if(verInfo->dwSignature == 0xfeef04bd) {
unsigned int fileVersionLS0 = (verInfo->dwFileVersionLS >> 16) & 0xffff;
unsigned int fileversionLS1 = (verInfo->dwFileVersionLS >> 0) & 0xffff;
/* Corresponds to versions older than AMD Radeon Pro 21.Q4. */
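The hunk above is whitespace-only, but for reference on the logic around it: VS_FIXEDFILEINFO packs a four-part w.x.y.z file version into two 32-bit fields, and the shifts in hipewHasOldDriver read the low pair exactly this way. A toy illustration with made-up version numbers:

    #include <cstdint>
    #include <cstdio>

    int main() {
      /* Pretend file version 30.13.21.1023, packed the VS_FIXEDFILEINFO way. */
      uint32_t dwFileVersionMS = (30u << 16) | 13u;
      uint32_t dwFileVersionLS = (21u << 16) | 1023u;
      std::printf("%u.%u.%u.%u\n",
                  (dwFileVersionMS >> 16) & 0xffff, dwFileVersionMS & 0xffff,
                  (dwFileVersionLS >> 16) & 0xffff, dwFileVersionLS & 0xffff);
      return 0;
    }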


@@ -1,7 +1,7 @@
Project: NanoSVG
URL: https://github.com/memononen/nanosvg
License: zlib
Upstream version: 3cdd4a9d7886
Upstream version:
Local modifications: Added some functionality to manage grease pencil layers
Added a fix to SVG import arc and float errors (https://developer.blender.org/rB11dc674c78b49fc4e0b7c134c375b6c8b8eacbcc)


@@ -25,6 +25,7 @@ add_subdirectory(ghost)
add_subdirectory(guardedalloc)
add_subdirectory(libmv)
add_subdirectory(memutil)
add_subdirectory(numaapi)
add_subdirectory(opencolorio)
add_subdirectory(opensubdiv)
add_subdirectory(mikktspace)


@@ -45,7 +45,7 @@
*/
/** \file
* \ingroup intern_atomic
* \ingroup Atomic
*
* \brief Provides wrapper around system-specific atomic primitives,
* and some extensions (faked-atomic operations over float numbers).


@@ -44,10 +44,6 @@
* The Original Code is: adapted from jemalloc.
*/
/** \file
* \ingroup intern_atomic
*/
#ifndef __ATOMIC_OPS_EXT_H__
#define __ATOMIC_OPS_EXT_H__


@@ -5,7 +5,7 @@
* All rights reserved.
* Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved.
* Copyright (C) 2009-2013 Facebook, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1. Redistributions of source code must retain the above copyright notice(s),
@@ -13,7 +13,7 @@
* 2. Redistributions in binary form must reproduce the above copyright notice(s),
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
@@ -26,10 +26,6 @@
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \ingroup intern_atomic
*/
#ifndef __ATOMIC_OPS_MSVC_H__
#define __ATOMIC_OPS_MSVC_H__


@@ -44,10 +44,6 @@
* The Original Code is: adapted from jemalloc.
*/
/** \file
* \ingroup intern_atomic
*/
#ifndef __ATOMIC_OPS_UNIX_H__
#define __ATOMIC_OPS_UNIX_H__


@@ -44,10 +44,6 @@
* The Original Code is: adapted from jemalloc.
*/
/** \file
* \ingroup intern_atomic
*/
#ifndef __ATOMIC_OPS_UTILS_H__
#define __ATOMIC_OPS_UTILS_H__


@@ -14,8 +14,11 @@
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef __CLG_LOG_H__
#define __CLG_LOG_H__
/** \file
* \ingroup intern_clog
* \ingroup clog
*
* C Logging Library (clog)
* ========================
@@ -65,9 +68,6 @@
* - 4+: May be used for more details than 3, should be avoided but not prevented.
*/
#ifndef __CLG_LOG_H__
#define __CLG_LOG_H__
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */


@@ -15,7 +15,7 @@
*/
/** \file
* \ingroup intern_clog
* \ingroup clog
*/
#include <assert.h>
@@ -388,7 +388,7 @@ static void clg_ctx_fatal_action(CLogContext *ctx)
static void clg_ctx_backtrace(CLogContext *ctx)
{
/* NOTE: we avoid writing to 'FILE', for back-trace we make an exception,
/* Note: we avoid writing to 'FILE', for back-trace we make an exception,
* if necessary we could have a version of the callback that writes to file
* descriptor all at once. */
ctx->callbacks.backtrace_fn(ctx->output_file);


@@ -82,7 +82,7 @@ static void session_print_status()
string status, substatus;
/* get status */
double progress = options.session->progress.get_progress();
float progress = options.session->progress.get_progress();
options.session->progress.get_status(status, substatus);
if (substatus != "")
@@ -183,7 +183,7 @@ static void display_info(Progress &progress)
progress.get_time(total_time, sample_time);
progress.get_status(status, substatus);
double progress_val = progress.get_progress();
float progress_val = progress.get_progress();
if (substatus != "")
status += ": " + substatus;


@@ -40,7 +40,6 @@ set(SRC
object_cull.cpp
output_driver.cpp
particles.cpp
pointcloud.cpp
curves.cpp
logging.cpp
python.cpp
@@ -88,7 +87,6 @@ endif()
set(ADDON_FILES
addon/__init__.py
addon/camera.py
addon/engine.py
addon/operators.py
addon/osl.py
@@ -103,11 +101,6 @@ add_definitions(${GL_DEFINITIONS})
if(WITH_CYCLES_DEVICE_HIP)
add_definitions(-DWITH_HIP)
endif()
if(WITH_CYCLES_DEVICE_METAL)
add_definitions(-DWITH_METAL)
endif()
if(WITH_MOD_FLUID)
add_definitions(-DWITH_FLUID)
endif()


@@ -1,84 +0,0 @@
#
# Copyright 2011-2021 Blender Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# <pep8 compliant>
# Fit to match default projective camera with focal_length 50 and sensor_width 36.
default_fisheye_polynomial = [-1.1735143712967577e-05,
-0.019988736953434998,
-3.3525322965709175e-06,
3.099275275886036e-06,
-2.6064646454854524e-08]
# Utilities to generate lens polynomials to match built-in camera types, only here
# for reference at the moment, not used by the code.
def create_grid(sensor_height, sensor_width):
import numpy as np
if sensor_height is None:
sensor_height = sensor_width / (16 / 9) # Default aspect ration 16:9
uu, vv = np.meshgrid(np.linspace(0, 1, 100), np.linspace(0, 1, 100))
uu = (uu - 0.5) * sensor_width
vv = (vv - 0.5) * sensor_height
rr = np.sqrt(uu ** 2 + vv ** 2)
return rr
def fisheye_lens_polynomial_from_projective(focal_length=50, sensor_width=36, sensor_height=None):
import numpy as np
rr = create_grid(sensor_height, sensor_width)
polynomial = np.polyfit(rr.flat, (-np.arctan(rr / focal_length)).flat, 4)
return list(reversed(polynomial))
def fisheye_lens_polynomial_from_projective_fov(fov, sensor_width=36, sensor_height=None):
import numpy as np
f = sensor_width / 2 / np.tan(fov / 2)
return fisheye_lens_polynomial_from_projective(f, sensor_width, sensor_height)
def fisheye_lens_polynomial_from_equisolid(lens=10.5, sensor_width=36, sensor_height=None):
import numpy as np
rr = create_grid(sensor_height, sensor_width)
x = rr.reshape(-1)
x = np.stack([x**i for i in [1, 2, 3, 4]])
y = (-2 * np.arcsin(rr / (2 * lens))).reshape(-1)
polynomial = np.linalg.lstsq(x.T, y.T, rcond=None)[0]
return [0] + list(polynomial)
def fisheye_lens_polynomial_from_equidistant(fov=180, sensor_width=36, sensor_height=None):
import numpy as np
return [0, -np.radians(fov) / sensor_width, 0, 0, 0]
def fisheye_lens_polynomial_from_distorted_projective_polynomial(k1, k2, k3, focal_length=50, sensor_width=36, sensor_height=None):
import numpy as np
rr = create_grid(sensor_height, sensor_width)
r2 = (rr / focal_length) ** 2
r4 = r2 * r2
r6 = r4 * r2
r_coeff = 1 + k1 * r2 + k2 * r4 + k3 * r6
polynomial = np.polyfit(rr.flat, (-np.arctan(rr / focal_length * r_coeff)).flat, 4)
return list(reversed(polynomial))
def fisheye_lens_polynomial_from_distorted_projective_divisions(k1, k2, focal_length=50, sensor_width=36, sensor_height=None):
import numpy as np
rr = create_grid(sensor_height, sensor_width)
r2 = (rr / focal_length) ** 2
r4 = r2 * r2
r_coeff = 1 + k1 * r2 + k2 * r4
polynomial = np.polyfit(rr.flat, (-np.arctan(rr / focal_length / r_coeff)).flat, 4)
return list(reversed(polynomial))
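All of the deleted camera.py helpers fit the same model: a degree-4 polynomial theta(r) = k0 + k1*r + k2*r^2 + k3*r^3 + k4*r^4 mapping sensor radius (mm) to view angle, and default_fisheye_polynomial at the top of the file is the fit of -atan(r / focal_length) for focal_length = 50 and sensor_width = 36. A quick standalone check of that claim using the constants from the file (illustrative C++, not project code):

    #include <cmath>
    #include <cstdio>

    int main() {
      /* default_fisheye_polynomial from the deleted addon/camera.py. */
      const double k[5] = {-1.1735143712967577e-05, -0.019988736953434998,
                           -3.3525322965709175e-06, 3.099275275886036e-06,
                           -2.6064646454854524e-08};
      const double focal_length = 50.0; /* mm, the default projective camera */
      for (double r : {5.0, 10.0, 20.0}) { /* radius on the sensor, in mm */
        /* Horner evaluation of k0 + k1*r + k2*r^2 + k3*r^3 + k4*r^4. */
        double theta = k[0] + r * (k[1] + r * (k[2] + r * (k[3] + r * k[4])));
        std::printf("r=%4.1f  poly=%+.6f  -atan(r/f)=%+.6f\n",
                    r, theta, -std::atan(r / focal_length));
      }
      return 0;
    }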


@@ -28,7 +28,7 @@ def _configure_argument_parser():
action='store_true')
parser.add_argument("--cycles-device",
help="Set the device to use for Cycles, overriding user preferences and the scene setting."
"Valid options are 'CPU', 'CUDA', 'OPTIX', 'HIP' or 'METAL'."
"Valid options are 'CPU', 'CUDA', 'OPTIX', or 'HIP'"
"Additionally, you can append '+CPU' to any GPU type for hybrid rendering.",
default=None)
return parser
@@ -60,8 +60,9 @@ def init():
path = os.path.dirname(__file__)
user_path = os.path.dirname(os.path.abspath(bpy.utils.user_resource('CONFIG', path='')))
temp_path = bpy.app.tempdir
_cycles.init(path, user_path, bpy.app.background)
_cycles.init(path, user_path, temp_path, bpy.app.background)
_parse_command_line()


@@ -33,7 +33,6 @@ from math import pi
# enums
from . import engine
from . import camera
enum_devices = (
('CPU', "CPU", "Use CPU for rendering"),
@@ -73,8 +72,6 @@ enum_panorama_types = (
('FISHEYE_EQUISOLID', "Fisheye Equisolid",
"Similar to most fisheye modern lens, takes sensor dimensions into consideration"),
('MIRRORBALL', "Mirror Ball", "Uses the mirror ball mapping"),
('FISHEYE_LENS_POLYNOMIAL', "Fisheye Lens Polynomial",
"Defines the lens projection as polynomial to allow real world camera lenses to be mimicked."),
)
enum_curve_shape = (
@@ -114,8 +111,7 @@ enum_device_type = (
('CPU', "CPU", "CPU", 0),
('CUDA', "CUDA", "CUDA", 1),
('OPTIX', "OptiX", "OptiX", 3),
('HIP', "HIP", "HIP", 4),
('METAL', "Metal", "Metal", 5)
("HIP", "HIP", "HIP", 4)
)
enum_texture_limit = (
@@ -433,7 +429,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
)
direct_light_sampling_type: EnumProperty(
name="Direct Light Sampling",
name="Direct Light Sampling Type",
description="The type of strategy used for sampling direct light contributions",
items=enum_direct_light_sampling_type,
default='MULTIPLE_IMPORTANCE_SAMPLING',
@@ -794,7 +790,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
)
use_auto_tile: BoolProperty(
name="Use Tiling",
name="Using Tiling",
description="Render high resolution images in tiles to reduce memory usage, using the specified tile size. Tiles are cached to disk while rendering to save memory",
default=True,
)
@@ -802,7 +798,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
name="Tile Size",
default=2048,
description="",
min=8, max=8192,
min=8, max=16384,
)
# Various fine-tuning debug flags
@@ -894,32 +890,6 @@ class CyclesCameraSettings(bpy.types.PropertyGroup):
default=pi,
)
fisheye_polynomial_k0: FloatProperty(
name="Fisheye Polynomial K0",
description="Coefficient K0 of the lens polinomial",
default=camera.default_fisheye_polynomial[0], precision=6, step=0.1, subtype='ANGLE',
)
fisheye_polynomial_k1: FloatProperty(
name="Fisheye Polynomial K1",
description="Coefficient K1 of the lens polinomial",
default=camera.default_fisheye_polynomial[1], precision=6, step=0.1, subtype='ANGLE',
)
fisheye_polynomial_k2: FloatProperty(
name="Fisheye Polynomial K2",
description="Coefficient K2 of the lens polinomial",
default=camera.default_fisheye_polynomial[2], precision=6, step=0.1, subtype='ANGLE',
)
fisheye_polynomial_k3: FloatProperty(
name="Fisheye Polynomial K3",
description="Coefficient K3 of the lens polinomial",
default=camera.default_fisheye_polynomial[3], precision=6, step=0.1, subtype='ANGLE',
)
fisheye_polynomial_k4: FloatProperty(
name="Fisheye Polynomial K4",
description="Coefficient K4 of the lens polinomial",
default=camera.default_fisheye_polynomial[4], precision=6, step=0.1, subtype='ANGLE',
)
@classmethod
def register(cls):
bpy.types.Camera.cycles = PointerProperty(
@@ -1342,7 +1312,8 @@ class CyclesPreferences(bpy.types.AddonPreferences):
def get_device_types(self, context):
import _cycles
has_cuda, has_optix, has_hip, has_metal = _cycles.get_device_types()
has_cuda, has_optix, has_hip = _cycles.get_device_types()
list = [('NONE', "None", "Don't use compute device", 0)]
if has_cuda:
list.append(('CUDA', "CUDA", "Use CUDA for GPU acceleration", 1))
@@ -1350,8 +1321,6 @@ class CyclesPreferences(bpy.types.AddonPreferences):
list.append(('OPTIX', "OptiX", "Use OptiX for GPU acceleration", 3))
if has_hip:
list.append(('HIP', "HIP", "Use HIP for GPU acceleration", 4))
if has_metal:
list.append(('METAL', "Metal", "Use Metal for GPU acceleration", 5))
return list
@@ -1377,7 +1346,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
def update_device_entries(self, device_list):
for device in device_list:
if not device[1] in {'CUDA', 'OPTIX', 'CPU', 'HIP', 'METAL'}:
if not device[1] in {'CUDA', 'OPTIX', 'CPU', 'HIP'}:
continue
# Try to find existing Device entry
entry = self.find_existing_device_entry(device)
@@ -1421,7 +1390,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
import _cycles
# Ensure `self.devices` is not re-allocated when the second call to
# get_devices_for_type is made, freeing items from the first list.
for device_type in ('CUDA', 'OPTIX', 'HIP', 'METAL'):
for device_type in ('CUDA', 'OPTIX', 'HIP'):
self.update_device_entries(_cycles.available_devices(device_type))
# Deprecated: use refresh_devices instead.
@@ -1473,8 +1442,6 @@ class CyclesPreferences(bpy.types.AddonPreferences):
col.label(text="Requires discrete AMD GPU with RDNA architecture", icon='BLANK1')
if sys.platform[:3] == "win":
col.label(text="and AMD Radeon Pro 21.Q4 driver or newer", icon='BLANK1')
elif device_type == 'METAL':
col.label(text="Requires Apple Silicon and macOS 12.0 or newer", icon='BLANK1')
return
for device in devices:


@@ -97,11 +97,6 @@ def use_cpu(context):
return (get_device_type(context) == 'NONE' or cscene.device == 'CPU')
def use_metal(context):
cscene = context.scene.cycles
return (get_device_type(context) == 'METAL' and cscene.device == 'GPU')
def use_cuda(context):
cscene = context.scene.cycles
@@ -1020,7 +1015,7 @@ class CYCLES_OBJECT_PT_motion_blur(CyclesButtonsPanel, Panel):
def poll(cls, context):
ob = context.object
if CyclesButtonsPanel.poll(context) and ob:
if ob.type in {'MESH', 'CURVE', 'CURVE', 'SURFACE', 'FONT', 'META', 'CAMERA', 'HAIR', 'POINTCLOUD'}:
if ob.type in {'MESH', 'CURVE', 'CURVE', 'SURFACE', 'FONT', 'META', 'CAMERA'}:
return True
if ob.instance_type == 'COLLECTION' and ob.instance_collection:
return True
@@ -1824,38 +1819,37 @@ class CYCLES_RENDER_PT_debug(CyclesDebugButtonsPanel, Panel):
def draw(self, context):
layout = self.layout
layout.use_property_split = True
layout.use_property_decorate = False # No animation.
scene = context.scene
cscene = scene.cycles
col = layout.column(heading="CPU")
col = layout.column()
col.label(text="CPU Flags:")
row = col.row(align=True)
row.prop(cscene, "debug_use_cpu_sse2", toggle=True)
row.prop(cscene, "debug_use_cpu_sse3", toggle=True)
row.prop(cscene, "debug_use_cpu_sse41", toggle=True)
row.prop(cscene, "debug_use_cpu_avx", toggle=True)
row.prop(cscene, "debug_use_cpu_avx2", toggle=True)
col.prop(cscene, "debug_bvh_layout", text="BVH")
col.prop(cscene, "debug_bvh_layout")
col.separator()
col = layout.column(heading="CUDA")
col = layout.column()
col.label(text="CUDA Flags:")
col.prop(cscene, "debug_use_cuda_adaptive_compile")
col = layout.column(heading="OptiX")
col.prop(cscene, "debug_use_optix_debug", text="Module Debug")
col.separator()
col.prop(cscene, "debug_bvh_type", text="Viewport BVH")
col = layout.column()
col.label(text="OptiX Flags:")
col.prop(cscene, "debug_use_optix_debug")
col.separator()
import _cycles
if _cycles.with_debug:
col.prop(cscene, "direct_light_sampling_type")
col = layout.column()
col.prop(cscene, "debug_bvh_type")
class CYCLES_RENDER_PT_simplify(CyclesButtonsPanel, Panel):


@@ -69,12 +69,6 @@ struct BlenderCamera {
float pole_merge_angle_from;
float pole_merge_angle_to;
float fisheye_polynomial_k0;
float fisheye_polynomial_k1;
float fisheye_polynomial_k2;
float fisheye_polynomial_k3;
float fisheye_polynomial_k4;
enum { AUTO, HORIZONTAL, VERTICAL } sensor_fit;
float sensor_width;
float sensor_height;
@@ -206,12 +200,6 @@ static void blender_camera_from_object(BlenderCamera *bcam,
bcam->longitude_min = RNA_float_get(&ccamera, "longitude_min");
bcam->longitude_max = RNA_float_get(&ccamera, "longitude_max");
bcam->fisheye_polynomial_k0 = RNA_float_get(&ccamera, "fisheye_polynomial_k0");
bcam->fisheye_polynomial_k1 = RNA_float_get(&ccamera, "fisheye_polynomial_k1");
bcam->fisheye_polynomial_k2 = RNA_float_get(&ccamera, "fisheye_polynomial_k2");
bcam->fisheye_polynomial_k3 = RNA_float_get(&ccamera, "fisheye_polynomial_k3");
bcam->fisheye_polynomial_k4 = RNA_float_get(&ccamera, "fisheye_polynomial_k4");
bcam->interocular_distance = b_camera.stereo().interocular_distance();
if (b_camera.stereo().convergence_mode() == BL::CameraStereoData::convergence_mode_PARALLEL) {
bcam->convergence_distance = FLT_MAX;
@@ -434,8 +422,7 @@ static void blender_camera_sync(Camera *cam,
cam->set_full_height(height);
/* panorama sensor */
if (bcam->type == CAMERA_PANORAMA && (bcam->panorama_type == PANORAMA_FISHEYE_EQUISOLID ||
bcam->panorama_type == PANORAMA_FISHEYE_LENS_POLYNOMIAL)) {
if (bcam->type == CAMERA_PANORAMA && bcam->panorama_type == PANORAMA_FISHEYE_EQUISOLID) {
float fit_xratio = (float)bcam->render_width * bcam->pixelaspect.x;
float fit_yratio = (float)bcam->render_height * bcam->pixelaspect.y;
bool horizontal_fit;
@@ -478,12 +465,6 @@ static void blender_camera_sync(Camera *cam,
cam->set_latitude_min(bcam->latitude_min);
cam->set_latitude_max(bcam->latitude_max);
cam->set_fisheye_polynomial_k0(bcam->fisheye_polynomial_k0);
cam->set_fisheye_polynomial_k1(bcam->fisheye_polynomial_k1);
cam->set_fisheye_polynomial_k2(bcam->fisheye_polynomial_k2);
cam->set_fisheye_polynomial_k3(bcam->fisheye_polynomial_k3);
cam->set_fisheye_polynomial_k4(bcam->fisheye_polynomial_k4);
cam->set_longitude_min(bcam->longitude_min);
cam->set_longitude_max(bcam->longitude_max);


@@ -819,14 +819,11 @@ void BlenderSync::sync_hair(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, H
new_hair.set_used_shaders(used_shaders);
if (view_layer.use_hair) {
#ifdef WITH_HAIR_NODES
if (b_ob_info.object_data.is_a(&RNA_Hair)) {
/* Hair object. */
sync_hair(&new_hair, b_ob_info, false);
}
else
#endif
{
else {
/* Particle hair. */
bool need_undeformed = new_hair.need_attribute(scene, ATTR_STD_GENERATED);
BL::Mesh b_mesh = object_to_mesh(
@@ -873,15 +870,12 @@ void BlenderSync::sync_hair_motion(BL::Depsgraph b_depsgraph,
/* Export deformed coordinates. */
if (ccl::BKE_object_is_deform_modified(b_ob_info, b_scene, preview)) {
#ifdef WITH_HAIR_NODES
if (b_ob_info.object_data.is_a(&RNA_Hair)) {
/* Hair object. */
sync_hair(hair, b_ob_info, true, motion_step);
return;
}
else
#endif
{
else {
/* Particle hair. */
BL::Mesh b_mesh = object_to_mesh(
b_data, b_ob_info, b_depsgraph, false, Mesh::SUBDIVISION_NONE);


@@ -27,7 +27,6 @@ enum ComputeDevice {
COMPUTE_DEVICE_CUDA = 1,
COMPUTE_DEVICE_OPTIX = 3,
COMPUTE_DEVICE_HIP = 4,
COMPUTE_DEVICE_METAL = 5,
COMPUTE_DEVICE_NUM
};
@@ -86,9 +85,6 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
else if (compute_device == COMPUTE_DEVICE_HIP) {
mask |= DEVICE_MASK_HIP;
}
else if (compute_device == COMPUTE_DEVICE_METAL) {
mask |= DEVICE_MASK_METAL;
}
vector<DeviceInfo> devices = Device::available_devices(mask);
/* Match device preferences and available devices. */


@@ -272,300 +272,12 @@ uint BlenderDisplaySpaceShader::get_shader_program()
return shader_program_;
}
/* --------------------------------------------------------------------
* DrawTile.
*/
/* Higher level representation of a texture from the graphics library. */
class GLTexture {
public:
/* Global counter for all allocated OpenGL textures used by instances of this class. */
static inline std::atomic<int> num_used = 0;
GLTexture() = default;
~GLTexture()
{
assert(gl_id == 0);
}
GLTexture(const GLTexture &other) = delete;
GLTexture &operator=(GLTexture &other) = delete;
GLTexture(GLTexture &&other) noexcept
: gl_id(other.gl_id), width(other.width), height(other.height)
{
other.reset();
}
GLTexture &operator=(GLTexture &&other)
{
if (this == &other) {
return *this;
}
gl_id = other.gl_id;
width = other.width;
height = other.height;
other.reset();
return *this;
}
bool gl_resources_ensure()
{
if (gl_id) {
return true;
}
/* Create texture. */
glGenTextures(1, &gl_id);
if (!gl_id) {
LOG(ERROR) << "Error creating texture.";
return false;
}
/* Configure the texture. */
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, gl_id);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
/* Clamp to edge so that precision issues when zoomed out (which forces linear interpolation)
* does not cause unwanted repetition. */
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glBindTexture(GL_TEXTURE_2D, 0);
++num_used;
return true;
}
void gl_resources_destroy()
{
if (!gl_id) {
return;
}
glDeleteTextures(1, &gl_id);
reset();
--num_used;
}
/* OpenGL resource IDs of the texture.
*
* NOTE: Allocated on the render engine's context. */
uint gl_id = 0;
/* Dimensions of the texture in pixels. */
int width = 0;
int height = 0;
protected:
void reset()
{
gl_id = 0;
width = 0;
height = 0;
}
};
/* Higher level representation of a Pixel Buffer Object (PBO) from the graphics library. */
class GLPixelBufferObject {
public:
/* Global counter for all allocated OpenGL PBOs used by instances of this class. */
static inline std::atomic<int> num_used = 0;
GLPixelBufferObject() = default;
~GLPixelBufferObject()
{
assert(gl_id == 0);
}
GLPixelBufferObject(const GLPixelBufferObject &other) = delete;
GLPixelBufferObject &operator=(GLPixelBufferObject &other) = delete;
GLPixelBufferObject(GLPixelBufferObject &&other) noexcept
: gl_id(other.gl_id), width(other.width), height(other.height)
{
other.reset();
}
GLPixelBufferObject &operator=(GLPixelBufferObject &&other)
{
if (this == &other) {
return *this;
}
gl_id = other.gl_id;
width = other.width;
height = other.height;
other.reset();
return *this;
}
bool gl_resources_ensure()
{
if (gl_id) {
return true;
}
glGenBuffers(1, &gl_id);
if (!gl_id) {
LOG(ERROR) << "Error creating texture pixel buffer object.";
return false;
}
++num_used;
return true;
}
void gl_resources_destroy()
{
if (!gl_id) {
return;
}
glDeleteBuffers(1, &gl_id);
reset();
--num_used;
}
/* OpenGL resource IDs of the PBO.
*
* NOTE: Allocated on the render engine's context. */
uint gl_id = 0;
/* Dimensions of the PBO. */
int width = 0;
int height = 0;
protected:
void reset()
{
gl_id = 0;
width = 0;
height = 0;
}
};
class DrawTile {
public:
DrawTile() = default;
~DrawTile() = default;
DrawTile(const DrawTile &other) = delete;
DrawTile &operator=(const DrawTile &other) = delete;
DrawTile(DrawTile &&other) noexcept = default;
DrawTile &operator=(DrawTile &&other) = default;
bool gl_resources_ensure()
{
if (!texture.gl_resources_ensure()) {
gl_resources_destroy();
return false;
}
if (!gl_vertex_buffer) {
glGenBuffers(1, &gl_vertex_buffer);
if (!gl_vertex_buffer) {
LOG(ERROR) << "Error allocating tile VBO.";
gl_resources_destroy();
return false;
}
}
return true;
}
void gl_resources_destroy()
{
texture.gl_resources_destroy();
if (gl_vertex_buffer) {
glDeleteBuffers(1, &gl_vertex_buffer);
gl_vertex_buffer = 0;
}
}
inline bool ready_to_draw() const
{
return texture.gl_id != 0;
}
/* Texture which contains pixels of the tile. */
GLTexture texture;
/* Display parameters the texture of this tile has been updated for. */
BlenderDisplayDriver::Params params;
/* OpenGL resources needed for drawing. */
uint gl_vertex_buffer = 0;
};
class DrawTileAndPBO {
public:
bool gl_resources_ensure()
{
if (!tile.gl_resources_ensure() || !buffer_object.gl_resources_ensure()) {
gl_resources_destroy();
return false;
}
return true;
}
void gl_resources_destroy()
{
tile.gl_resources_destroy();
buffer_object.gl_resources_destroy();
}
DrawTile tile;
GLPixelBufferObject buffer_object;
};
/* --------------------------------------------------------------------
* BlenderDisplayDriver.
*/
struct BlenderDisplayDriver::Tiles {
/* Resources of a tile which is being currently rendered. */
DrawTileAndPBO current_tile;
/* All tiles which rendering is finished and which content will not be changed. */
struct {
vector<DrawTile> tiles;
void gl_resources_destroy_and_clear()
{
for (DrawTile &tile : tiles) {
tile.gl_resources_destroy();
}
tiles.clear();
}
} finished_tiles;
};
BlenderDisplayDriver::BlenderDisplayDriver(BL::RenderEngine &b_engine, BL::Scene &b_scene)
: b_engine_(b_engine),
display_shader_(BlenderDisplayShader::create(b_engine, b_scene)),
tiles_(make_unique<Tiles>())
: b_engine_(b_engine), display_shader_(BlenderDisplayShader::create(b_engine, b_scene))
{
/* Create context while on the main thread. */
gl_context_create();
@@ -580,21 +292,6 @@ BlenderDisplayDriver::~BlenderDisplayDriver()
* Update procedure.
*/
void BlenderDisplayDriver::next_tile_begin()
{
if (!tiles_->current_tile.tile.ready_to_draw()) {
LOG(ERROR)
<< "Unexpectedly moving to the next tile without any data provided for current tile.";
return;
}
/* Moving to the next tile without giving render data for the current tile is not an expected
* situation. */
DCHECK(!need_clear_);
tiles_->finished_tiles.tiles.emplace_back(std::move(tiles_->current_tile.tile));
}
bool BlenderDisplayDriver::update_begin(const Params &params,
int texture_width,
int texture_height)
@@ -615,33 +312,24 @@ bool BlenderDisplayDriver::update_begin(const Params &params,
glWaitSync((GLsync)gl_render_sync_, 0, GL_TIMEOUT_IGNORED);
}
DrawTile &current_tile = tiles_->current_tile.tile;
GLPixelBufferObject &current_tile_buffer_object = tiles_->current_tile.buffer_object;
/* Clear storage of all finished tiles when display clear is requested.
* Do it when new tile data is provided to handle the display clear flag in a single place.
* It also makes the logic reliable from the whether drawing did happen or not point of view. */
if (need_clear_) {
tiles_->finished_tiles.gl_resources_destroy_and_clear();
need_clear_ = false;
}
if (!tiles_->current_tile.gl_resources_ensure()) {
tiles_->current_tile.gl_resources_destroy();
if (!gl_texture_resources_ensure()) {
gl_context_disable();
return false;
}
/* Update texture dimensions if needed. */
if (current_tile.texture.width != texture_width ||
current_tile.texture.height != texture_height) {
if (texture_.width != texture_width || texture_.height != texture_height) {
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, current_tile.texture.gl_id);
glBindTexture(GL_TEXTURE_2D, texture_.gl_id);
glTexImage2D(
GL_TEXTURE_2D, 0, GL_RGBA16F, texture_width, texture_height, 0, GL_RGBA, GL_HALF_FLOAT, 0);
current_tile.texture.width = texture_width;
current_tile.texture.height = texture_height;
texture_.width = texture_width;
texture_.height = texture_height;
glBindTexture(GL_TEXTURE_2D, 0);
/* Texture did change, and no pixel storage was provided. Tag for an explicit zeroing out to
* avoid undefined content. */
texture_.need_clear = true;
}
/* Update PBO dimensions if needed.
@@ -653,58 +341,29 @@ bool BlenderDisplayDriver::update_begin(const Params &params,
* sending too much data to GPU when resolution divider is not 1. */
/* TODO(sergey): Investigate whether keeping the PBO exact size of the texture makes non-interop
* mode faster. */
const int buffer_width = params.size.x;
const int buffer_height = params.size.y;
if (current_tile_buffer_object.width != buffer_width ||
current_tile_buffer_object.height != buffer_height) {
const int buffer_width = params.full_size.x;
const int buffer_height = params.full_size.y;
if (texture_.buffer_width != buffer_width || texture_.buffer_height != buffer_height) {
const size_t size_in_bytes = sizeof(half4) * buffer_width * buffer_height;
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, current_tile_buffer_object.gl_id);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id);
glBufferData(GL_PIXEL_UNPACK_BUFFER, size_in_bytes, 0, GL_DYNAMIC_DRAW);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
current_tile_buffer_object.width = buffer_width;
current_tile_buffer_object.height = buffer_height;
texture_.buffer_width = buffer_width;
texture_.buffer_height = buffer_height;
}
/* Store an updated parameters of the current tile.
* In theory it is only needed once per update of the tile, but doing it on every update is
* the easiest and is not expensive. */
tiles_->current_tile.tile.params = params;
/* New content will be provided to the texture in one way or another, so mark this in a
* centralized place. */
texture_.need_update = true;
texture_.params = params;
return true;
}
static void update_tile_texture_pixels(const DrawTileAndPBO &tile)
{
const GLTexture &texture = tile.tile.texture;
DCHECK_NE(tile.buffer_object.gl_id, 0);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, texture.gl_id);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, tile.buffer_object.gl_id);
glTexSubImage2D(
GL_TEXTURE_2D, 0, 0, 0, texture.width, texture.height, GL_RGBA, GL_HALF_FLOAT, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
glBindTexture(GL_TEXTURE_2D, 0);
}
void BlenderDisplayDriver::update_end()
{
/* Unpack the PBO into the texture as soon as the new content is provided.
*
* This allows to ensure that the unpacking happens while resources like graphics interop (which
* lifetime is outside of control of the display driver) are still valid, as well as allows to
* move the tile from being current to finished immediately after this call.
*
* One concern with this approach is that if the update happens more often than drawing then
* doing the unpack here occupies GPU transfer for no good reason. However, the render scheduler
* takes care of ensuring updates don't happen that often. In regular applications redraw will
* happen much more often than this update. */
update_tile_texture_pixels(tiles_->current_tile);
gl_upload_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
glFlush();
@@ -717,11 +376,7 @@ void BlenderDisplayDriver::update_end()
half4 *BlenderDisplayDriver::map_texture_buffer()
{
const uint pbo_gl_id = tiles_->current_tile.buffer_object.gl_id;
DCHECK_NE(pbo_gl_id, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_gl_id);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id);
half4 *mapped_rgba_pixels = reinterpret_cast<half4 *>(
glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY));
@@ -729,6 +384,15 @@ half4 *BlenderDisplayDriver::map_texture_buffer()
LOG(ERROR) << "Error mapping BlenderDisplayDriver pixel buffer object.";
}
if (texture_.need_clear) {
const int64_t texture_width = texture_.width;
const int64_t texture_height = texture_.height;
memset(reinterpret_cast<void *>(mapped_rgba_pixels),
0,
texture_width * texture_height * sizeof(half4));
texture_.need_clear = false;
}
return mapped_rgba_pixels;
}
@@ -747,9 +411,12 @@ BlenderDisplayDriver::GraphicsInterop BlenderDisplayDriver::graphics_interop_get
{
GraphicsInterop interop_dst;
interop_dst.buffer_width = tiles_->current_tile.buffer_object.width;
interop_dst.buffer_height = tiles_->current_tile.buffer_object.height;
interop_dst.opengl_pbo_id = tiles_->current_tile.buffer_object.gl_id;
interop_dst.buffer_width = texture_.buffer_width;
interop_dst.buffer_height = texture_.buffer_height;
interop_dst.opengl_pbo_id = texture_.gl_pbo_id;
interop_dst.need_clear = texture_.need_clear;
texture_.need_clear = false;
return interop_dst;
}
@@ -770,7 +437,7 @@ void BlenderDisplayDriver::graphics_interop_deactivate()
void BlenderDisplayDriver::clear()
{
need_clear_ = true;
texture_.need_clear = true;
}
void BlenderDisplayDriver::set_zoom(float zoom_x, float zoom_y)
@@ -778,155 +445,26 @@ void BlenderDisplayDriver::set_zoom(float zoom_x, float zoom_y)
zoom_ = make_float2(zoom_x, zoom_y);
}
/* Update vertex buffer with new coordinates of vertex positions and texture coordinates.
* This buffer is used to render texture in the viewport.
*
* NOTE: The buffer needs to be bound. */
static void vertex_buffer_update(const DisplayDriver::Params &params)
{
const int x = params.full_offset.x;
const int y = params.full_offset.y;
const int width = params.size.x;
const int height = params.size.y;
/* Invalidate old contents - avoids stalling if the buffer is still waiting in queue to be
* rendered. */
glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);
float *vpointer = reinterpret_cast<float *>(glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY));
if (!vpointer) {
return;
}
vpointer[0] = 0.0f;
vpointer[1] = 0.0f;
vpointer[2] = x;
vpointer[3] = y;
vpointer[4] = 1.0f;
vpointer[5] = 0.0f;
vpointer[6] = x + width;
vpointer[7] = y;
vpointer[8] = 1.0f;
vpointer[9] = 1.0f;
vpointer[10] = x + width;
vpointer[11] = y + height;
vpointer[12] = 0.0f;
vpointer[13] = 1.0f;
vpointer[14] = x;
vpointer[15] = y + height;
glUnmapBuffer(GL_ARRAY_BUFFER);
}
static void draw_tile(const float2 &zoom,
const int texcoord_attribute,
const int position_attribute,
const DrawTile &draw_tile)
{
if (!draw_tile.ready_to_draw()) {
return;
}
const GLTexture &texture = draw_tile.texture;
DCHECK_NE(texture.gl_id, 0);
DCHECK_NE(draw_tile.gl_vertex_buffer, 0);
glBindBuffer(GL_ARRAY_BUFFER, draw_tile.gl_vertex_buffer);
/* Draw at the parameters for which the texture has been updated for. This allows to always draw
* texture during bordered-rendered camera view without flickering. The validness of the display
* parameters for a texture is guaranteed by the initial "clear" state which makes drawing to
* have an early output.
*
* Such approach can cause some extra "jelly" effect during panning, but it is not more jelly
* than overlay of selected objects. Also, it's possible to redraw texture at an intersection of
* the texture draw parameters and the latest updated draw parameters (although, complexity of
* doing it might not worth it. */
vertex_buffer_update(draw_tile.params);
glBindTexture(GL_TEXTURE_2D, texture.gl_id);
/* Trick to keep sharp rendering without jagged edges on all GPUs.
*
* The idea here is to enforce driver to use linear interpolation when the image is not zoomed
* in.
* For the render result with a resolution divider in effect we always use nearest interpolation.
*
* Use explicit MIN assignment to make sure the driver does not have an undefined behavior at
* the zoom level 1. The MAG filter is always NEAREST. */
const float zoomed_width = draw_tile.params.size.x * zoom.x;
const float zoomed_height = draw_tile.params.size.y * zoom.y;
if (texture.width != draw_tile.params.size.x || texture.height != draw_tile.params.size.y) {
/* Resolution divider is different from 1, force nearest interpolation. */
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
}
else if (zoomed_width - draw_tile.params.size.x > 0.5f ||
zoomed_height - draw_tile.params.size.y > 0.5f) {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
}
else {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
}
glVertexAttribPointer(
texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
glVertexAttribPointer(position_attribute,
2,
GL_FLOAT,
GL_FALSE,
4 * sizeof(float),
(const GLvoid *)(sizeof(float) * 2));
glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
}
void BlenderDisplayDriver::flush()
{
/* This is called from the render thread that also calls update_begin/end, right before ending
* the render loop. We wait for any queued PBO and render commands to be done, before destroying
* the render thread and activating the context in the main thread to destroy resources.
*
* If we don't do this, the NVIDIA driver hangs for a few seconds for when ending 3D viewport
* rendering, for unknown reasons. This was found with NVIDIA driver version 470.73 and a Quadro
* RTX 6000 on Linux. */
if (!gl_context_enable()) {
return;
}
if (gl_upload_sync_) {
glWaitSync((GLsync)gl_upload_sync_, 0, GL_TIMEOUT_IGNORED);
}
if (gl_render_sync_) {
glWaitSync((GLsync)gl_render_sync_, 0, GL_TIMEOUT_IGNORED);
}
gl_context_disable();
}
void BlenderDisplayDriver::draw(const Params &params)
{
/* See do_update_begin() for why no locking is required here. */
const bool transparent = true; // TODO(sergey): Derive this from Film.
if (!gl_draw_resources_ensure()) {
return;
}
if (use_gl_context_) {
gl_context_mutex_.lock();
}
if (need_clear_) {
if (texture_.need_clear) {
/* Texture is requested to be cleared and was not yet cleared.
*
* Do early return which should be equivalent of drawing all-zero texture.
* Watch out for the lock though so that the clear happening during update is properly
* synchronized here. */
if (use_gl_context_) {
gl_context_mutex_.unlock();
}
gl_context_mutex_.unlock();
return;
}
@@ -939,37 +477,66 @@ void BlenderDisplayDriver::draw(const Params &params)
glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
}
glActiveTexture(GL_TEXTURE0);
display_shader_->bind(params.full_size.x, params.full_size.y);
/* NOTE: The VAO is to be allocated on the drawing context as it is not shared across contexts.
 * The simplest approach is to allocate it on every redraw so that it can be destroyed from the
 * correct context. */
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, texture_.gl_id);
/* Trick to keep sharp rendering without jagged edges on all GPUs.
*
 * The idea here is to force the driver to use linear interpolation when the image is not
 * zoomed in.
 * For a render result with a resolution divider in effect, nearest interpolation is always used.
 *
 * Use an explicit MIN filter assignment to make sure the driver does not exhibit undefined
 * behavior at zoom level 1. The MAG filter is always NEAREST. */
const float zoomed_width = params.size.x * zoom_.x;
const float zoomed_height = params.size.y * zoom_.y;
if (texture_.width != params.size.x || texture_.height != params.size.y) {
/* Resolution divider is different from 1, force nearest interpolation. */
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
}
else if (zoomed_width - params.size.x > 0.5f || zoomed_height - params.size.y > 0.5f) {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
}
else {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
}
glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_);
texture_update_if_needed();
vertex_buffer_update(params);
  /* TODO(sergey): Does it make sense (and is it possible) to cache/reuse the VAO? */
GLuint vertex_array_object;
glGenVertexArrays(1, &vertex_array_object);
glBindVertexArray(vertex_array_object);
display_shader_->bind(params.full_size.x, params.full_size.y);
const int texcoord_attribute = display_shader_->get_tex_coord_attrib_location();
const int position_attribute = display_shader_->get_position_attrib_location();
glEnableVertexAttribArray(texcoord_attribute);
glEnableVertexAttribArray(position_attribute);
draw_tile(zoom_, texcoord_attribute, position_attribute, tiles_->current_tile.tile);
glVertexAttribPointer(
texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
glVertexAttribPointer(position_attribute,
2,
GL_FLOAT,
GL_FALSE,
4 * sizeof(float),
(const GLvoid *)(sizeof(float) * 2));
for (const DrawTile &tile : tiles_->finished_tiles.tiles) {
draw_tile(zoom_, texcoord_attribute, position_attribute, tile);
}
glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
display_shader_->unbind();
glBindTexture(GL_TEXTURE_2D, 0);
glBindVertexArray(0);
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindTexture(GL_TEXTURE_2D, 0);
glDeleteVertexArrays(1, &vertex_array_object);
display_shader_->unbind();
if (transparent) {
glDisable(GL_BLEND);
}
@@ -977,11 +544,6 @@ void BlenderDisplayDriver::draw(const Params &params)
gl_render_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
glFlush();
if (VLOG_IS_ON(5)) {
VLOG(5) << "Number of textures: " << GLTexture::num_used;
VLOG(5) << "Number of PBOs: " << GLPixelBufferObject::num_used;
}
if (use_gl_context_) {
gl_context_mutex_.unlock();
}
@@ -1056,16 +618,154 @@ void BlenderDisplayDriver::gl_context_dispose()
}
}
bool BlenderDisplayDriver::gl_draw_resources_ensure()
{
if (!texture_.gl_id) {
    /* If there is no texture allocated, there is nothing to draw. Inform the draw call that it
     * cannot continue. Note that this is not an unrecoverable error, so once the texture is known
     * we will come back here and create all the GPU resources needed for drawing. */
return false;
}
if (gl_draw_resource_creation_attempted_) {
return gl_draw_resources_created_;
}
gl_draw_resource_creation_attempted_ = true;
if (!vertex_buffer_) {
glGenBuffers(1, &vertex_buffer_);
if (!vertex_buffer_) {
LOG(ERROR) << "Error creating vertex buffer.";
return false;
}
}
gl_draw_resources_created_ = true;
return true;
}
void BlenderDisplayDriver::gl_resources_destroy()
{
gl_context_enable();
tiles_->current_tile.gl_resources_destroy();
tiles_->finished_tiles.gl_resources_destroy_and_clear();
if (vertex_buffer_ != 0) {
glDeleteBuffers(1, &vertex_buffer_);
}
if (texture_.gl_pbo_id) {
glDeleteBuffers(1, &texture_.gl_pbo_id);
texture_.gl_pbo_id = 0;
}
if (texture_.gl_id) {
glDeleteTextures(1, &texture_.gl_id);
texture_.gl_id = 0;
}
gl_context_disable();
gl_context_dispose();
}
bool BlenderDisplayDriver::gl_texture_resources_ensure()
{
if (texture_.creation_attempted) {
return texture_.is_created;
}
texture_.creation_attempted = true;
DCHECK(!texture_.gl_id);
DCHECK(!texture_.gl_pbo_id);
/* Create texture. */
glGenTextures(1, &texture_.gl_id);
if (!texture_.gl_id) {
LOG(ERROR) << "Error creating texture.";
return false;
}
/* Configure the texture. */
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, texture_.gl_id);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glBindTexture(GL_TEXTURE_2D, 0);
/* Create PBO for the texture. */
glGenBuffers(1, &texture_.gl_pbo_id);
if (!texture_.gl_pbo_id) {
LOG(ERROR) << "Error creating texture pixel buffer object.";
return false;
}
  /* Creation finished successfully. */
texture_.is_created = true;
return true;
}
void BlenderDisplayDriver::texture_update_if_needed()
{
if (!texture_.need_update) {
return;
}
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id);
glTexSubImage2D(
GL_TEXTURE_2D, 0, 0, 0, texture_.width, texture_.height, GL_RGBA, GL_HALF_FLOAT, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
texture_.need_update = false;
}
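/* A minimal sketch of the upload side this pairs with (hypothetical helper and parameters; the
 * real mapping happens in the update path, which is not part of this hunk). Pixel data is
 * written into the PBO, and the glTexSubImage2D() above then sources from it without an extra
 * CPU-side copy: */
static void pbo_upload_sketch(uint pbo, int width, int height)
{
  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo);
  /* RGBA half-float: 4 channels * 2 bytes per pixel. */
  glBufferData(GL_PIXEL_UNPACK_BUFFER, width * height * 8, NULL, GL_DYNAMIC_DRAW);
  void *pixels = glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY);
  if (pixels) {
    /* ... fill with half-float RGBA render pixels ... */
    glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
  }
  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
}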
void BlenderDisplayDriver::vertex_buffer_update(const Params & /*params*/)
{
  /* Draw with the parameters for which the texture has been updated. This allows the texture to
   * always be drawn during a border-rendered camera view without flickering. The validity of the
   * display parameters for a texture is guaranteed by the initial "clear" state, which makes the
   * draw take an early exit.
   *
   * Such an approach can cause some extra "jelly" effect during panning, but it is no more jelly
   * than the overlay of selected objects. Also, it is possible to redraw the texture at an
   * intersection of the texture draw parameters and the latest updated draw parameters (although
   * the complexity of doing so might not be worth it). */
const int x = texture_.params.full_offset.x;
const int y = texture_.params.full_offset.y;
const int width = texture_.params.size.x;
const int height = texture_.params.size.y;
/* Invalidate old contents - avoids stalling if the buffer is still waiting in queue to be
* rendered. */
glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);
float *vpointer = reinterpret_cast<float *>(glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY));
if (!vpointer) {
return;
}
vpointer[0] = 0.0f;
vpointer[1] = 0.0f;
vpointer[2] = x;
vpointer[3] = y;
vpointer[4] = 1.0f;
vpointer[5] = 0.0f;
vpointer[6] = x + width;
vpointer[7] = y;
vpointer[8] = 1.0f;
vpointer[9] = 1.0f;
vpointer[10] = x + width;
vpointer[11] = y + height;
vpointer[12] = 0.0f;
vpointer[13] = 1.0f;
vpointer[14] = x;
vpointer[15] = y + height;
glUnmapBuffer(GL_ARRAY_BUFFER);
}
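/* Layout sketch of the 16 floats written above (derived from the attribute pointers set up in
 * the draw code: stride 4 * sizeof(float), texcoord at offset 0, position at offset
 * 2 * sizeof(float)):
 *
 *   vertex 0: u=0, v=0, x=x,       y=y          (bottom-left)
 *   vertex 1: u=1, v=0, x=x+width, y=y          (bottom-right)
 *   vertex 2: u=1, v=1, x=x+width, y=y+height   (top-right)
 *   vertex 3: u=0, v=1, x=x,       y=y+height   (top-left)
 *
 * Drawn as GL_TRIANGLE_FAN with 4 vertices. */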
CCL_NAMESPACE_END

View File

@@ -26,7 +26,6 @@
#include "util/thread.h"
#include "util/unique_ptr.h"
#include "util/vector.h"
CCL_NAMESPACE_BEGIN
@@ -113,8 +112,6 @@ class BlenderDisplayDriver : public DisplayDriver {
void set_zoom(float zoom_x, float zoom_y);
protected:
virtual void next_tile_begin() override;
virtual bool update_begin(const Params &params, int texture_width, int texture_height) override;
virtual void update_end() override;
@@ -125,17 +122,33 @@ class BlenderDisplayDriver : public DisplayDriver {
virtual void draw(const Params &params) override;
virtual void flush() override;
  /* Helper function which allocates a new GPU context. */
void gl_context_create();
bool gl_context_enable();
void gl_context_disable();
void gl_context_dispose();
  /* Make sure the texture is allocated and its initial configuration is performed. */
bool gl_texture_resources_ensure();
/* Ensure all runtime GPU resources needed for drawing are allocated.
* Returns true if all resources needed for drawing are available. */
bool gl_draw_resources_ensure();
/* Destroy all GPU resources which are being used by this object. */
void gl_resources_destroy();
/* Update GPU texture dimensions and content if needed (new pixel data was provided).
*
* NOTE: The texture needs to be bound. */
void texture_update_if_needed();
  /* Update the vertex buffer with new coordinates of vertex positions and texture coordinates.
   * This buffer is used to render the texture in the viewport.
*
* NOTE: The buffer needs to be bound. */
void vertex_buffer_update(const Params &params);
BL::RenderEngine b_engine_;
  /* OpenGL context which is used when the render engine doesn't have its own. */
@@ -146,14 +159,50 @@ class BlenderDisplayDriver : public DisplayDriver {
/* Mutex used to guard the `gl_context_`. */
thread_mutex gl_context_mutex_;
/* Content of the display is to be filled with zeroes. */
std::atomic<bool> need_clear_ = true;
/* Texture which contains pixels of the render result. */
struct {
    /* Indicates whether texture creation was attempted and whether it succeeded.
     * Used to avoid repeated texture-creation attempts when there are GPU issues or the GPU
     * context is misconfigured. */
bool creation_attempted = false;
bool is_created = false;
    /* OpenGL resource IDs of the texture itself and the Pixel Buffer Object (PBO) used to write
     * pixels to it.
*
* NOTE: Allocated on the engine's context. */
uint gl_id = 0;
uint gl_pbo_id = 0;
    /* Is true when new data was written to the PBO, meaning the texture might need to be resized
     * and new data uploaded to the GPU. */
bool need_update = false;
/* Content of the texture is to be filled with zeroes. */
std::atomic<bool> need_clear = true;
/* Dimensions of the texture in pixels. */
int width = 0;
int height = 0;
/* Dimensions of the underlying PBO. */
int buffer_width = 0;
int buffer_height = 0;
/* Display parameters the texture has been updated for. */
Params params;
} texture_;
unique_ptr<BlenderDisplayShader> display_shader_;
  /* Opaque storage for internal state and tile data. */
struct Tiles;
unique_ptr<Tiles> tiles_;
  /* Tracks whether creation of GPU resources has been attempted, to avoid re-attempting their
   * creation on every redraw after a failure. */
bool gl_draw_resource_creation_attempted_ = false;
bool gl_draw_resources_created_ = false;
  /* Vertex buffer which holds the vertices of a triangle fan that is textured with the texture
   * holding the render result. */
uint vertex_buffer_ = 0;
void *gl_render_sync_ = nullptr;
void *gl_upload_sync_ = nullptr;

View File

@@ -19,7 +19,6 @@
#include "scene/hair.h"
#include "scene/mesh.h"
#include "scene/object.h"
#include "scene/pointcloud.h"
#include "scene/volume.h"
#include "blender/sync.h"
@@ -32,18 +31,10 @@ CCL_NAMESPACE_BEGIN
static Geometry::Type determine_geom_type(BObjectInfo &b_ob_info, bool use_particle_hair)
{
#ifdef WITH_HAIR_NODES
if (b_ob_info.object_data.is_a(&RNA_Hair) || use_particle_hair) {
#else
if (use_particle_hair) {
#endif
return Geometry::HAIR;
}
if (b_ob_info.object_data.is_a(&RNA_PointCloud)) {
return Geometry::POINTCLOUD;
}
if (b_ob_info.object_data.is_a(&RNA_Volume) ||
(b_ob_info.object_data == b_ob_info.real_object.data() &&
object_fluid_gas_domain_find(b_ob_info.real_object))) {
@@ -116,9 +107,6 @@ Geometry *BlenderSync::sync_geometry(BL::Depsgraph &b_depsgraph,
else if (geom_type == Geometry::VOLUME) {
geom = scene->create_node<Volume>();
}
else if (geom_type == Geometry::POINTCLOUD) {
geom = scene->create_node<PointCloud>();
}
else {
geom = scene->create_node<Mesh>();
}
@@ -178,10 +166,6 @@ Geometry *BlenderSync::sync_geometry(BL::Depsgraph &b_depsgraph,
Volume *volume = static_cast<Volume *>(geom);
sync_volume(b_ob_info, volume);
}
else if (geom_type == Geometry::POINTCLOUD) {
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
sync_pointcloud(pointcloud, b_ob_info);
}
else {
Mesh *mesh = static_cast<Mesh *>(geom);
sync_mesh(b_depsgraph, b_ob_info, mesh);
@@ -231,11 +215,7 @@ void BlenderSync::sync_geometry_motion(BL::Depsgraph &b_depsgraph,
if (progress.get_cancel())
return;
#ifdef WITH_HAIR_NODES
if (b_ob_info.object_data.is_a(&RNA_Hair) || use_particle_hair) {
#else
if (use_particle_hair) {
#endif
Hair *hair = static_cast<Hair *>(geom);
sync_hair_motion(b_depsgraph, b_ob_info, hair, motion_step);
}
@@ -243,10 +223,6 @@ void BlenderSync::sync_geometry_motion(BL::Depsgraph &b_depsgraph,
object_fluid_gas_domain_find(b_ob_info.real_object)) {
/* No volume motion blur support yet. */
}
else if (b_ob_info.object_data.is_a(&RNA_PointCloud)) {
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
sync_pointcloud_motion(pointcloud, b_ob_info, motion_step);
}
else {
Mesh *mesh = static_cast<Mesh *>(geom);
sync_mesh_motion(b_depsgraph, b_ob_info, mesh, motion_step);

View File

@@ -24,14 +24,8 @@ CCL_NAMESPACE_BEGIN
/* Packed Images */
BlenderImageLoader::BlenderImageLoader(BL::Image b_image,
const int frame,
const bool is_preview_render)
: b_image(b_image),
frame(frame),
/* Don't free cache for preview render to avoid race condition from T93560, to be fixed
properly later as we are close to release. */
free_cache(!is_preview_render && !b_image.has_data())
BlenderImageLoader::BlenderImageLoader(BL::Image b_image, int frame)
: b_image(b_image), frame(frame), free_cache(!b_image.has_data())
{
}

View File

@@ -25,7 +25,7 @@ CCL_NAMESPACE_BEGIN
class BlenderImageLoader : public ImageLoader {
public:
BlenderImageLoader(BL::Image b_image, const int frame, const bool is_preview_render);
BlenderImageLoader(BL::Image b_image, int frame);
bool load_metadata(const ImageDeviceFeatures &features, ImageMetaData &metadata) override;
bool load_pixels(const ImageMetaData &metadata,

View File

@@ -1086,6 +1086,40 @@ static void create_subd_mesh(Scene *scene,
/* Sync */
/* Check whether some of the "built-in" motion-related attributes need to be exported (this
 * includes things like velocity from the cache modifier or a fluid simulation).
 *
 * NOTE: This code runs prior to object motion blur initialization, so it cannot access properties
 * set by `sync_object_motion_init()`. */
static bool mesh_need_motion_attribute(BObjectInfo &b_ob_info, Scene *scene)
{
const Scene::MotionType need_motion = scene->need_motion();
if (need_motion == Scene::MOTION_NONE) {
    /* Simple case: neither a motion pass nor motion blur is needed, so there is no need for the
     * motion-related attributes. */
return false;
}
if (need_motion == Scene::MOTION_BLUR) {
/* A bit tricky and implicit case:
     * - Motion blur is enabled in the scene, which implies a specific number of time steps for
* objects.
* - If the object has motion blur disabled on it, it will have 0 time steps.
* - Motion attribute expects non-zero time steps.
*
* Avoid adding motion attributes if the motion blur will enforce 0 motion steps. */
PointerRNA cobject = RNA_pointer_get(&b_ob_info.real_object.ptr, "cycles");
const bool use_motion = get_boolean(cobject, "use_motion_blur");
if (!use_motion) {
return false;
}
}
  /* A motion pass (which implies 3 motion steps), or motion blur which is not disabled on the
   * object level. */
return true;
}
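/* Worked example of the frame-relative conversion in sync_mesh() below (assumed values, not from
 * the source): with fps = 24, fps_base = 1.0 and motion_shutter_time() = 0.5, motion_scale =
 * 0.5 / (24 / 1) ~= 0.0208, so a velocity of 48 units/second contributes about one unit of
 * displacement over the shutter interval. */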
void BlenderSync::sync_mesh(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, Mesh *mesh)
{
  /* make a copy of the shaders as the caller in the main thread still needs them for syncing the
@@ -1110,7 +1144,7 @@ void BlenderSync::sync_mesh(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, M
if (b_mesh) {
    /* The motion blur attribute is relative to seconds; we need it relative to frames. */
const bool need_motion = object_need_motion_attribute(b_ob_info, scene);
const bool need_motion = mesh_need_motion_attribute(b_ob_info, scene);
const float motion_scale = (need_motion) ?
scene->motion_shutter_time() /
(b_scene.render().fps() / b_scene.render().fps_base()) :

View File

@@ -72,8 +72,7 @@ bool BlenderSync::object_is_geometry(BObjectInfo &b_ob_info)
BL::Object::type_enum type = b_ob_info.iter_object.type();
if (type == BL::Object::type_VOLUME || type == BL::Object::type_HAIR ||
type == BL::Object::type_POINTCLOUD) {
if (type == BL::Object::type_VOLUME || type == BL::Object::type_HAIR) {
/* Will be exported attached to mesh. */
return true;
}
@@ -207,7 +206,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
return NULL;
}
/* only interested in object that we can create geometry from */
/* only interested in object that we can create meshes from */
if (!object_is_geometry(b_ob_info)) {
return NULL;
}

View File

@@ -66,7 +66,7 @@ bool BlenderOutputDriver::read_render_tile(const Tile &tile)
bool BlenderOutputDriver::update_render_tile(const Tile &tile)
{
/* Use final write for preview renders, otherwise render result wouldn't be updated
/* Use final write for preview renders, otherwise render result wouldn't be be updated
* quickly on Blender side. For all other cases we use the display driver. */
if (b_engine_.is_preview()) {
write_render_tile(tile);
@@ -120,7 +120,7 @@ void BlenderOutputDriver::write_render_tile(const Tile &tile)
b_pass.rect(&pixels[0]);
}
b_engine_.end_result(b_rr, false, false, true);
b_engine_.end_result(b_rr, true, false, true);
}
CCL_NAMESPACE_END

View File

@@ -1,303 +0,0 @@
/*
* Copyright 2011-2013 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "scene/pointcloud.h"
#include "scene/attribute.h"
#include "scene/scene.h"
#include "blender/sync.h"
#include "blender/util.h"
#include "util/foreach.h"
#include "util/hash.h"
CCL_NAMESPACE_BEGIN
template<typename TypeInCycles, typename GetValueAtIndex>
static void fill_generic_attribute(BL::PointCloud &b_pointcloud,
TypeInCycles *data,
const GetValueAtIndex &get_value_at_index)
{
const int num_points = b_pointcloud.points.length();
for (int i = 0; i < num_points; i++) {
data[i] = get_value_at_index(i);
}
}
static void attr_create_motion(PointCloud *pointcloud,
BL::Attribute &b_attribute,
const float motion_scale)
{
  /* Only a float-vector attribute on the point domain provides motion data. */
  if (!(b_attribute.domain() == BL::Attribute::domain_POINT &&
        b_attribute.data_type() == BL::Attribute::data_type_FLOAT_VECTOR)) {
    return;
  }
BL::FloatVectorAttribute b_vector_attribute(b_attribute);
const int num_points = pointcloud->get_points().size();
/* Find or add attribute */
float3 *P = &pointcloud->get_points()[0];
Attribute *attr_mP = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if (!attr_mP) {
attr_mP = pointcloud->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION);
}
  /* Only export the previous and next frame; we don't have any in-between data. */
float motion_times[2] = {-1.0f, 1.0f};
for (int step = 0; step < 2; step++) {
const float relative_time = motion_times[step] * 0.5f * motion_scale;
float3 *mP = attr_mP->data_float3() + step * num_points;
for (int i = 0; i < num_points; i++) {
mP[i] = P[i] + get_float3(b_vector_attribute.data[i].vector()) * relative_time;
}
}
}
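/* Example (assuming motion_scale ~= 0.0208 purely for illustration): step 0 uses relative_time =
 * -1.0 * 0.5 * 0.0208 ~= -0.0104 and step 1 uses +0.0104, so motion positions are extrapolated
 * as P +/- velocity * 0.0104 toward the previous and next motion steps. */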
static void copy_attributes(PointCloud *pointcloud,
BL::PointCloud b_pointcloud,
const bool need_motion,
const float motion_scale)
{
AttributeSet &attributes = pointcloud->attributes;
static const ustring u_velocity("velocity");
for (BL::Attribute &b_attribute : b_pointcloud.attributes) {
const ustring name{b_attribute.name().c_str()};
if (need_motion && name == u_velocity) {
attr_create_motion(pointcloud, b_attribute, motion_scale);
}
if (attributes.find(name)) {
continue;
}
const AttributeElement element = ATTR_ELEMENT_VERTEX;
const BL::Attribute::data_type_enum b_data_type = b_attribute.data_type();
switch (b_data_type) {
case BL::Attribute::data_type_FLOAT: {
BL::FloatAttribute b_float_attribute{b_attribute};
Attribute *attr = attributes.add(name, TypeFloat, element);
float *data = attr->data_float();
fill_generic_attribute(
b_pointcloud, data, [&](int i) { return b_float_attribute.data[i].value(); });
break;
}
case BL::Attribute::data_type_BOOLEAN: {
BL::BoolAttribute b_bool_attribute{b_attribute};
Attribute *attr = attributes.add(name, TypeFloat, element);
float *data = attr->data_float();
fill_generic_attribute(
b_pointcloud, data, [&](int i) { return (float)b_bool_attribute.data[i].value(); });
break;
}
case BL::Attribute::data_type_INT: {
BL::IntAttribute b_int_attribute{b_attribute};
Attribute *attr = attributes.add(name, TypeFloat, element);
float *data = attr->data_float();
fill_generic_attribute(
b_pointcloud, data, [&](int i) { return (float)b_int_attribute.data[i].value(); });
break;
}
case BL::Attribute::data_type_FLOAT_VECTOR: {
BL::FloatVectorAttribute b_vector_attribute{b_attribute};
Attribute *attr = attributes.add(name, TypeVector, element);
float3 *data = attr->data_float3();
fill_generic_attribute(b_pointcloud, data, [&](int i) {
BL::Array<float, 3> v = b_vector_attribute.data[i].vector();
return make_float3(v[0], v[1], v[2]);
});
break;
}
case BL::Attribute::data_type_FLOAT_COLOR: {
BL::FloatColorAttribute b_color_attribute{b_attribute};
Attribute *attr = attributes.add(name, TypeRGBA, element);
float4 *data = attr->data_float4();
fill_generic_attribute(b_pointcloud, data, [&](int i) {
BL::Array<float, 4> v = b_color_attribute.data[i].color();
return make_float4(v[0], v[1], v[2], v[3]);
});
break;
}
case BL::Attribute::data_type_FLOAT2: {
BL::Float2Attribute b_float2_attribute{b_attribute};
Attribute *attr = attributes.add(name, TypeFloat2, element);
float2 *data = attr->data_float2();
fill_generic_attribute(b_pointcloud, data, [&](int i) {
BL::Array<float, 2> v = b_float2_attribute.data[i].vector();
return make_float2(v[0], v[1]);
});
break;
}
default:
/* Not supported. */
break;
}
}
}
static void export_pointcloud(Scene *scene,
PointCloud *pointcloud,
BL::PointCloud b_pointcloud,
const bool need_motion,
const float motion_scale)
{
/* TODO: optimize so we can straight memcpy arrays from Blender? */
/* Add requested attributes. */
Attribute *attr_random = NULL;
if (pointcloud->need_attribute(scene, ATTR_STD_POINT_RANDOM)) {
attr_random = pointcloud->attributes.add(ATTR_STD_POINT_RANDOM);
}
/* Reserve memory. */
const int num_points = b_pointcloud.points.length();
pointcloud->reserve(num_points);
/* Export points. */
BL::PointCloud::points_iterator b_point_iter;
for (b_pointcloud.points.begin(b_point_iter); b_point_iter != b_pointcloud.points.end();
++b_point_iter) {
BL::Point b_point = *b_point_iter;
const float3 co = get_float3(b_point.co());
const float radius = b_point.radius();
pointcloud->add_point(co, radius);
/* Random number per point. */
if (attr_random != NULL) {
attr_random->add(hash_uint2_to_float(b_point.index(), 0));
}
}
/* Export attributes */
copy_attributes(pointcloud, b_pointcloud, need_motion, motion_scale);
}
static void export_pointcloud_motion(PointCloud *pointcloud,
BL::PointCloud b_pointcloud,
int motion_step)
{
/* Find or add attribute. */
Attribute *attr_mP = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
bool new_attribute = false;
if (!attr_mP) {
attr_mP = pointcloud->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION);
new_attribute = true;
}
/* Export motion points. */
const int num_points = pointcloud->num_points();
float3 *mP = attr_mP->data_float3() + motion_step * num_points;
bool have_motion = false;
int num_motion_points = 0;
const array<float3> &pointcloud_points = pointcloud->get_points();
BL::PointCloud::points_iterator b_point_iter;
for (b_pointcloud.points.begin(b_point_iter); b_point_iter != b_pointcloud.points.end();
++b_point_iter) {
BL::Point b_point = *b_point_iter;
if (num_motion_points < num_points) {
float3 P = get_float3(b_point.co());
P.w = b_point.radius();
mP[num_motion_points] = P;
have_motion = have_motion || (P != pointcloud_points[num_motion_points]);
num_motion_points++;
}
}
  /* In the case of a new attribute, verify whether there really was any motion. */
if (new_attribute) {
if (num_motion_points != num_points || !have_motion) {
pointcloud->attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION);
}
else if (motion_step > 0) {
/* Motion, fill up previous steps that we might have skipped because
* they had no motion, but we need them anyway now. */
for (int step = 0; step < motion_step; step++) {
pointcloud->copy_center_to_motion_step(step);
}
}
}
/* Export attributes */
copy_attributes(pointcloud, b_pointcloud, false, 0.0f);
}
void BlenderSync::sync_pointcloud(PointCloud *pointcloud, BObjectInfo &b_ob_info)
{
size_t old_numpoints = pointcloud->num_points();
array<Node *> used_shaders = pointcloud->get_used_shaders();
PointCloud new_pointcloud;
new_pointcloud.set_used_shaders(used_shaders);
/* TODO: add option to filter out points in the view layer. */
BL::PointCloud b_pointcloud(b_ob_info.object_data);
  /* The motion blur attribute is relative to seconds; we need it relative to frames. */
const bool need_motion = object_need_motion_attribute(b_ob_info, scene);
const float motion_scale = (need_motion) ?
scene->motion_shutter_time() /
(b_scene.render().fps() / b_scene.render().fps_base()) :
0.0f;
export_pointcloud(scene, &new_pointcloud, b_pointcloud, need_motion, motion_scale);
/* update original sockets */
for (const SocketType &socket : new_pointcloud.type->inputs) {
/* Those sockets are updated in sync_object, so do not modify them. */
if (socket.name == "use_motion_blur" || socket.name == "motion_steps" ||
socket.name == "used_shaders") {
continue;
}
pointcloud->set_value(socket, new_pointcloud, socket);
}
pointcloud->attributes.clear();
foreach (Attribute &attr, new_pointcloud.attributes.attributes) {
pointcloud->attributes.attributes.push_back(std::move(attr));
}
/* tag update */
const bool rebuild = (pointcloud && old_numpoints != pointcloud->num_points());
pointcloud->tag_update(scene, rebuild);
}
void BlenderSync::sync_pointcloud_motion(PointCloud *pointcloud,
BObjectInfo &b_ob_info,
int motion_step)
{
/* Skip if nothing exported. */
if (pointcloud->num_points() == 0) {
return;
}
/* Export deformed coordinates. */
if (ccl::BKE_object_is_deform_modified(b_ob_info, b_scene, preview)) {
/* PointCloud object. */
BL::PointCloud b_pointcloud(b_ob_info.object_data);
export_pointcloud_motion(pointcloud, b_pointcloud, motion_step);
}
else {
/* No deformation on this frame, copy coordinates if other frames did have it. */
pointcloud->copy_center_to_motion_step(motion_step);
}
}
CCL_NAMESPACE_END

View File

@@ -138,18 +138,20 @@ static const char *PyC_UnicodeAsByte(PyObject *py_str, PyObject **coerce)
static PyObject *init_func(PyObject * /*self*/, PyObject *args)
{
PyObject *path, *user_path;
PyObject *path, *user_path, *temp_path;
int headless;
if (!PyArg_ParseTuple(args, "OOi", &path, &user_path, &headless)) {
if (!PyArg_ParseTuple(args, "OOOi", &path, &user_path, &temp_path, &headless)) {
return nullptr;
}
PyObject *path_coerce = nullptr, *user_path_coerce = nullptr;
PyObject *path_coerce = nullptr, *user_path_coerce = nullptr, *temp_path_coerce = nullptr;
path_init(PyC_UnicodeAsByte(path, &path_coerce),
PyC_UnicodeAsByte(user_path, &user_path_coerce));
PyC_UnicodeAsByte(user_path, &user_path_coerce),
PyC_UnicodeAsByte(temp_path, &temp_path_coerce));
Py_XDECREF(path_coerce);
Py_XDECREF(user_path_coerce);
Py_XDECREF(temp_path_coerce);
BlenderSession::headless = headless;
@@ -733,20 +735,27 @@ static bool image_parse_filepaths(PyObject *pyfilepaths, vector<string> &filepat
static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *keywords)
{
#if 1
(void)args;
(void)keywords;
#else
static const char *keyword_list[] = {
"preferences", "scene", "view_layer", "input", "output", NULL};
"preferences", "scene", "view_layer", "input", "output", "tile_size", "samples", NULL};
PyObject *pypreferences, *pyscene, *pyviewlayer;
PyObject *pyinput, *pyoutput = NULL;
int tile_size = 0, samples = 0;
if (!PyArg_ParseTupleAndKeywords(args,
keywords,
"OOOO|O",
"OOOO|Oii",
(char **)keyword_list,
&pypreferences,
&pyscene,
&pyviewlayer,
&pyinput,
&pyoutput)) {
&pyoutput,
&tile_size,
&samples)) {
return NULL;
}
@@ -768,10 +777,14 @@ static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *key
&RNA_ViewLayer,
PyLong_AsVoidPtr(pyviewlayer),
&viewlayerptr);
BL::ViewLayer b_view_layer(viewlayerptr);
PointerRNA cviewlayer = RNA_pointer_get(&viewlayerptr, "cycles");
DenoiseParams params = BlenderSync::get_denoise_params(b_scene, b_view_layer, true);
params.use = true;
DenoiseParams params;
params.radius = get_int(cviewlayer, "denoising_radius");
params.strength = get_float(cviewlayer, "denoising_strength");
params.feature_strength = get_float(cviewlayer, "denoising_feature_strength");
params.relative_pca = get_boolean(cviewlayer, "denoising_relative_pca");
params.neighbor_frames = get_int(cviewlayer, "denoising_neighbor_frames");
/* Parse file paths list. */
vector<string> input, output;
@@ -799,15 +812,24 @@ static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *key
}
/* Create denoiser. */
DenoiserPipeline denoiser(device, params);
DenoiserPipeline denoiser(device);
denoiser.params = params;
denoiser.input = input;
denoiser.output = output;
if (tile_size > 0) {
denoiser.tile_size = make_int2(tile_size, tile_size);
}
if (samples > 0) {
denoiser.samples_override = samples;
}
/* Run denoiser. */
if (!denoiser.run()) {
PyErr_SetString(PyExc_ValueError, denoiser.error.c_str());
return NULL;
}
#endif
Py_RETURN_NONE;
}
@@ -884,18 +906,16 @@ static PyObject *enable_print_stats_func(PyObject * /*self*/, PyObject * /*args*
static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/)
{
vector<DeviceType> device_types = Device::available_types();
bool has_cuda = false, has_optix = false, has_hip = false, has_metal = false;
bool has_cuda = false, has_optix = false, has_hip = false;
foreach (DeviceType device_type, device_types) {
has_cuda |= (device_type == DEVICE_CUDA);
has_optix |= (device_type == DEVICE_OPTIX);
has_hip |= (device_type == DEVICE_HIP);
has_metal |= (device_type == DEVICE_METAL);
}
PyObject *list = PyTuple_New(4);
PyObject *list = PyTuple_New(3);
PyTuple_SET_ITEM(list, 0, PyBool_FromLong(has_cuda));
PyTuple_SET_ITEM(list, 1, PyBool_FromLong(has_optix));
PyTuple_SET_ITEM(list, 2, PyBool_FromLong(has_hip));
PyTuple_SET_ITEM(list, 3, PyBool_FromLong(has_metal));
return list;
}
@@ -924,9 +944,6 @@ static PyObject *set_device_override_func(PyObject * /*self*/, PyObject *arg)
else if (override == "HIP") {
BlenderSession::device_override = DEVICE_MASK_HIP;
}
else if (override == "METAL") {
BlenderSession::device_override = DEVICE_MASK_METAL;
}
else {
printf("\nError: %s is not a valid Cycles device.\n", override.c_str());
Py_RETURN_FALSE;
@@ -1037,13 +1054,5 @@ void *CCL_python_module_init()
Py_INCREF(Py_False);
}
#ifdef WITH_CYCLES_DEBUG
PyModule_AddObject(mod, "with_debug", Py_True);
Py_INCREF(Py_True);
#else /* WITH_CYCLES_DEBUG */
PyModule_AddObject(mod, "with_debug", Py_False);
Py_INCREF(Py_False);
#endif /* WITH_CYCLES_DEBUG */
return (void *)mod;
}

View File

@@ -396,13 +396,6 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
/* set the current view */
b_engine.active_view_set(b_rview_name.c_str());
    /* Force an update in this case, since the camera transform on each frame changes
     * in different views. This could be optimized by somehow storing the animated
     * camera transforms separately from the fixed stereo transform. */
if ((scene->need_motion() != Scene::MOTION_NONE) && view_index > 0) {
sync->tag_update();
}
/* update scene */
BL::Object b_camera_override(b_engine.camera_override());
sync->sync_camera(b_render, b_camera_override, width, height, b_rview_name.c_str());
@@ -502,15 +495,10 @@ void BlenderSession::render_frame_finish()
path_remove(filename);
}
/* Clear output driver. */
/* Clear driver. */
session->set_output_driver(nullptr);
session->full_buffer_written_cb = function_null;
  /* The display driver holds OpenGL resources which belong to an OpenGL context held by the
   * render engine on the Blender side. Force destruction of those resources. */
display_driver_ = nullptr;
session->set_display_driver(nullptr);
/* All the files are handled.
* Clear the list so that this session can be re-used by Persistent Data. */
full_buffer_files_.clear();
@@ -641,7 +629,7 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
integrator->set_use_emission((bake_filter & BL::BakeSettings::pass_filter_EMIT) != 0);
}
/* Always use transparent background for baking. */
/* Always use transpanent background for baking. */
scene->background->set_transparent(true);
/* Load built-in images from Blender. */

View File

@@ -378,19 +378,10 @@ static ShaderNode *add_node(Scene *scene,
}
else if (b_node.is_a(&RNA_ShaderNodeMapRange)) {
BL::ShaderNodeMapRange b_map_range_node(b_node);
if (b_map_range_node.data_type() == BL::ShaderNodeMapRange::data_type_FLOAT_VECTOR) {
VectorMapRangeNode *vector_map_range_node = graph->create_node<VectorMapRangeNode>();
vector_map_range_node->set_use_clamp(b_map_range_node.clamp());
vector_map_range_node->set_range_type(
(NodeMapRangeType)b_map_range_node.interpolation_type());
node = vector_map_range_node;
}
else {
MapRangeNode *map_range_node = graph->create_node<MapRangeNode>();
map_range_node->set_clamp(b_map_range_node.clamp());
map_range_node->set_range_type((NodeMapRangeType)b_map_range_node.interpolation_type());
node = map_range_node;
}
MapRangeNode *map_range_node = graph->create_node<MapRangeNode>();
map_range_node->set_clamp(b_map_range_node.clamp());
map_range_node->set_range_type((NodeMapRangeType)b_map_range_node.interpolation_type());
node = map_range_node;
}
else if (b_node.is_a(&RNA_ShaderNodeClamp)) {
BL::ShaderNodeClamp b_clamp_node(b_node);
@@ -771,12 +762,11 @@ static ShaderNode *add_node(Scene *scene,
int scene_frame = b_scene.frame_current();
int image_frame = image_user_frame_number(b_image_user, b_image, scene_frame);
image->handle = scene->image_manager->add_image(
new BlenderImageLoader(b_image, image_frame, b_engine.is_preview()),
image->image_params());
new BlenderImageLoader(b_image, image_frame), image->image_params());
}
else {
ustring filename = ustring(
image_user_file_path(b_image_user, b_image, b_scene.frame_current()));
image_user_file_path(b_image_user, b_image, b_scene.frame_current(), true));
image->set_filename(filename);
}
}
@@ -807,13 +797,12 @@ static ShaderNode *add_node(Scene *scene,
if (is_builtin) {
int scene_frame = b_scene.frame_current();
int image_frame = image_user_frame_number(b_image_user, b_image, scene_frame);
env->handle = scene->image_manager->add_image(
new BlenderImageLoader(b_image, image_frame, b_engine.is_preview()),
env->image_params());
env->handle = scene->image_manager->add_image(new BlenderImageLoader(b_image, image_frame),
env->image_params());
}
else {
env->set_filename(
ustring(image_user_file_path(b_image_user, b_image, b_scene.frame_current())));
ustring(image_user_file_path(b_image_user, b_image, b_scene.frame_current(), false)));
}
}
node = env;

View File

@@ -95,11 +95,6 @@ void BlenderSync::reset(BL::BlendData &b_data, BL::Scene &b_scene)
this->b_scene = b_scene;
}
void BlenderSync::tag_update()
{
has_updates_ = true;
}
/* Sync */
void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d)
@@ -832,14 +827,6 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
SessionParams params;
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
if (background && !b_engine.is_preview()) {
    /* Viewport and preview renders do not require a temp directory and request session
     * parameters more often than the background render does.
     * Optimize RNA-to-C++ usage and memory allocation a bit by skipping a string access which we
     * know is not needed for viewport renders. */
params.temp_dir = b_engine.temporary_directory();
}
/* feature set */
params.experimental = (get_enum(cscene, "feature_set") != 0);

View File

@@ -66,8 +66,6 @@ class BlenderSync {
void reset(BL::BlendData &b_data, BL::Scene &b_scene);
void tag_update();
/* sync */
void sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d);
void sync_data(BL::RenderSettings &b_render,
@@ -105,11 +103,11 @@ class BlenderSync {
static BufferParams get_buffer_params(
BL::SpaceView3D &b_v3d, BL::RegionView3D &b_rv3d, Camera *cam, int width, int height);
private:
static DenoiseParams get_denoise_params(BL::Scene &b_scene,
BL::ViewLayer &b_view_layer,
bool background);
private:
/* sync */
void sync_lights(BL::Depsgraph &b_depsgraph, bool update_all);
void sync_materials(BL::Depsgraph &b_depsgraph, bool update_all);
@@ -169,16 +167,12 @@ class BlenderSync {
Hair *hair, BL::Mesh &b_mesh, BObjectInfo &b_ob_info, bool motion, int motion_step = 0);
bool object_has_particle_hair(BL::Object b_ob);
/* Point Cloud */
void sync_pointcloud(PointCloud *pointcloud, BObjectInfo &b_ob_info);
void sync_pointcloud_motion(PointCloud *pointcloud, BObjectInfo &b_ob_info, int motion_step = 0);
/* Camera */
void sync_camera_motion(
BL::RenderSettings &b_render, BL::Object &b_ob, int width, int height, float motion_time);
/* Geometry */
Geometry *sync_geometry(BL::Depsgraph &b_depsgraph,
Geometry *sync_geometry(BL::Depsgraph &b_depsgrpah,
BObjectInfo &b_ob_info,
bool object_updated,
bool use_particle_hair,
@@ -273,6 +267,7 @@ class BlenderSync {
Progress &progress;
protected:
/* Indicates that `sync_recalc()` detected changes in the scene.
* If this flag is false then the data is considered to be up-to-date and will not be
* synchronized at all. */

View File

@@ -18,7 +18,6 @@
#define __BLENDER_UTIL_H__
#include "scene/mesh.h"
#include "scene/scene.h"
#include "util/algorithm.h"
#include "util/array.h"
@@ -34,7 +33,7 @@
extern "C" {
void BKE_image_user_frame_calc(void *ima, void *iuser, int cfra);
void BKE_image_user_file_path_ex(void *iuser, void *ima, char *path, bool resolve_udim);
void BKE_image_user_file_path(void *iuser, void *ima, char *path);
unsigned char *BKE_image_get_pixels_for_frame(void *image, int frame, int tile);
float *BKE_image_get_float_pixels_for_frame(void *image, int frame, int tile);
}
@@ -291,14 +290,25 @@ static inline int render_resolution_y(BL::RenderSettings &b_render)
return b_render.resolution_y() * b_render.resolution_percentage() / 100;
}
static inline string image_user_file_path(BL::ImageUser &iuser, BL::Image &ima, int cfra)
static inline string image_user_file_path(BL::ImageUser &iuser,
BL::Image &ima,
int cfra,
bool load_tiled)
{
char filepath[1024];
iuser.tile(0);
BKE_image_user_frame_calc(ima.ptr.data, iuser.ptr.data, cfra);
BKE_image_user_file_path_ex(iuser.ptr.data, ima.ptr.data, filepath, false);
BKE_image_user_file_path(iuser.ptr.data, ima.ptr.data, filepath);
return string(filepath);
string filepath_str = string(filepath);
if (load_tiled && ima.source() == BL::Image::source_TILED) {
string udim;
if (!ima.tiles.empty()) {
udim = to_string(ima.tiles[0].number());
}
string_replace(filepath_str, udim, "<UDIM>");
}
return filepath_str;
}
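/* Illustrative example of the substitution above (hypothetical path): with a resolved path of
 * "//textures/wall.1001.png" and a first tile number of 1001, string_replace() turns the path
 * into "//textures/wall.<UDIM>.png", which the image loader can then expand per tile. */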
static inline int image_user_frame_number(BL::ImageUser &iuser, BL::Image &ima, int cfra)
@@ -671,40 +681,6 @@ static inline uint object_ray_visibility(BL::Object &b_ob)
return flag;
}
/* Check whether some of the "built-in" motion-related attributes need to be exported (this
 * includes things like velocity from the cache modifier or a fluid simulation).
 *
 * NOTE: This code runs prior to object motion blur initialization, so it cannot access properties
 * set by `sync_object_motion_init()`. */
static inline bool object_need_motion_attribute(BObjectInfo &b_ob_info, Scene *scene)
{
const Scene::MotionType need_motion = scene->need_motion();
if (need_motion == Scene::MOTION_NONE) {
    /* Simple case: neither a motion pass nor motion blur is needed, so there is no need for the
     * motion-related attributes. */
return false;
}
if (need_motion == Scene::MOTION_BLUR) {
/* A bit tricky and implicit case:
     * - Motion blur is enabled in the scene, which implies a specific number of time steps for
* objects.
* - If the object has motion blur disabled on it, it will have 0 time steps.
* - Motion attribute expects non-zero time steps.
*
* Avoid adding motion attributes if the motion blur will enforce 0 motion steps. */
PointerRNA cobject = RNA_pointer_get(&b_ob_info.real_object.ptr, "cycles");
const bool use_motion = get_boolean(cobject, "use_motion_blur");
if (!use_motion) {
return false;
}
}
  /* A motion pass (which implies 3 motion steps), or motion blur which is not disabled on the
   * object level. */
return true;
}
class EdgeMap {
public:
EdgeMap()

View File

@@ -33,17 +33,6 @@ set(SRC
unaligned.cpp
)
set(SRC_METAL
metal.mm
)
if(WITH_CYCLES_DEVICE_METAL)
list(APPEND SRC
${SRC_METAL}
)
add_definitions(-DWITH_METAL)
endif()
set(SRC_HEADERS
bvh.h
bvh2.h
@@ -57,7 +46,6 @@ set(SRC_HEADERS
sort.h
split.h
unaligned.h
metal.h
)
set(LIB

View File

@@ -26,7 +26,6 @@
#include "scene/hair.h"
#include "scene/mesh.h"
#include "scene/object.h"
#include "scene/pointcloud.h"
#include "scene/scene.h"
#include "util/algorithm.h"
@@ -114,9 +113,9 @@ void BVHBuild::add_reference_triangles(BoundBox &root,
else {
/* Motion triangles, trace optimized case: we split triangle
* primitives into separate nodes for each of the time steps.
* This way we minimize overlap of neighbor triangle primitives.
* This way we minimize overlap of neighbor curve primitives.
*/
const int num_bvh_steps = params.num_motion_triangle_steps * 2 + 1;
const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1;
const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
const size_t num_verts = mesh->verts.size();
const size_t num_steps = mesh->motion_steps;
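    /* Example (illustrative, assuming a motion-step parameter of 2): num_bvh_steps =
     * 2 * 2 + 1 = 5 and num_bvh_steps_inv_1 = 1 / 4, so primitives are split at times
     * 0.0, 0.25, 0.5, 0.75 and 1.0 across the shutter interval. */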
@@ -270,101 +269,6 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Hair *hair
}
}
void BVHBuild::add_reference_points(BoundBox &root,
BoundBox &center,
PointCloud *pointcloud,
int i)
{
const Attribute *point_attr_mP = NULL;
if (pointcloud->has_motion_blur()) {
point_attr_mP = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
}
const float3 *points_data = &pointcloud->points[0];
const float *radius_data = &pointcloud->radius[0];
const size_t num_points = pointcloud->num_points();
const float3 *motion_data = (point_attr_mP) ? point_attr_mP->data_float3() : NULL;
const size_t num_steps = pointcloud->get_motion_steps();
if (point_attr_mP == NULL) {
/* Really simple logic for static points. */
for (uint j = 0; j < num_points; j++) {
const PointCloud::Point point = pointcloud->get_point(j);
BoundBox bounds = BoundBox::empty;
point.bounds_grow(points_data, radius_data, bounds);
if (bounds.valid()) {
references.push_back(BVHReference(bounds, j, i, PRIMITIVE_POINT));
root.grow(bounds);
center.grow(bounds.center2());
}
}
}
else if (params.num_motion_point_steps == 0 || params.use_spatial_split) {
/* Simple case of motion points: single node for the whole
* shutter time. Lowest memory usage but less optimal
* rendering.
*/
/* TODO(sergey): Support motion steps for spatially split BVH. */
for (uint j = 0; j < num_points; j++) {
const PointCloud::Point point = pointcloud->get_point(j);
BoundBox bounds = BoundBox::empty;
point.bounds_grow(points_data, radius_data, bounds);
for (size_t step = 0; step < num_steps - 1; step++) {
point.bounds_grow(motion_data + step * num_points, radius_data, bounds);
}
if (bounds.valid()) {
references.push_back(BVHReference(bounds, j, i, PRIMITIVE_MOTION_POINT));
root.grow(bounds);
center.grow(bounds.center2());
}
}
}
else {
/* Motion points, trace optimized case: we split point
* primitives into separate nodes for each of the time steps.
* This way we minimize overlap of neighbor point primitives.
*/
const int num_bvh_steps = params.num_motion_point_steps * 2 + 1;
const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
for (uint j = 0; j < num_points; j++) {
const PointCloud::Point point = pointcloud->get_point(j);
const size_t num_steps = pointcloud->get_motion_steps();
const float3 *point_steps = point_attr_mP->data_float3();
      /* Calculate the bounding box of the previous time step.
       * It will be reused later to avoid duplicated work when
       * calculating the BVH time-step boundbox.
       */
float4 prev_key = point.motion_key(
points_data, radius_data, point_steps, num_points, num_steps, 0.0f, j);
BoundBox prev_bounds = BoundBox::empty;
point.bounds_grow(prev_key, prev_bounds);
      /* Create all primitive time steps. */
for (int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
const float curr_time = (float)(bvh_step)*num_bvh_steps_inv_1;
float4 curr_key = point.motion_key(
points_data, radius_data, point_steps, num_points, num_steps, curr_time, j);
BoundBox curr_bounds = BoundBox::empty;
point.bounds_grow(curr_key, curr_bounds);
BoundBox bounds = prev_bounds;
bounds.grow(curr_bounds);
if (bounds.valid()) {
const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
references.push_back(
BVHReference(bounds, j, i, PRIMITIVE_MOTION_POINT, prev_time, curr_time));
root.grow(bounds);
center.grow(bounds.center2());
}
/* Current time boundbox becomes previous one for the
* next time step.
*/
prev_bounds = curr_bounds;
}
}
}
}
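/* Sketch of the references produced above (assuming num_motion_point_steps = 2, hence
 * num_bvh_steps = 5): each point yields four PRIMITIVE_MOTION_POINT references with time ranges
 * [0.0, 0.25], [0.25, 0.5], [0.5, 0.75] and [0.75, 1.0], each bounded by the union of its two
 * neighboring time-step bounds. */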
void BVHBuild::add_reference_geometry(BoundBox &root,
BoundBox &center,
Geometry *geom,
@@ -378,10 +282,6 @@ void BVHBuild::add_reference_geometry(BoundBox &root,
Hair *hair = static_cast<Hair *>(geom);
add_reference_curves(root, center, hair, object_index);
}
else if (geom->geometry_type == Geometry::POINTCLOUD) {
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
add_reference_points(root, center, pointcloud, object_index);
}
}
void BVHBuild::add_reference_object(BoundBox &root, BoundBox &center, Object *ob, int i)
@@ -411,10 +311,6 @@ static size_t count_primitives(Geometry *geom)
Hair *hair = static_cast<Hair *>(geom);
return count_curve_segments(hair);
}
else if (geom->geometry_type == Geometry::POINTCLOUD) {
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
return pointcloud->num_points();
}
return 0;
}
@@ -432,9 +328,8 @@ void BVHBuild::add_references(BVHRange &root)
if (!ob->get_geometry()->is_instanced()) {
num_alloc_references += count_primitives(ob->get_geometry());
}
else {
else
num_alloc_references++;
}
}
else {
num_alloc_references += count_primitives(ob->get_geometry());
@@ -499,7 +394,7 @@ BVHNode *BVHBuild::run()
spatial_min_overlap = root.bounds().safe_area() * params.spatial_split_alpha;
spatial_free_index = 0;
need_prim_time = params.use_motion_steps();
need_prim_time = params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0;
/* init progress updates */
double build_start_time;
@@ -640,8 +535,7 @@ bool BVHBuild::range_within_max_leaf_size(const BVHRange &range,
const vector<BVHReference> &references) const
{
size_t size = range.size();
size_t max_leaf_size = max(max(params.max_triangle_leaf_size, params.max_curve_leaf_size),
params.max_point_leaf_size);
size_t max_leaf_size = max(params.max_triangle_leaf_size, params.max_curve_leaf_size);
if (size > max_leaf_size)
return false;
@@ -650,44 +544,32 @@ bool BVHBuild::range_within_max_leaf_size(const BVHRange &range,
size_t num_motion_triangles = 0;
size_t num_curves = 0;
size_t num_motion_curves = 0;
size_t num_points = 0;
size_t num_motion_points = 0;
for (int i = 0; i < size; i++) {
const BVHReference &ref = references[range.start() + i];
if (ref.prim_type() & PRIMITIVE_CURVE) {
if (ref.prim_type() & PRIMITIVE_MOTION) {
if (ref.prim_type() & PRIMITIVE_ALL_CURVE) {
if (ref.prim_type() & PRIMITIVE_ALL_MOTION) {
num_motion_curves++;
}
else {
num_curves++;
}
}
else if (ref.prim_type() & PRIMITIVE_TRIANGLE) {
if (ref.prim_type() & PRIMITIVE_MOTION) {
else if (ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) {
if (ref.prim_type() & PRIMITIVE_ALL_MOTION) {
num_motion_triangles++;
}
else {
num_triangles++;
}
}
else if (ref.prim_type() & PRIMITIVE_POINT) {
if (ref.prim_type() & PRIMITIVE_MOTION) {
num_motion_points++;
}
else {
num_points++;
}
}
}
return (num_triangles <= params.max_triangle_leaf_size) &&
(num_motion_triangles <= params.max_motion_triangle_leaf_size) &&
(num_curves <= params.max_curve_leaf_size) &&
(num_motion_curves <= params.max_motion_curve_leaf_size) &&
(num_points <= params.max_point_leaf_size) &&
(num_motion_points <= params.max_motion_point_leaf_size);
(num_motion_curves <= params.max_motion_curve_leaf_size);
}
/* multithreaded binning builder */
@@ -973,7 +855,7 @@ BVHNode *BVHBuild::create_leaf_node(const BVHRange &range, const vector<BVHRefer
for (int i = 0; i < range.size(); i++) {
const BVHReference &ref = references[range.start() + i];
if (ref.prim_index() != -1) {
uint32_t type_index = PRIMITIVE_INDEX(ref.prim_type() & PRIMITIVE_ALL);
uint32_t type_index = bitscan((uint32_t)(ref.prim_type() & PRIMITIVE_ALL));
p_ref[type_index].push_back(ref);
p_type[type_index].push_back(ref.prim_type());
p_index[type_index].push_back(ref.prim_index());
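      /* Example (illustrative): both variants map a primitive-type bit to a small array index;
       * for a masked type flag of (1 << 1), the lowest set bit gives type_index = 1, so the
       * reference lands in the second per-type bucket (p_ref[1], p_type[1], p_index[1]). The
       * actual bit assignments live in the kernel type headers. */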

View File

@@ -39,7 +39,6 @@ class Geometry;
class Hair;
class Mesh;
class Object;
class PointCloud;
class Progress;
/* BVH Builder */
@@ -69,7 +68,6 @@ class BVHBuild {
/* Adding references. */
void add_reference_triangles(BoundBox &root, BoundBox &center, Mesh *mesh, int i);
void add_reference_curves(BoundBox &root, BoundBox &center, Hair *hair, int i);
void add_reference_points(BoundBox &root, BoundBox &center, PointCloud *pointcloud, int i);
void add_reference_geometry(BoundBox &root, BoundBox &center, Geometry *geom, int i);
void add_reference_object(BoundBox &root, BoundBox &center, Object *ob, int i);
void add_references(BVHRange &root);

View File

@@ -19,7 +19,6 @@
#include "bvh/bvh2.h"
#include "bvh/embree.h"
#include "bvh/metal.h"
#include "bvh/multi.h"
#include "bvh/optix.h"
@@ -41,12 +40,8 @@ const char *bvh_layout_name(BVHLayout layout)
return "EMBREE";
case BVH_LAYOUT_OPTIX:
return "OPTIX";
case BVH_LAYOUT_METAL:
return "METAL";
case BVH_LAYOUT_MULTI_OPTIX:
case BVH_LAYOUT_MULTI_METAL:
case BVH_LAYOUT_MULTI_OPTIX_EMBREE:
case BVH_LAYOUT_MULTI_METAL_EMBREE:
return "MULTI";
case BVH_LAYOUT_ALL:
return "ALL";
@@ -107,18 +102,9 @@ BVH *BVH::create(const BVHParams &params,
#else
(void)device;
break;
#endif
case BVH_LAYOUT_METAL:
#ifdef WITH_METAL
return bvh_metal_create(params, geometry, objects, device);
#else
(void)device;
break;
#endif
case BVH_LAYOUT_MULTI_OPTIX:
case BVH_LAYOUT_MULTI_METAL:
case BVH_LAYOUT_MULTI_OPTIX_EMBREE:
case BVH_LAYOUT_MULTI_METAL_EMBREE:
return new BVHMulti(params, geometry, objects);
case BVH_LAYOUT_NONE:
case BVH_LAYOUT_ALL:

View File

@@ -20,7 +20,6 @@
#include "scene/hair.h"
#include "scene/mesh.h"
#include "scene/object.h"
#include "scene/pointcloud.h"
#include "bvh/build.h"
#include "bvh/node.h"
@@ -387,7 +386,7 @@ void BVH2::refit_primitives(int start, int end, BoundBox &bbox, uint &visibility
}
else {
/* Primitives. */
if (pack.prim_type[prim] & PRIMITIVE_CURVE) {
if (pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
/* Curves. */
const Hair *hair = static_cast<const Hair *>(ob->get_geometry());
int prim_offset = (params.top_level) ? hair->prim_offset : 0;
@@ -410,30 +409,6 @@ void BVH2::refit_primitives(int start, int end, BoundBox &bbox, uint &visibility
}
}
}
else if (pack.prim_type[prim] & PRIMITIVE_POINT) {
/* Points. */
const PointCloud *pointcloud = static_cast<const PointCloud *>(ob->get_geometry());
int prim_offset = (params.top_level) ? pointcloud->prim_offset : 0;
const float3 *points = &pointcloud->points[0];
const float *radius = &pointcloud->radius[0];
PointCloud::Point point = pointcloud->get_point(pidx - prim_offset);
point.bounds_grow(points, radius, bbox);
/* Motion points. */
if (pointcloud->get_use_motion_blur()) {
Attribute *attr = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if (attr) {
size_t pointcloud_size = pointcloud->points.size();
size_t steps = pointcloud->get_motion_steps() - 1;
float3 *point_steps = attr->data_float3();
for (size_t i = 0; i < steps; i++)
point.bounds_grow(point_steps + i * pointcloud_size, radius, bbox);
}
}
}
else {
/* Triangles. */
const Mesh *mesh = static_cast<const Mesh *>(ob->get_geometry());
@@ -530,8 +505,7 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
pack.leaf_nodes.resize(leaf_nodes_size);
pack.object_node.resize(objects.size());
if (params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0 ||
params.num_motion_point_steps > 0) {
if (params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0) {
pack.prim_time.resize(prim_index_size);
}
@@ -590,7 +564,13 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
float2 *bvh_prim_time = bvh->pack.prim_time.size() ? &bvh->pack.prim_time[0] : NULL;
for (size_t i = 0; i < bvh_prim_index_size; i++) {
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset;
if (bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset;
}
else {
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset;
}
pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i];
pack_prim_visibility[pack_prim_index_offset] = bvh_prim_visibility[i];
pack_prim_object[pack_prim_index_offset] = 0; // unused for instances

View File

@@ -45,7 +45,6 @@
# include "scene/hair.h"
# include "scene/mesh.h"
# include "scene/object.h"
# include "scene/pointcloud.h"
# include "util/foreach.h"
# include "util/log.h"
@@ -91,7 +90,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
++ctx->num_hits;
/* Always use baked shadow transparency for curves. */
if (current_isect.type & PRIMITIVE_CURVE) {
if (current_isect.type & PRIMITIVE_ALL_CURVE) {
ctx->throughput *= intersection_curve_shadow_transparency(
kg, current_isect.object, current_isect.prim, current_isect.u);
@@ -246,7 +245,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
}
}
static void rtc_filter_func_backface_cull(const RTCFilterFunctionNArguments *args)
static void rtc_filter_func_thick_curve(const RTCFilterFunctionNArguments *args)
{
const RTCRay *ray = (RTCRay *)args->ray;
RTCHit *hit = (RTCHit *)args->hit;
@@ -259,7 +258,7 @@ static void rtc_filter_func_backface_cull(const RTCFilterFunctionNArguments *arg
}
}
static void rtc_filter_occluded_func_backface_cull(const RTCFilterFunctionNArguments *args)
static void rtc_filter_occluded_func_thick_curve(const RTCFilterFunctionNArguments *args)
{
const RTCRay *ray = (RTCRay *)args->ray;
RTCHit *hit = (RTCHit *)args->hit;
@@ -411,12 +410,6 @@ void BVHEmbree::add_object(Object *ob, int i)
add_curves(ob, hair, i);
}
}
else if (geom->geometry_type == Geometry::POINTCLOUD) {
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
if (pointcloud->num_points() > 0) {
add_points(ob, pointcloud, i);
}
}
}
void BVHEmbree::add_instance(Object *ob, int i)
@@ -631,89 +624,6 @@ void BVHEmbree::set_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair, c
}
}
void BVHEmbree::set_point_vertex_buffer(RTCGeometry geom_id,
const PointCloud *pointcloud,
const bool update)
{
const Attribute *attr_mP = NULL;
size_t num_motion_steps = 1;
if (pointcloud->has_motion_blur()) {
attr_mP = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if (attr_mP) {
num_motion_steps = pointcloud->get_motion_steps();
}
}
const size_t num_points = pointcloud->num_points();
/* Copy the point data to Embree */
const int t_mid = (num_motion_steps - 1) / 2;
const float *radius = pointcloud->get_radius().data();
for (int t = 0; t < num_motion_steps; ++t) {
const float3 *verts;
if (t == t_mid || attr_mP == NULL) {
verts = pointcloud->get_points().data();
}
else {
int t_ = (t > t_mid) ? (t - 1) : t;
verts = &attr_mP->data_float3()[t_ * num_points];
}
float4 *rtc_verts = (update) ? (float4 *)rtcGetGeometryBufferData(
geom_id, RTC_BUFFER_TYPE_VERTEX, t) :
(float4 *)rtcSetNewGeometryBuffer(geom_id,
RTC_BUFFER_TYPE_VERTEX,
t,
RTC_FORMAT_FLOAT4,
sizeof(float) * 4,
num_points);
assert(rtc_verts);
if (rtc_verts) {
for (size_t j = 0; j < num_points; ++j) {
rtc_verts[j] = float3_to_float4(verts[j]);
rtc_verts[j].w = radius[j];
}
}
if (update) {
rtcUpdateGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t);
}
}
}
void BVHEmbree::add_points(const Object *ob, const PointCloud *pointcloud, int i)
{
size_t prim_offset = pointcloud->prim_offset;
const Attribute *attr_mP = NULL;
size_t num_motion_steps = 1;
if (pointcloud->has_motion_blur()) {
attr_mP = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if (attr_mP) {
num_motion_steps = pointcloud->get_motion_steps();
}
}
enum RTCGeometryType type = RTC_GEOMETRY_TYPE_SPHERE_POINT;
RTCGeometry geom_id = rtcNewGeometry(rtc_device, type);
rtcSetGeometryBuildQuality(geom_id, build_quality);
rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
set_point_vertex_buffer(geom_id, pointcloud, false);
rtcSetGeometryUserData(geom_id, (void *)prim_offset);
rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func_backface_cull);
rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func_backface_cull);
rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
rtcCommitGeometry(geom_id);
rtcAttachGeometryByID(scene, geom_id, i * 2);
rtcReleaseGeometry(geom_id);
}
void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i)
{
size_t prim_offset = hair->curve_segment_offset;
@@ -768,8 +678,8 @@ void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i)
rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func);
}
else {
rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func_backface_cull);
rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func_backface_cull);
rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func_thick_curve);
rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func_thick_curve);
}
rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
@@ -806,14 +716,6 @@ void BVHEmbree::refit(Progress &progress)
rtcCommitGeometry(geom);
}
}
else if (geom->geometry_type == Geometry::POINTCLOUD) {
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
if (pointcloud->num_points() > 0) {
RTCGeometry geom = rtcGetGeometry(scene, geom_id);
set_point_vertex_buffer(geom, pointcloud, true);
rtcCommitGeometry(geom);
}
}
}
geom_id += 2;
}

View File

@@ -33,7 +33,6 @@ CCL_NAMESPACE_BEGIN
class Hair;
class Mesh;
class PointCloud;
class BVHEmbree : public BVH {
public:
@@ -52,15 +51,11 @@ class BVHEmbree : public BVH {
void add_object(Object *ob, int i);
void add_instance(Object *ob, int i);
void add_curves(const Object *ob, const Hair *hair, int i);
void add_points(const Object *ob, const PointCloud *pointcloud, int i);
void add_triangles(const Object *ob, const Mesh *mesh, int i);
private:
void set_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh, const bool update);
void set_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair, const bool update);
void set_point_vertex_buffer(RTCGeometry geom_id,
const PointCloud *pointcloud,
const bool update);
RTCDevice rtc_device;
enum RTCBuildQuality build_quality;

View File

@@ -1,35 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __BVH_METAL_H__
#define __BVH_METAL_H__
#ifdef WITH_METAL
# include "bvh/bvh.h"
CCL_NAMESPACE_BEGIN
BVH *bvh_metal_create(const BVHParams &params,
const vector<Geometry *> &geometry,
const vector<Object *> &objects,
Device *device);
CCL_NAMESPACE_END
#endif /* WITH_METAL */
#endif /* __BVH_METAL_H__ */

View File

@@ -1,33 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_METAL
# include "device/metal/bvh.h"
CCL_NAMESPACE_BEGIN
BVH *bvh_metal_create(const BVHParams &params,
const vector<Geometry *> &geometry,
const vector<Object *> &objects,
Device *device)
{
return new BVHMetal(params, geometry, objects, device);
}
CCL_NAMESPACE_END
#endif /* WITH_METAL */

View File

@@ -83,8 +83,6 @@ class BVHParams {
int max_motion_triangle_leaf_size;
int max_curve_leaf_size;
int max_motion_curve_leaf_size;
int max_point_leaf_size;
int max_motion_point_leaf_size;
/* object or mesh level bvh */
bool top_level;
@@ -100,13 +98,13 @@ class BVHParams {
/* Split time range into this number of steps and create a leaf node for each
* of these time steps.
*
* Speeds up rendering of motion primitives at the cost of higher memory usage.
* Speeds up rendering of motion curve primitives at the cost of higher
* memory usage.
*/
int num_motion_curve_steps;
/* Same as above, but for triangle primitives. */
int num_motion_triangle_steps;
int num_motion_curve_steps;
int num_motion_point_steps;
/* Same as in SceneParams. */
int bvh_type;
@@ -134,8 +132,6 @@ class BVHParams {
max_motion_triangle_leaf_size = 8;
max_curve_leaf_size = 1;
max_motion_curve_leaf_size = 4;
max_point_leaf_size = 8;
max_motion_point_leaf_size = 8;
top_level = false;
bvh_layout = BVH_LAYOUT_BVH2;
@@ -143,7 +139,6 @@ class BVHParams {
num_motion_curve_steps = 0;
num_motion_triangle_steps = 0;
num_motion_point_steps = 0;
bvh_type = 0;
@@ -171,12 +166,6 @@ class BVHParams {
return (size <= min_leaf_size || level >= MAX_DEPTH);
}
bool use_motion_steps()
{
return num_motion_curve_steps > 0 || num_motion_triangle_steps > 0 ||
num_motion_point_steps > 0;
}
/* Gets best matching BVH.
*
* If the requested layout is supported by the device, it will be used.

View File

@@ -23,7 +23,6 @@
#include "scene/hair.h"
#include "scene/mesh.h"
#include "scene/object.h"
#include "scene/pointcloud.h"
#include "util/algorithm.h"
@@ -427,32 +426,6 @@ void BVHSpatialSplit::split_curve_primitive(const Hair *hair,
}
}
void BVHSpatialSplit::split_point_primitive(const PointCloud *pointcloud,
const Transform *tfm,
int prim_index,
int dim,
float pos,
BoundBox &left_bounds,
BoundBox &right_bounds)
{
/* No real splitting support for points: a point on the split plane simply
* grows both bounds. Assume points are small enough for this not to matter. */
float3 point = pointcloud->get_points()[prim_index];
if (tfm != NULL) {
point = transform_point(tfm, point);
}
point = get_unaligned_point(point);
if (point[dim] <= pos) {
left_bounds.grow(point);
}
if (point[dim] >= pos) {
right_bounds.grow(point);
}
}
void BVHSpatialSplit::split_triangle_reference(const BVHReference &ref,
const Mesh *mesh,
int dim,
@@ -480,16 +453,6 @@ void BVHSpatialSplit::split_curve_reference(const BVHReference &ref,
right_bounds);
}
void BVHSpatialSplit::split_point_reference(const BVHReference &ref,
const PointCloud *pointcloud,
int dim,
float pos,
BoundBox &left_bounds,
BoundBox &right_bounds)
{
split_point_primitive(pointcloud, NULL, ref.prim_index(), dim, pos, left_bounds, right_bounds);
}
void BVHSpatialSplit::split_object_reference(
const Object *object, int dim, float pos, BoundBox &left_bounds, BoundBox &right_bounds)
{
@@ -512,13 +475,6 @@ void BVHSpatialSplit::split_object_reference(
}
}
}
else if (geom->geometry_type == Geometry::POINTCLOUD) {
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
for (int point_idx = 0; point_idx < pointcloud->num_points(); ++point_idx) {
split_point_primitive(
pointcloud, &object->get_tfm(), point_idx, dim, pos, left_bounds, right_bounds);
}
}
}
void BVHSpatialSplit::split_reference(const BVHBuild &builder,
@@ -535,18 +491,14 @@ void BVHSpatialSplit::split_reference(const BVHBuild &builder,
/* loop over vertices/edges. */
const Object *ob = builder.objects[ref.prim_object()];
if (ref.prim_type() & PRIMITIVE_TRIANGLE) {
if (ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) {
Mesh *mesh = static_cast<Mesh *>(ob->get_geometry());
split_triangle_reference(ref, mesh, dim, pos, left_bounds, right_bounds);
}
else if (ref.prim_type() & PRIMITIVE_CURVE) {
else if (ref.prim_type() & PRIMITIVE_ALL_CURVE) {
Hair *hair = static_cast<Hair *>(ob->get_geometry());
split_curve_reference(ref, hair, dim, pos, left_bounds, right_bounds);
}
else if (ref.prim_type() & PRIMITIVE_POINT) {
PointCloud *pointcloud = static_cast<PointCloud *>(ob->get_geometry());
split_point_reference(ref, pointcloud, dim, pos, left_bounds, right_bounds);
}
else {
split_object_reference(ob, dim, pos, left_bounds, right_bounds);
}

View File

@@ -26,7 +26,6 @@ CCL_NAMESPACE_BEGIN
class BVHBuild;
class Hair;
class Mesh;
class PointCloud;
struct Transform;
/* Object Split */
@@ -124,13 +123,6 @@ class BVHSpatialSplit {
float pos,
BoundBox &left_bounds,
BoundBox &right_bounds);
void split_point_primitive(const PointCloud *pointcloud,
const Transform *tfm,
int prim_index,
int dim,
float pos,
BoundBox &left_bounds,
BoundBox &right_bounds);
/* Lower-level functions which calculates boundaries of left and right nodes
* needed for spatial split.
@@ -149,12 +141,6 @@ class BVHSpatialSplit {
float pos,
BoundBox &left_bounds,
BoundBox &right_bounds);
void split_point_reference(const BVHReference &ref,
const PointCloud *pointcloud,
int dim,
float pos,
BoundBox &left_bounds,
BoundBox &right_bounds);
void split_object_reference(
const Object *object, int dim, float pos, BoundBox &left_bounds, BoundBox &right_bounds);

View File

@@ -69,7 +69,7 @@ bool BVHUnaligned::compute_aligned_space(const BVHReference &ref, Transform *ali
const int packed_type = ref.prim_type();
const int type = (packed_type & PRIMITIVE_ALL);
/* No motion blur curves here, we can't fit them to aligned boxes well. */
if ((type & PRIMITIVE_CURVE) && !(type & PRIMITIVE_MOTION)) {
if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_CURVE_THICK)) {
const int curve_index = ref.prim_index();
const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type);
const Hair *hair = static_cast<const Hair *>(object->get_geometry());
@@ -95,7 +95,7 @@ BoundBox BVHUnaligned::compute_aligned_prim_boundbox(const BVHReference &prim,
const int packed_type = prim.prim_type();
const int type = (packed_type & PRIMITIVE_ALL);
/* No motion blur curves here, we can't fit them to aligned boxes well. */
if ((type & PRIMITIVE_CURVE) && !(type & PRIMITIVE_MOTION)) {
if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_CURVE_THICK)) {
const int curve_index = prim.prim_index();
const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type);
const Hair *hair = static_cast<const Hair *>(object->get_geometry());

View File

@@ -551,23 +551,4 @@ if(NOT WITH_HIP_DYNLOAD)
set(WITH_HIP_DYNLOAD ON)
endif()
###########################################################################
# Metal
###########################################################################
if(WITH_CYCLES_DEVICE_METAL)
find_library(METAL_LIBRARY Metal)
# This file was added in the 12.0 SDK, use it as a way to detect the version.
if (METAL_LIBRARY AND NOT EXISTS "${METAL_LIBRARY}/Headers/MTLFunctionStitching.h")
message(STATUS "Metal version too old, must be SDK 12.0 or newer, disabling WITH_CYCLES_DEVICE_METAL")
set(WITH_CYCLES_DEVICE_METAL OFF)
elseif (NOT METAL_LIBRARY)
message(STATUS "Metal not found, disabling WITH_CYCLES_DEVICE_METAL")
set(WITH_CYCLES_DEVICE_METAL OFF)
else()
message(STATUS "Found Metal: ${METAL_LIBRARY}")
endif()
endif()
unset(_cycles_lib_dir)

View File

@@ -168,6 +168,12 @@ macro(cycles_target_link_libraries target)
target_link_libraries(${target} extern_hipew)
endif()
if(CYCLES_STANDALONE_REPOSITORY)
target_link_libraries(${target} extern_numaapi)
else()
target_link_libraries(${target} bf_intern_numaapi)
endif()
if(UNIX AND NOT APPLE)
if(CYCLES_STANDALONE_REPOSITORY)
target_link_libraries(${target} extern_libc_compat)

View File

@@ -43,7 +43,7 @@ if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
add_definitions(-DWITH_HIP_DYNLOAD)
endif()
set(SRC_BASE
set(SRC
device.cpp
denoise.cpp
graphics_interop.cpp
@@ -104,21 +104,6 @@ set(SRC_MULTI
multi/device.h
)
set(SRC_METAL
metal/bvh.mm
metal/bvh.h
metal/device.mm
metal/device.h
metal/device_impl.mm
metal/device_impl.h
metal/kernel.mm
metal/kernel.h
metal/queue.mm
metal/queue.h
metal/util.mm
metal/util.h
)
set(SRC_OPTIX
optix/device.cpp
optix/device.h
@@ -138,17 +123,6 @@ set(SRC_HEADERS
queue.h
)
set(SRC
${SRC_BASE}
${SRC_CPU}
${SRC_CUDA}
${SRC_HIP}
${SRC_DUMMY}
${SRC_MULTI}
${SRC_OPTIX}
${SRC_HEADERS}
)
set(LIB
cycles_kernel
cycles_util
@@ -184,15 +158,6 @@ endif()
if(WITH_CYCLES_DEVICE_OPTIX)
add_definitions(-DWITH_OPTIX)
endif()
if(WITH_CYCLES_DEVICE_METAL)
list(APPEND LIB
${METAL_LIBRARY}
)
add_definitions(-DWITH_METAL)
list(APPEND SRC
${SRC_METAL}
)
endif()
if(WITH_OPENIMAGEDENOISE)
list(APPEND LIB
@@ -203,12 +168,20 @@ endif()
include_directories(${INC})
include_directories(SYSTEM ${INC_SYS})
cycles_add_library(cycles_device "${LIB}" ${SRC})
cycles_add_library(cycles_device "${LIB}"
${SRC}
${SRC_CPU}
${SRC_CUDA}
${SRC_HIP}
${SRC_DUMMY}
${SRC_MULTI}
${SRC_OPTIX}
${SRC_HEADERS}
)
source_group("cpu" FILES ${SRC_CPU})
source_group("cuda" FILES ${SRC_CUDA})
source_group("dummy" FILES ${SRC_DUMMY})
source_group("multi" FILES ${SRC_MULTI})
source_group("metal" FILES ${SRC_METAL})
source_group("optix" FILES ${SRC_OPTIX})
source_group("common" FILES ${SRC} ${SRC_HEADERS})

View File

@@ -129,7 +129,8 @@ void CPUDevice::mem_alloc(device_memory &mem)
<< string_human_readable_size(mem.memory_size()) << ")";
}
if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) {
if (mem.type == MEM_DEVICE_ONLY) {
assert(!mem.host_pointer);
size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES;
void *data = util_aligned_malloc(mem.memory_size(), alignment);
mem.device_pointer = (device_ptr)data;
@@ -188,7 +189,7 @@ void CPUDevice::mem_free(device_memory &mem)
tex_free((device_texture &)mem);
}
else if (mem.device_pointer) {
if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) {
if (mem.type == MEM_DEVICE_ONLY) {
util_aligned_free((void *)mem.device_pointer);
}
mem.device_pointer = 0;
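A hedged sketch of the alloc/free pairing behind the MEM_DEVICE_ONLY branches above, using the C++17 standard equivalents of the Cycles util_aligned_malloc/util_aligned_free helpers; kMinAlignment is a stand-in for MIN_ALIGNMENT_CPU_DATA_TYPES.
#include <cstddef>
#include <cstdlib>
constexpr size_t kMinAlignment = 16;
void *device_only_alloc(size_t size)
{
  /* std::aligned_alloc requires the size to be a multiple of the alignment. */
  const size_t padded = (size + kMinAlignment - 1) & ~(kMinAlignment - 1);
  return std::aligned_alloc(kMinAlignment, padded);
}
void device_only_free(void *ptr)
{
  std::free(ptr); /* Must pair with the aligned allocation above. */
}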
@@ -273,8 +274,7 @@ void CPUDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
{
#ifdef WITH_EMBREE
if (bvh->params.bvh_layout == BVH_LAYOUT_EMBREE ||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE ||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE) {
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE) {
BVHEmbree *const bvh_embree = static_cast<BVHEmbree *>(bvh);
if (refit) {
bvh_embree->refit(progress);

View File

@@ -477,10 +477,10 @@ void CUDADevice::reserve_local_memory(const uint kernel_features)
* still to make it faster. */
CUDADeviceQueue queue(this);
device_ptr d_path_index = 0;
device_ptr d_render_buffer = 0;
void *d_path_index = nullptr;
void *d_render_buffer = nullptr;
int d_work_size = 0;
DeviceKernelArguments args(&d_path_index, &d_render_buffer, &d_work_size);
void *args[] = {&d_path_index, &d_render_buffer, &d_work_size};
queue.init_execution();
queue.enqueue(test_kernel, 1, args);
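The void *args[] form matches the CUDA driver API's launch convention: cuLaunchKernel receives an array holding the address of each kernel parameter, as in the hedged sketch below (the function, grid/block sizes, and stream are hypothetical). The HIP queue later in this diff passes the same layout to HIP's module-launch call.
#include <cuda.h>
bool launch_example(CUfunction func, CUstream stream)
{
  CUdeviceptr d_buffer = 0; /* device-pointer argument */
  int work_size = 1024;     /* plain-value argument */
  /* One entry per kernel parameter, each holding the address of the argument. */
  void *args[] = {&d_buffer, &work_size};
  return cuLaunchKernel(func,
                        4, 1, 1,   /* grid */
                        256, 1, 1, /* block */
                        0,         /* shared memory bytes */
                        stream,
                        args,
                        nullptr) == CUDA_SUCCESS;
}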
@@ -678,7 +678,7 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_
void *shared_pointer = 0;
if (mem_alloc_result != CUDA_SUCCESS && can_map_host && mem.type != MEM_DEVICE_ONLY) {
if (mem_alloc_result != CUDA_SUCCESS && can_map_host) {
if (mem.shared_pointer) {
/* Another device already allocated host memory. */
mem_alloc_result = CUDA_SUCCESS;
@@ -701,14 +701,8 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_
}
if (mem_alloc_result != CUDA_SUCCESS) {
if (mem.type == MEM_DEVICE_ONLY) {
status = " failed, out of device memory";
set_error("System is out of GPU memory");
}
else {
status = " failed, out of device and host memory";
set_error("System is out of GPU and shared host memory");
}
status = " failed, out of device and host memory";
set_error("System is out of GPU and shared host memory");
}
if (mem.name) {
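A hedged sketch of the fallback this hunk narrows: when device allocation fails and the context can map host memory, allocation falls back to pinned, device-mapped host memory. The helper below is illustrative and trims error handling and the peer-device sharing the real code layers on top, but the three driver-API calls are the standard CUDA route.
#include <cuda.h>
bool alloc_with_host_fallback(size_t size, CUdeviceptr *d_ptr, void **h_ptr)
{
  *h_ptr = nullptr;
  if (cuMemAlloc(d_ptr, size) == CUDA_SUCCESS) {
    return true; /* Regular device allocation. */
  }
  /* Out of device memory: try pinned host memory the GPU can address. */
  if (cuMemHostAlloc(h_ptr, size, CU_MEMHOSTALLOC_DEVICEMAP) != CUDA_SUCCESS) {
    return false; /* Out of device and host memory. */
  }
  return cuMemHostGetDevicePointer(d_ptr, *h_ptr, 0) == CUDA_SUCCESS;
}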

View File

@@ -45,10 +45,8 @@ void CUDADeviceGraphicsInterop::set_display_interop(
need_clear_ = display_interop.need_clear;
if (!display_interop.need_recreate) {
if (opengl_pbo_id_ == display_interop.opengl_pbo_id && buffer_area_ == new_buffer_area) {
return;
}
if (opengl_pbo_id_ == display_interop.opengl_pbo_id && buffer_area_ == new_buffer_area) {
return;
}
CUDAContextScope scope(device_);

View File

@@ -89,9 +89,7 @@ bool CUDADeviceQueue::kernel_available(DeviceKernel kernel) const
return cuda_device_->kernels.available(kernel);
}
bool CUDADeviceQueue::enqueue(DeviceKernel kernel,
const int work_size,
DeviceKernelArguments const &args)
bool CUDADeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[])
{
if (cuda_device_->have_error()) {
return false;
@@ -135,7 +133,7 @@ bool CUDADeviceQueue::enqueue(DeviceKernel kernel,
1,
shared_mem_bytes,
cuda_stream_,
const_cast<void **>(args.values),
args,
0),
"enqueue");

View File

@@ -42,9 +42,7 @@ class CUDADeviceQueue : public DeviceQueue {
virtual bool kernel_available(DeviceKernel kernel) const override;
virtual bool enqueue(DeviceKernel kernel,
const int work_size,
DeviceKernelArguments const &args) override;
virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) override;
virtual bool synchronize() override;

View File

@@ -76,8 +76,6 @@ NODE_DEFINE(DenoiseParams)
SOCKET_BOOLEAN(use_pass_albedo, "Use Pass Albedo", true);
SOCKET_BOOLEAN(use_pass_normal, "Use Pass Normal", false);
SOCKET_BOOLEAN(temporally_stable, "Temporally Stable", false);
SOCKET_ENUM(prefilter, "Prefilter", *prefilter_enum, DENOISER_PREFILTER_FAST);
return type;

View File

@@ -72,9 +72,6 @@ class DenoiseParams : public Node {
bool use_pass_albedo = true;
bool use_pass_normal = true;
/* Configure the denoiser to use motion vectors, previous image and a temporally stable model. */
bool temporally_stable = false;
DenoiserPrefilter prefilter = DENOISER_PREFILTER_FAST;
static const NodeEnum *get_type_enum();
@@ -86,8 +83,7 @@ class DenoiseParams : public Node {
{
return !(use == other.use && type == other.type && start_sample == other.start_sample &&
use_pass_albedo == other.use_pass_albedo &&
use_pass_normal == other.use_pass_normal &&
temporally_stable == other.temporally_stable && prefilter == other.prefilter);
use_pass_normal == other.use_pass_normal && prefilter == other.prefilter);
}
};
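The comparison above implements "modified" as the negation of memberwise equality, so removing temporally_stable from the list also means changes to that flag no longer mark the params as modified. A standalone sketch of the pattern (field names illustrative):
struct ParamsSketch {
  bool use = true;
  int start_sample = 0;
  bool modified(const ParamsSketch &other) const
  {
    return !(use == other.use && start_sample == other.start_sample);
  }
};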

View File

@@ -27,7 +27,6 @@
#include "device/cuda/device.h"
#include "device/dummy/device.h"
#include "device/hip/device.h"
#include "device/metal/device.h"
#include "device/multi/device.h"
#include "device/optix/device.h"
@@ -37,7 +36,6 @@
#include "util/math.h"
#include "util/string.h"
#include "util/system.h"
#include "util/task.h"
#include "util/time.h"
#include "util/types.h"
#include "util/vector.h"
@@ -51,7 +49,6 @@ vector<DeviceInfo> Device::cuda_devices;
vector<DeviceInfo> Device::optix_devices;
vector<DeviceInfo> Device::cpu_devices;
vector<DeviceInfo> Device::hip_devices;
vector<DeviceInfo> Device::metal_devices;
uint Device::devices_initialized_mask = 0;
/* Device */
@@ -108,12 +105,6 @@ Device *Device::create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
break;
#endif
#ifdef WITH_METAL
case DEVICE_METAL:
if (device_metal_init())
device = device_metal_create(info, stats, profiler);
break;
#endif
default:
break;
}
@@ -137,8 +128,6 @@ DeviceType Device::type_from_string(const char *name)
return DEVICE_MULTI;
else if (strcmp(name, "HIP") == 0)
return DEVICE_HIP;
else if (strcmp(name, "METAL") == 0)
return DEVICE_METAL;
return DEVICE_NONE;
}
@@ -155,8 +144,6 @@ string Device::string_from_type(DeviceType type)
return "MULTI";
else if (type == DEVICE_HIP)
return "HIP";
else if (type == DEVICE_METAL)
return "METAL";
return "";
}
@@ -174,9 +161,7 @@ vector<DeviceType> Device::available_types()
#ifdef WITH_HIP
types.push_back(DEVICE_HIP);
#endif
#ifdef WITH_METAL
types.push_back(DEVICE_METAL);
#endif
return types;
}
@@ -242,20 +227,6 @@ vector<DeviceInfo> Device::available_devices(uint mask)
}
}
#ifdef WITH_METAL
if (mask & DEVICE_MASK_METAL) {
if (!(devices_initialized_mask & DEVICE_MASK_METAL)) {
if (device_metal_init()) {
device_metal_info(metal_devices);
}
devices_initialized_mask |= DEVICE_MASK_METAL;
}
foreach (DeviceInfo &info, metal_devices) {
devices.push_back(info);
}
}
#endif
return devices;
}
@@ -295,15 +266,6 @@ string Device::device_capabilities(uint mask)
}
#endif
#ifdef WITH_METAL
if (mask & DEVICE_MASK_METAL) {
if (device_metal_init()) {
capabilities += "\nMetal device capabilities:\n";
capabilities += device_metal_capabilities();
}
}
#endif
return capabilities;
}
@@ -334,7 +296,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
/* Ensure CPU device does not slow down GPU. */
if (device.type == DEVICE_CPU && subdevices.size() > 1) {
if (background) {
int orig_cpu_threads = (threads) ? threads : TaskScheduler::num_threads();
int orig_cpu_threads = (threads) ? threads : system_cpu_thread_count();
int cpu_threads = max(orig_cpu_threads - (subdevices.size() - 1), 0);
VLOG(1) << "CPU render threads reduced from " << orig_cpu_threads << " to " << cpu_threads
@@ -392,7 +354,6 @@ void Device::free_memory()
optix_devices.free_memory();
hip_devices.free_memory();
cpu_devices.free_memory();
metal_devices.free_memory();
}
unique_ptr<DeviceQueue> Device::gpu_queue_create()

View File

@@ -52,7 +52,6 @@ enum DeviceType {
DEVICE_MULTI,
DEVICE_OPTIX,
DEVICE_HIP,
DEVICE_METAL,
DEVICE_DUMMY,
};
@@ -61,7 +60,6 @@ enum DeviceTypeMask {
DEVICE_MASK_CUDA = (1 << DEVICE_CUDA),
DEVICE_MASK_OPTIX = (1 << DEVICE_OPTIX),
DEVICE_MASK_HIP = (1 << DEVICE_HIP),
DEVICE_MASK_METAL = (1 << DEVICE_METAL),
DEVICE_MASK_ALL = ~0
};
@@ -283,7 +281,6 @@ class Device {
static vector<DeviceInfo> optix_devices;
static vector<DeviceInfo> cpu_devices;
static vector<DeviceInfo> hip_devices;
static vector<DeviceInfo> metal_devices;
static uint devices_initialized_mask;
};

View File

@@ -440,10 +440,10 @@ void HIPDevice::reserve_local_memory(const uint kernel_features)
* still to make it faster. */
HIPDeviceQueue queue(this);
device_ptr d_path_index = 0;
device_ptr d_render_buffer = 0;
void *d_path_index = nullptr;
void *d_render_buffer = nullptr;
int d_work_size = 0;
DeviceKernelArguments args(&d_path_index, &d_render_buffer, &d_work_size);
void *args[] = {&d_path_index, &d_render_buffer, &d_work_size};
queue.init_execution();
queue.enqueue(test_kernel, 1, args);

View File

@@ -89,9 +89,7 @@ bool HIPDeviceQueue::kernel_available(DeviceKernel kernel) const
return hip_device_->kernels.available(kernel);
}
bool HIPDeviceQueue::enqueue(DeviceKernel kernel,
const int work_size,
DeviceKernelArguments const &args)
bool HIPDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[])
{
if (hip_device_->have_error()) {
return false;
@@ -134,7 +132,7 @@ bool HIPDeviceQueue::enqueue(DeviceKernel kernel,
1,
shared_mem_bytes,
hip_stream_,
const_cast<void **>(args.values),
args,
0),
"enqueue");

View File

@@ -42,9 +42,7 @@ class HIPDeviceQueue : public DeviceQueue {
virtual bool kernel_available(DeviceKernel kernel) const override;
virtual bool enqueue(DeviceKernel kernel,
const int work_size,
DeviceKernelArguments const &args) override;
virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) override;
virtual bool synchronize() override;

View File

@@ -263,7 +263,6 @@ class device_memory {
friend class CUDADevice;
friend class OptiXDevice;
friend class HIPDevice;
friend class MetalDevice;
/* Only create through subclasses. */
device_memory(Device *device, const char *name, MemoryType type);
@@ -582,7 +581,7 @@ template<typename T> class device_vector : public device_memory {
* from an already allocated base memory. It is freed automatically when it
* goes out of scope, which should happen before base memory is freed.
*
* NOTE: some devices require offset and size of the sub_ptr to be properly
* Note: some devices require offset and size of the sub_ptr to be properly
* aligned to device->mem_address_alingment(). */
class device_sub_ptr {

View File

@@ -1,66 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifdef WITH_METAL
# include "bvh/bvh.h"
# include "bvh/params.h"
# include "device/memory.h"
# include <Metal/Metal.h>
CCL_NAMESPACE_BEGIN
class BVHMetal : public BVH {
public:
API_AVAILABLE(macos(11.0))
id<MTLAccelerationStructure> accel_struct = nil;
bool accel_struct_building = false;
API_AVAILABLE(macos(11.0))
vector<id<MTLAccelerationStructure>> blas_array;
bool motion_blur = false;
Stats &stats;
bool build(Progress &progress, id<MTLDevice> device, id<MTLCommandQueue> queue, bool refit);
BVHMetal(const BVHParams &params,
const vector<Geometry *> &geometry,
const vector<Object *> &objects,
Device *device);
virtual ~BVHMetal();
bool build_BLAS(Progress &progress, id<MTLDevice> device, id<MTLCommandQueue> queue, bool refit);
bool build_BLAS_mesh(Progress &progress,
id<MTLDevice> device,
id<MTLCommandQueue> queue,
Geometry *const geom,
bool refit);
bool build_BLAS_hair(Progress &progress,
id<MTLDevice> device,
id<MTLCommandQueue> queue,
Geometry *const geom,
bool refit);
bool build_TLAS(Progress &progress, id<MTLDevice> device, id<MTLCommandQueue> queue, bool refit);
};
CCL_NAMESPACE_END
#endif /* WITH_METAL */

View File

@@ -1,813 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_METAL
# include "scene/hair.h"
# include "scene/mesh.h"
# include "scene/object.h"
# include "util/progress.h"
# include "device/metal/bvh.h"
CCL_NAMESPACE_BEGIN
# define BVH_status(...) \
{ \
string str = string_printf(__VA_ARGS__); \
progress.set_substatus(str); \
}
BVHMetal::BVHMetal(const BVHParams &params_,
const vector<Geometry *> &geometry_,
const vector<Object *> &objects_,
Device *device)
: BVH(params_, geometry_, objects_), stats(device->stats)
{
}
BVHMetal::~BVHMetal()
{
if (@available(macos 12.0, *)) {
if (accel_struct) {
stats.mem_free(accel_struct.allocatedSize);
[accel_struct release];
}
}
}
bool BVHMetal::build_BLAS_mesh(Progress &progress,
id<MTLDevice> device,
id<MTLCommandQueue> queue,
Geometry *const geom,
bool refit)
{
if (@available(macos 12.0, *)) {
/* Build BLAS for triangle primitives */
Mesh *const mesh = static_cast<Mesh *const>(geom);
if (mesh->num_triangles() == 0) {
return false;
}
/*------------------------------------------------*/
BVH_status(
"Building mesh BLAS | %7d tris | %s", (int)mesh->num_triangles(), geom->name.c_str());
/*------------------------------------------------*/
const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);
const array<float3> &verts = mesh->get_verts();
const array<int> &tris = mesh->get_triangles();
const size_t num_verts = verts.size();
const size_t num_indices = tris.size();
size_t num_motion_steps = 1;
Attribute *motion_keys = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if (motion_blur && mesh->get_use_motion_blur() && motion_keys) {
num_motion_steps = mesh->get_motion_steps();
}
MTLResourceOptions storage_mode;
if (device.hasUnifiedMemory) {
storage_mode = MTLResourceStorageModeShared;
}
else {
storage_mode = MTLResourceStorageModeManaged;
}
/* Upload the mesh data to the GPU */
id<MTLBuffer> posBuf = nil;
id<MTLBuffer> indexBuf = [device newBufferWithBytes:tris.data()
length:num_indices * sizeof(tris.data()[0])
options:storage_mode];
if (num_motion_steps == 1) {
posBuf = [device newBufferWithBytes:verts.data()
length:num_verts * sizeof(verts.data()[0])
options:storage_mode];
}
else {
posBuf = [device newBufferWithLength:num_verts * num_motion_steps * sizeof(verts.data()[0])
options:storage_mode];
float3 *dest_data = (float3 *)[posBuf contents];
size_t center_step = (num_motion_steps - 1) / 2;
for (size_t step = 0; step < num_motion_steps; ++step) {
const float3 *verts = mesh->get_verts().data();
/* The center step for motion vertices is not stored in the attribute. */
if (step != center_step) {
verts = motion_keys->data_float3() + (step > center_step ? step - 1 : step) * num_verts;
}
memcpy(dest_data + num_verts * step, verts, num_verts * sizeof(float3));
}
if (storage_mode == MTLResourceStorageModeManaged) {
[posBuf didModifyRange:NSMakeRange(0, posBuf.length)];
}
}
/* Create an acceleration structure. */
MTLAccelerationStructureGeometryDescriptor *geomDesc;
if (num_motion_steps > 1) {
std::vector<MTLMotionKeyframeData *> vertex_ptrs;
vertex_ptrs.reserve(num_motion_steps);
for (size_t step = 0; step < num_motion_steps; ++step) {
MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
k.buffer = posBuf;
k.offset = num_verts * step * sizeof(float3);
vertex_ptrs.push_back(k);
}
MTLAccelerationStructureMotionTriangleGeometryDescriptor *geomDescMotion =
[MTLAccelerationStructureMotionTriangleGeometryDescriptor descriptor];
geomDescMotion.vertexBuffers = [NSArray arrayWithObjects:vertex_ptrs.data()
count:vertex_ptrs.size()];
geomDescMotion.vertexStride = sizeof(verts.data()[0]);
geomDescMotion.indexBuffer = indexBuf;
geomDescMotion.indexBufferOffset = 0;
geomDescMotion.indexType = MTLIndexTypeUInt32;
geomDescMotion.triangleCount = num_indices / 3;
geomDescMotion.intersectionFunctionTableOffset = 0;
geomDesc = geomDescMotion;
}
else {
MTLAccelerationStructureTriangleGeometryDescriptor *geomDescNoMotion =
[MTLAccelerationStructureTriangleGeometryDescriptor descriptor];
geomDescNoMotion.vertexBuffer = posBuf;
geomDescNoMotion.vertexBufferOffset = 0;
geomDescNoMotion.vertexStride = sizeof(verts.data()[0]);
geomDescNoMotion.indexBuffer = indexBuf;
geomDescNoMotion.indexBufferOffset = 0;
geomDescNoMotion.indexType = MTLIndexTypeUInt32;
geomDescNoMotion.triangleCount = num_indices / 3;
geomDescNoMotion.intersectionFunctionTableOffset = 0;
geomDesc = geomDescNoMotion;
}
/* Force a single any-hit call, so shadow record-all behavior works correctly */
/* (Match optix behavior: unsigned int build_flags =
* OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
geomDesc.allowDuplicateIntersectionFunctionInvocation = false;
MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
[MTLPrimitiveAccelerationStructureDescriptor descriptor];
accelDesc.geometryDescriptors = @[ geomDesc ];
if (num_motion_steps > 1) {
accelDesc.motionStartTime = 0.0f;
accelDesc.motionEndTime = 1.0f;
accelDesc.motionStartBorderMode = MTLMotionBorderModeClamp;
accelDesc.motionEndBorderMode = MTLMotionBorderModeClamp;
accelDesc.motionKeyframeCount = num_motion_steps;
}
if (!use_fast_trace_bvh) {
accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
MTLAccelerationStructureUsagePreferFastBuild);
}
MTLAccelerationStructureSizes accelSizes = [device
accelerationStructureSizesWithDescriptor:accelDesc];
id<MTLAccelerationStructure> accel_uncompressed = [device
newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize
options:MTLResourceStorageModePrivate];
id<MTLBuffer> sizeBuf = [device newBufferWithLength:8 options:MTLResourceStorageModeShared];
id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
id<MTLAccelerationStructureCommandEncoder> accelEnc =
[accelCommands accelerationStructureCommandEncoder];
if (refit) {
[accelEnc refitAccelerationStructure:accel_struct
descriptor:accelDesc
destination:accel_uncompressed
scratchBuffer:scratchBuf
scratchBufferOffset:0];
}
else {
[accelEnc buildAccelerationStructure:accel_uncompressed
descriptor:accelDesc
scratchBuffer:scratchBuf
scratchBufferOffset:0];
}
if (use_fast_trace_bvh) {
[accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
toBuffer:sizeBuf
offset:0
sizeDataType:MTLDataTypeULong];
}
[accelEnc endEncoding];
[accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
/* free temp resources */
[scratchBuf release];
[indexBuf release];
[posBuf release];
if (use_fast_trace_bvh) {
/* Compact the accel structure */
uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
id<MTLAccelerationStructureCommandEncoder> accelEnc =
[accelCommands accelerationStructureCommandEncoder];
id<MTLAccelerationStructure> accel = [device
newAccelerationStructureWithSize:compressed_size];
[accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
toAccelerationStructure:accel];
[accelEnc endEncoding];
[accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
uint64_t allocated_size = [accel allocatedSize];
stats.mem_alloc(allocated_size);
accel_struct = accel;
[accel_uncompressed release];
accel_struct_building = false;
}];
[accelCommands commit];
});
}
else {
/* set our acceleration structure to the uncompressed structure */
accel_struct = accel_uncompressed;
uint64_t allocated_size = [accel_struct allocatedSize];
stats.mem_alloc(allocated_size);
accel_struct_building = false;
}
[sizeBuf release];
}];
accel_struct_building = true;
[accelCommands commit];
return true;
}
return false;
}
bool BVHMetal::build_BLAS_hair(Progress &progress,
id<MTLDevice> device,
id<MTLCommandQueue> queue,
Geometry *const geom,
bool refit)
{
if (@available(macos 12.0, *)) {
/* Build BLAS for hair curves */
Hair *hair = static_cast<Hair *>(geom);
if (hair->num_curves() == 0) {
return false;
}
/*------------------------------------------------*/
BVH_status(
"Building hair BLAS | %7d curves | %s", (int)hair->num_curves(), geom->name.c_str());
/*------------------------------------------------*/
const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);
const size_t num_segments = hair->num_segments();
size_t num_motion_steps = 1;
Attribute *motion_keys = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if (motion_blur && hair->get_use_motion_blur() && motion_keys) {
num_motion_steps = hair->get_motion_steps();
}
const size_t num_aabbs = num_segments * num_motion_steps;
MTLResourceOptions storage_mode;
if (device.hasUnifiedMemory) {
storage_mode = MTLResourceStorageModeShared;
}
else {
storage_mode = MTLResourceStorageModeManaged;
}
/* Allocate a GPU buffer for the AABB data and populate it */
id<MTLBuffer> aabbBuf = [device
newBufferWithLength:num_aabbs * sizeof(MTLAxisAlignedBoundingBox)
options:storage_mode];
MTLAxisAlignedBoundingBox *aabb_data = (MTLAxisAlignedBoundingBox *)[aabbBuf contents];
/* Get AABBs for each motion step */
size_t center_step = (num_motion_steps - 1) / 2;
for (size_t step = 0; step < num_motion_steps; ++step) {
/* The center step for motion vertices is not stored in the attribute */
const float3 *keys = hair->get_curve_keys().data();
if (step != center_step) {
size_t attr_offset = (step > center_step) ? step - 1 : step;
/* Technically this is a float4 array, but sizeof(float3) == sizeof(float4) */
keys = motion_keys->data_float3() + attr_offset * hair->get_curve_keys().size();
}
for (size_t j = 0, i = 0; j < hair->num_curves(); ++j) {
const Hair::Curve curve = hair->get_curve(j);
for (int segment = 0; segment < curve.num_segments(); ++segment, ++i) {
{
BoundBox bounds = BoundBox::empty;
curve.bounds_grow(segment, keys, hair->get_curve_radius().data(), bounds);
const size_t index = step * num_segments + i;
aabb_data[index].min = (MTLPackedFloat3 &)bounds.min;
aabb_data[index].max = (MTLPackedFloat3 &)bounds.max;
}
}
}
}
if (storage_mode == MTLResourceStorageModeManaged) {
[aabbBuf didModifyRange:NSMakeRange(0, aabbBuf.length)];
}
# if 0
for (size_t i=0; i<num_aabbs && i < 400; i++) {
MTLAxisAlignedBoundingBox& bb = aabb_data[i];
printf(" %d: %.1f,%.1f,%.1f -- %.1f,%.1f,%.1f\n", int(i), bb.min.x, bb.min.y, bb.min.z, bb.max.x, bb.max.y, bb.max.z);
}
# endif
MTLAccelerationStructureGeometryDescriptor *geomDesc;
if (motion_blur) {
std::vector<MTLMotionKeyframeData *> aabb_ptrs;
aabb_ptrs.reserve(num_motion_steps);
for (size_t step = 0; step < num_motion_steps; ++step) {
MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
k.buffer = aabbBuf;
k.offset = step * num_segments * sizeof(MTLAxisAlignedBoundingBox);
aabb_ptrs.push_back(k);
}
MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor *geomDescMotion =
[MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor descriptor];
geomDescMotion.boundingBoxBuffers = [NSArray arrayWithObjects:aabb_ptrs.data()
count:aabb_ptrs.size()];
geomDescMotion.boundingBoxCount = num_segments;
geomDescMotion.boundingBoxStride = sizeof(aabb_data[0]);
geomDescMotion.intersectionFunctionTableOffset = 1;
/* Force a single any-hit call, so shadow record-all behavior works correctly */
/* (Match optix behavior: unsigned int build_flags =
* OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
geomDescMotion.allowDuplicateIntersectionFunctionInvocation = false;
geomDescMotion.opaque = true;
geomDesc = geomDescMotion;
}
else {
MTLAccelerationStructureBoundingBoxGeometryDescriptor *geomDescNoMotion =
[MTLAccelerationStructureBoundingBoxGeometryDescriptor descriptor];
geomDescNoMotion.boundingBoxBuffer = aabbBuf;
geomDescNoMotion.boundingBoxBufferOffset = 0;
geomDescNoMotion.boundingBoxCount = int(num_aabbs);
geomDescNoMotion.boundingBoxStride = sizeof(aabb_data[0]);
geomDescNoMotion.intersectionFunctionTableOffset = 1;
/* Force a single any-hit call, so shadow record-all behavior works correctly */
/* (Match optix behavior: unsigned int build_flags =
* OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
geomDescNoMotion.allowDuplicateIntersectionFunctionInvocation = false;
geomDescNoMotion.opaque = true;
geomDesc = geomDescNoMotion;
}
MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
[MTLPrimitiveAccelerationStructureDescriptor descriptor];
accelDesc.geometryDescriptors = @[ geomDesc ];
if (motion_blur) {
accelDesc.motionStartTime = 0.0f;
accelDesc.motionEndTime = 1.0f;
accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish;
accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish;
accelDesc.motionKeyframeCount = num_motion_steps;
}
if (!use_fast_trace_bvh) {
accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
MTLAccelerationStructureUsagePreferFastBuild);
}
MTLAccelerationStructureSizes accelSizes = [device
accelerationStructureSizesWithDescriptor:accelDesc];
id<MTLAccelerationStructure> accel_uncompressed = [device
newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize
options:MTLResourceStorageModePrivate];
id<MTLBuffer> sizeBuf = [device newBufferWithLength:8 options:MTLResourceStorageModeShared];
id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
id<MTLAccelerationStructureCommandEncoder> accelEnc =
[accelCommands accelerationStructureCommandEncoder];
if (refit) {
[accelEnc refitAccelerationStructure:accel_struct
descriptor:accelDesc
destination:accel_uncompressed
scratchBuffer:scratchBuf
scratchBufferOffset:0];
}
else {
[accelEnc buildAccelerationStructure:accel_uncompressed
descriptor:accelDesc
scratchBuffer:scratchBuf
scratchBufferOffset:0];
}
if (use_fast_trace_bvh) {
[accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
toBuffer:sizeBuf
offset:0
sizeDataType:MTLDataTypeULong];
}
[accelEnc endEncoding];
[accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
/* free temp resources */
[scratchBuf release];
[aabbBuf release];
if (use_fast_trace_bvh) {
/* Compact the accel structure */
uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
id<MTLAccelerationStructureCommandEncoder> accelEnc =
[accelCommands accelerationStructureCommandEncoder];
id<MTLAccelerationStructure> accel = [device
newAccelerationStructureWithSize:compressed_size];
[accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
toAccelerationStructure:accel];
[accelEnc endEncoding];
[accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
uint64_t allocated_size = [accel allocatedSize];
stats.mem_alloc(allocated_size);
accel_struct = accel;
[accel_uncompressed release];
accel_struct_building = false;
}];
[accelCommands commit];
});
}
else {
/* set our acceleration structure to the uncompressed structure */
accel_struct = accel_uncompressed;
uint64_t allocated_size = [accel_struct allocatedSize];
stats.mem_alloc(allocated_size);
accel_struct_building = false;
}
[sizeBuf release];
}];
accel_struct_building = true;
[accelCommands commit];
return true;
}
return false;
}
bool BVHMetal::build_BLAS(Progress &progress,
id<MTLDevice> device,
id<MTLCommandQueue> queue,
bool refit)
{
if (@available(macos 12.0, *)) {
assert(objects.size() == 1 && geometry.size() == 1);
/* Build bottom level acceleration structures (BLAS) */
Geometry *const geom = geometry[0];
switch (geom->geometry_type) {
case Geometry::VOLUME:
case Geometry::MESH:
return build_BLAS_mesh(progress, device, queue, geom, refit);
case Geometry::HAIR:
return build_BLAS_hair(progress, device, queue, geom, refit);
default:
return false;
}
}
return false;
}
bool BVHMetal::build_TLAS(Progress &progress,
id<MTLDevice> device,
id<MTLCommandQueue> queue,
bool refit)
{
if (@available(macos 12.0, *)) {
/* We need to sync here and ensure that all BLAS have completed async generation by both GCD
* and Metal */
{
__block bool complete_bvh = false;
while (!complete_bvh) {
dispatch_sync(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
complete_bvh = true;
for (Object *ob : objects) {
/* Skip non-traceable objects */
if (!ob->is_traceable())
continue;
Geometry const *geom = ob->get_geometry();
BVHMetal const *blas = static_cast<BVHMetal const *>(geom->bvh);
if (blas->accel_struct_building) {
complete_bvh = false;
/* We're likely waiting on a command buffer that's in flight to complete.
* Queue up a command buffer and wait for it to complete before checking the BLAS again.
*/
id<MTLCommandBuffer> command_buffer = [queue commandBuffer];
[command_buffer commit];
[command_buffer waitUntilCompleted];
break;
}
}
});
}
}
uint32_t num_instances = 0;
uint32_t num_motion_transforms = 0;
for (Object *ob : objects) {
/* Skip non-traceable objects */
if (!ob->is_traceable())
continue;
num_instances++;
if (ob->use_motion()) {
num_motion_transforms += max(1, ob->get_motion().size());
}
else {
num_motion_transforms++;
}
}
/*------------------------------------------------*/
BVH_status("Building TLAS | %7d instances", (int)num_instances);
/*------------------------------------------------*/
const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);
NSMutableArray *all_blas = [NSMutableArray array];
unordered_map<BVHMetal const *, int> instance_mapping;
/* Lambda function to build/retrieve the BLAS index mapping */
auto get_blas_index = [&](BVHMetal const *blas) {
auto it = instance_mapping.find(blas);
if (it != instance_mapping.end()) {
return it->second;
}
else {
int blas_index = (int)[all_blas count];
instance_mapping[blas] = blas_index;
if (@available(macos 12.0, *)) {
[all_blas addObject:blas->accel_struct];
}
return blas_index;
}
};
MTLResourceOptions storage_mode;
if (device.hasUnifiedMemory) {
storage_mode = MTLResourceStorageModeShared;
}
else {
storage_mode = MTLResourceStorageModeManaged;
}
size_t instance_size;
if (motion_blur) {
instance_size = sizeof(MTLAccelerationStructureMotionInstanceDescriptor);
}
else {
instance_size = sizeof(MTLAccelerationStructureUserIDInstanceDescriptor);
}
/* Allocate a GPU buffer for the instance data and populate it */
id<MTLBuffer> instanceBuf = [device newBufferWithLength:num_instances * instance_size
options:storage_mode];
id<MTLBuffer> motion_transforms_buf = nil;
MTLPackedFloat4x3 *motion_transforms = nullptr;
if (motion_blur && num_motion_transforms) {
motion_transforms_buf = [device
newBufferWithLength:num_motion_transforms * sizeof(MTLPackedFloat4x3)
options:storage_mode];
motion_transforms = (MTLPackedFloat4x3 *)motion_transforms_buf.contents;
}
uint32_t instance_index = 0;
uint32_t motion_transform_index = 0;
for (Object *ob : objects) {
/* Skip non-traceable objects */
if (!ob->is_traceable())
continue;
Geometry const *geom = ob->get_geometry();
BVHMetal const *blas = static_cast<BVHMetal const *>(geom->bvh);
uint32_t accel_struct_index = get_blas_index(blas);
/* Add some of the object visibility bits to the mask.
* __prim_visibility contains the combined visibility bits of all instances, so is not
* reliable if they differ between instances.
*
* METAL_WIP: OptiX visibility mask can only contain 8 bits, so have to trade-off here
* and select just a few important ones.
*/
uint32_t mask = ob->visibility_for_tracing() & 0xFF;
/* Have to have at least one bit in the mask, or else instance would always be culled. */
if (0 == mask) {
mask = 0xFF;
}
/* Set user instance ID to object index */
int object_index = ob->get_device_index();
uint32_t user_id = uint32_t(object_index);
/* Bake into the appropriate descriptor */
if (motion_blur) {
MTLAccelerationStructureMotionInstanceDescriptor *instances =
(MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents];
MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[instance_index++];
desc.accelerationStructureIndex = accel_struct_index;
desc.userID = user_id;
desc.mask = mask;
desc.motionStartTime = 0.0f;
desc.motionEndTime = 1.0f;
desc.motionTransformsStartIndex = motion_transform_index;
desc.motionStartBorderMode = MTLMotionBorderModeVanish;
desc.motionEndBorderMode = MTLMotionBorderModeVanish;
desc.intersectionFunctionTableOffset = 0;
int key_count = ob->get_motion().size();
if (key_count) {
desc.motionTransformsCount = key_count;
Transform *keys = ob->get_motion().data();
for (int i = 0; i < key_count; i++) {
float *t = (float *)&motion_transforms[motion_transform_index++];
/* Transpose transform */
auto src = (float const *)&keys[i];
for (int i = 0; i < 12; i++) {
t[i] = src[(i / 3) + 4 * (i % 3)];
}
}
}
else {
desc.motionTransformsCount = 1;
float *t = (float *)&motion_transforms[motion_transform_index++];
if (ob->get_geometry()->is_instanced()) {
/* Transpose transform */
auto src = (float const *)&ob->get_tfm();
for (int i = 0; i < 12; i++) {
t[i] = src[(i / 3) + 4 * (i % 3)];
}
}
else {
/* Clear transform to identity matrix */
t[0] = t[4] = t[8] = 1.0f;
}
}
}
else {
MTLAccelerationStructureUserIDInstanceDescriptor *instances =
(MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents];
MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[instance_index++];
desc.accelerationStructureIndex = accel_struct_index;
desc.userID = user_id;
desc.mask = mask;
desc.intersectionFunctionTableOffset = 0;
float *t = (float *)&desc.transformationMatrix;
if (ob->get_geometry()->is_instanced()) {
/* Transpose transform */
auto src = (float const *)&ob->get_tfm();
for (int i = 0; i < 12; i++) {
t[i] = src[(i / 3) + 4 * (i % 3)];
}
}
else {
/* Clear transform to identity matrix */
t[0] = t[4] = t[8] = 1.0f;
}
}
}
if (storage_mode == MTLResourceStorageModeManaged) {
[instanceBuf didModifyRange:NSMakeRange(0, instanceBuf.length)];
if (motion_transforms_buf) {
[motion_transforms_buf didModifyRange:NSMakeRange(0, motion_transforms_buf.length)];
assert(num_motion_transforms == motion_transform_index);
}
}
MTLInstanceAccelerationStructureDescriptor *accelDesc =
[MTLInstanceAccelerationStructureDescriptor descriptor];
accelDesc.instanceCount = num_instances;
accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeUserID;
accelDesc.instanceDescriptorBuffer = instanceBuf;
accelDesc.instanceDescriptorBufferOffset = 0;
accelDesc.instanceDescriptorStride = instance_size;
accelDesc.instancedAccelerationStructures = all_blas;
if (motion_blur) {
accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeMotion;
accelDesc.motionTransformBuffer = motion_transforms_buf;
accelDesc.motionTransformCount = num_motion_transforms;
}
if (!use_fast_trace_bvh) {
accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
MTLAccelerationStructureUsagePreferFastBuild);
}
MTLAccelerationStructureSizes accelSizes = [device
accelerationStructureSizesWithDescriptor:accelDesc];
id<MTLAccelerationStructure> accel = [device
newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize
options:MTLResourceStorageModePrivate];
id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
id<MTLAccelerationStructureCommandEncoder> accelEnc =
[accelCommands accelerationStructureCommandEncoder];
if (refit) {
[accelEnc refitAccelerationStructure:accel_struct
descriptor:accelDesc
destination:accel
scratchBuffer:scratchBuf
scratchBufferOffset:0];
}
else {
[accelEnc buildAccelerationStructure:accel
descriptor:accelDesc
scratchBuffer:scratchBuf
scratchBufferOffset:0];
}
[accelEnc endEncoding];
[accelCommands commit];
[accelCommands waitUntilCompleted];
if (motion_transforms_buf) {
[motion_transforms_buf release];
}
[instanceBuf release];
[scratchBuf release];
uint64_t allocated_size = [accel allocatedSize];
stats.mem_alloc(allocated_size);
/* Cache top and bottom-level acceleration structs */
accel_struct = accel;
blas_array.clear();
blas_array.reserve(all_blas.count);
for (id<MTLAccelerationStructure> blas in all_blas) {
blas_array.push_back(blas);
}
return true;
}
return false;
}
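The transpose loops in build_TLAS convert Cycles' row-major 3x4 Transform (three rows of four floats) into Metal's column-major MTLPackedFloat4x3 (four columns of three floats). A standalone sketch of the index mapping, which also shows why the identity fallback only needs to set t[0], t[4] and t[8], the diagonal of a column-major 4x3 matrix:
/* dst[c * 3 + r] = src[r * 4 + c]; writing i = c * 3 + r gives c = i / 3 and
 * r = i % 3, hence src[(i / 3) + 4 * (i % 3)]. */
void transpose_3x4_to_4x3(const float src[12], float dst[12])
{
  for (int i = 0; i < 12; i++) {
    dst[i] = src[(i / 3) + 4 * (i % 3)];
  }
}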
bool BVHMetal::build(Progress &progress,
id<MTLDevice> device,
id<MTLCommandQueue> queue,
bool refit)
{
if (@available(macos 12.0, *)) {
if (refit && params.bvh_type != BVH_TYPE_STATIC) {
assert(accel_struct);
}
else {
if (accel_struct) {
stats.mem_free(accel_struct.allocatedSize);
[accel_struct release];
accel_struct = nil;
}
}
}
if (!params.top_level) {
return build_BLAS(progress, device, queue, refit);
}
else {
return build_TLAS(progress, device, queue, refit);
}
}
CCL_NAMESPACE_END
#endif /* WITH_METAL */
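The get_blas_index lambda in build_TLAS above is a small memoization: each distinct BLAS is appended to all_blas once and assigned the next slot, and repeat lookups return the cached slot. A standalone sketch of the same pattern (names illustrative):
#include <unordered_map>
#include <vector>
template<class T>
int memoized_index(std::unordered_map<const T *, int> &cache,
                   std::vector<const T *> &array,
                   const T *key)
{
  auto it = cache.find(key);
  if (it != cache.end()) {
    return it->second; /* Seen before: reuse the cached slot. */
  }
  const int index = (int)array.size();
  array.push_back(key);
  cache[key] = index;
  return index;
}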

View File

@@ -1,37 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "util/string.h"
#include "util/vector.h"
CCL_NAMESPACE_BEGIN
class Device;
class DeviceInfo;
class Profiler;
class Stats;
bool device_metal_init();
Device *device_metal_create(const DeviceInfo &info, Stats &stats, Profiler &profiler);
void device_metal_info(vector<DeviceInfo> &devices);
string device_metal_capabilities();
CCL_NAMESPACE_END

View File

@@ -1,136 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_METAL
# include "device/metal/device.h"
# include "device/metal/device_impl.h"
#endif
#include "util/debug.h"
#include "util/set.h"
#include "util/system.h"
CCL_NAMESPACE_BEGIN
#ifdef WITH_METAL
Device *device_metal_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
{
return new MetalDevice(info, stats, profiler);
}
bool device_metal_init()
{
return true;
}
static int device_metal_get_num_devices_safe(uint32_t *num_devices)
{
*num_devices = MTLCopyAllDevices().count;
return 0;
}
void device_metal_info(vector<DeviceInfo> &devices)
{
uint32_t num_devices = 0;
device_metal_get_num_devices_safe(&num_devices);
if (num_devices == 0) {
return;
}
vector<MetalPlatformDevice> usable_devices;
MetalInfo::get_usable_devices(&usable_devices);
/* Devices are numbered consecutively across platforms. */
set<string> unique_ids;
int device_index = 0;
for (MetalPlatformDevice &device : usable_devices) {
/* Compute unique ID for persistent user preferences. */
const string &device_name = device.device_name;
string id = string("METAL_") + device_name;
/* Hardware ID might not be unique, add device number in that case. */
if (unique_ids.find(id) != unique_ids.end()) {
id += string_printf("_ID_%d", num_devices);
}
unique_ids.insert(id);
/* Create DeviceInfo. */
DeviceInfo info;
info.type = DEVICE_METAL;
info.description = string_remove_trademark(string(device_name));
/* Ensure unique naming on Apple Silicon / SoC devices which return the same string for CPU and
* GPU */
if (info.description == system_cpu_brand_string()) {
info.description += " (GPU)";
}
info.num = device_index;
/* We don't know if it's used for display, but assume it is. */
info.display_device = true;
info.denoisers = DENOISER_NONE;
info.id = id;
devices.push_back(info);
device_index++;
}
}
string device_metal_capabilities()
{
string result = "";
string error_msg = "";
uint32_t num_devices = 0;
/* Do not query the device count inside assert(): the call would be compiled
* out in release builds, leaving num_devices at zero. The helper returns 0 on
* success. */
const int device_query_result = device_metal_get_num_devices_safe(&num_devices);
assert(device_query_result == 0);
(void)device_query_result;
if (num_devices == 0) {
return "No Metal devices found\n";
}
result += string_printf("Number of devices: %u\n", num_devices);
NSArray<id<MTLDevice>> *allDevices = MTLCopyAllDevices();
for (id<MTLDevice> device in allDevices) {
result += string_printf("\t\tDevice: %s\n", [device.name UTF8String]);
}
return result;
}
#else
Device *device_metal_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
{
return nullptr;
}
bool device_metal_init()
{
return false;
}
void device_metal_info(vector<DeviceInfo> &devices)
{
}
string device_metal_capabilities()
{
return "";
}
#endif
CCL_NAMESPACE_END

View File

@@ -1,166 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifdef WITH_METAL
# include "bvh/bvh.h"
# include "device/device.h"
# include "device/metal/bvh.h"
# include "device/metal/device.h"
# include "device/metal/kernel.h"
# include "device/metal/queue.h"
# include "device/metal/util.h"
# include <Metal/Metal.h>
CCL_NAMESPACE_BEGIN
class DeviceQueue;
class MetalDevice : public Device {
public:
id<MTLDevice> mtlDevice = nil;
id<MTLLibrary> mtlLibrary[PSO_NUM] = {nil};
id<MTLArgumentEncoder> mtlBufferKernelParamsEncoder =
nil; /* encoder used for fetching device pointers from MTLBuffers */
id<MTLCommandQueue> mtlGeneralCommandQueue = nil;
id<MTLArgumentEncoder> mtlAncillaryArgEncoder =
nil; /* encoder used for fetching device pointers from MTLBuffers */
string source_used_for_compile[PSO_NUM];
KernelParamsMetal launch_params = {0};
/* MetalRT members ----------------------------------*/
BVHMetal *bvhMetalRT = nullptr;
bool motion_blur = false;
id<MTLArgumentEncoder> mtlASArgEncoder =
nil; /* encoder used for fetching device pointers from MTLAccelerationStructure */
/*---------------------------------------------------*/
string device_name;
MetalGPUVendor device_vendor;
uint kernel_features;
MTLResourceOptions default_storage_mode;
int max_threads_per_threadgroup;
int mtlDevId = 0;
bool first_error = true;
struct MetalMem {
device_memory *mem = nullptr;
int pointer_index = -1;
id<MTLBuffer> mtlBuffer = nil;
id<MTLTexture> mtlTexture = nil;
uint64_t offset = 0;
uint64_t size = 0;
void *hostPtr = nullptr;
bool use_UMA = false; /* If true, UMA memory in shared_pointer is being used. */
};
typedef map<device_memory *, unique_ptr<MetalMem>> MetalMemMap;
MetalMemMap metal_mem_map;
std::vector<id<MTLResource>> delayed_free_list;
std::recursive_mutex metal_mem_map_mutex;
/* Bindless Textures */
device_vector<TextureInfo> texture_info;
bool need_texture_info;
id<MTLArgumentEncoder> mtlTextureArgEncoder = nil;
id<MTLBuffer> texture_bindings_2d = nil;
id<MTLBuffer> texture_bindings_3d = nil;
std::vector<id<MTLTexture>> texture_slot_map;
MetalDeviceKernels kernels;
bool use_metalrt = false;
bool use_function_specialisation = false;
virtual BVHLayoutMask get_bvh_layout_mask() const override;
void set_error(const string &error) override;
MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
virtual ~MetalDevice();
bool support_device(const uint /*kernel_features*/);
bool check_peer_access(Device *peer_device) override;
bool use_adaptive_compilation();
string get_source(const uint kernel_features);
string compile_kernel(const uint kernel_features, const char *name);
virtual bool load_kernels(const uint kernel_features) override;
void reserve_local_memory(const uint kernel_features);
void init_host_memory();
void load_texture_info();
virtual bool should_use_graphics_interop() override;
virtual unique_ptr<DeviceQueue> gpu_queue_create() override;
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
/* ------------------------------------------------------------------ */
/* low-level memory management */
MetalMem *generic_alloc(device_memory &mem);
void generic_copy_to(device_memory &mem);
void generic_free(device_memory &mem);
void mem_alloc(device_memory &mem) override;
void mem_copy_to(device_memory &mem) override;
void mem_copy_from(device_memory &mem)
{
mem_copy_from(mem, -1, -1, -1, -1);
}
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
void mem_zero(device_memory &mem) override;
void mem_free(device_memory &mem) override;
device_ptr mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/) override;
virtual void const_copy_to(const char *name, void *host, size_t size) override;
void global_alloc(device_memory &mem);
void global_free(device_memory &mem);
void tex_alloc(device_texture &mem);
void tex_alloc_as_buffer(device_texture &mem);
void tex_free(device_texture &mem);
void flush_delayed_free_list();
};
CCL_NAMESPACE_END
#endif

File diff suppressed because it is too large


@@ -1,168 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifdef WITH_METAL
# include "device/kernel.h"
# include <Metal/Metal.h>
CCL_NAMESPACE_BEGIN
class MetalDevice;
enum {
METALRT_FUNC_DEFAULT_TRI,
METALRT_FUNC_DEFAULT_BOX,
METALRT_FUNC_SHADOW_TRI,
METALRT_FUNC_SHADOW_BOX,
METALRT_FUNC_LOCAL_TRI,
METALRT_FUNC_LOCAL_BOX,
METALRT_FUNC_CURVE_RIBBON,
METALRT_FUNC_CURVE_RIBBON_SHADOW,
METALRT_FUNC_CURVE_ALL,
METALRT_FUNC_CURVE_ALL_SHADOW,
METALRT_FUNC_NUM
};
enum { METALRT_TABLE_DEFAULT, METALRT_TABLE_SHADOW, METALRT_TABLE_LOCAL, METALRT_TABLE_NUM };
/* Pipeline State Object types */
enum {
/* A kernel that can be used with all scenes, supporting all features.
* It is slow to compile, but only needs to be compiled once and is then
* cached for future render sessions. This allows a render to get underway
* on the GPU quickly.
*/
PSO_GENERIC,
  /* A kernel that is relatively quick to compile, but is specialized for the
   * scene being rendered. It contains only the required functionality, and
   * even bakes constant values in, which means it must be recompiled whenever
   * a dependent setting changes. Its render performance is significantly
   * better though, which justifies the extra compile time.
   */
/* METAL_WIP: This isn't used and will require more changes to enable. */
PSO_SPECIALISED,
PSO_NUM
};
const char *kernel_type_as_string(int kernel_type);
struct MetalKernelPipeline {
  void release()
  {
    if (pipeline) {
      [pipeline release];
      pipeline = nil;
    }
    if (function) {
      [function release];
      function = nil;
    }
    if (@available(macOS 11.0, *)) {
      /* Release each intersection function table exactly once and clear the
       * slot, so that a repeated release() call is harmless. */
      for (int i = 0; i < METALRT_TABLE_NUM; i++) {
        if (intersection_func_table[i]) {
          [intersection_func_table[i] release];
          intersection_func_table[i] = nil;
        }
      }
    }
  }
bool loaded = false;
id<MTLFunction> function = nil;
id<MTLComputePipelineState> pipeline = nil;
API_AVAILABLE(macos(11.0))
id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil};
};
struct MetalKernelLoadDesc {
int pso_index = 0;
const char *function_name = nullptr;
int kernel_index = 0;
int threads_per_threadgroup = 0;
MTLFunctionConstantValues *constant_values = nullptr;
NSArray *linked_functions = nullptr;
struct IntersectorFunctions {
NSArray *defaults;
NSArray *shadow;
NSArray *local;
NSArray *operator[](int index) const
{
if (index == METALRT_TABLE_DEFAULT)
return defaults;
if (index == METALRT_TABLE_SHADOW)
return shadow;
return local;
}
} intersector_functions = {nullptr};
};
/* Metal kernel and associated occupancy information. */
class MetalDeviceKernel {
public:
~MetalDeviceKernel();
bool load(MetalDevice *device, MetalKernelLoadDesc const &desc, class MD5Hash const &md5);
void mark_loaded(int pso_index)
{
pso[pso_index].loaded = true;
}
int get_num_threads_per_block() const
{
return num_threads_per_block;
}
const MetalKernelPipeline &get_pso() const;
double load_duration = 0.0;
private:
MetalKernelPipeline pso[PSO_NUM];
int num_threads_per_block = 0;
};
/* Cache of Metal kernels for each DeviceKernel. */
class MetalDeviceKernels {
public:
bool load(MetalDevice *device, int kernel_type);
bool available(DeviceKernel kernel) const;
const MetalDeviceKernel &get(DeviceKernel kernel) const;
MetalDeviceKernel kernels_[DEVICE_KERNEL_NUM];
id<MTLFunction> rt_intersection_funcs[PSO_NUM][METALRT_FUNC_NUM] = {{nil}};
string loaded_md5[PSO_NUM];
};
CCL_NAMESPACE_END
#endif /* WITH_METAL */


@@ -1,525 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_METAL
# include "device/metal/kernel.h"
# include "device/metal/device_impl.h"
# include "util/md5.h"
# include "util/path.h"
# include "util/tbb.h"
# include "util/time.h"
CCL_NAMESPACE_BEGIN
/* limit to 2 MTLCompiler instances */
int max_mtlcompiler_threads = 2;
const char *kernel_type_as_string(int kernel_type)
{
switch (kernel_type) {
case PSO_GENERIC:
return "PSO_GENERIC";
case PSO_SPECIALISED:
return "PSO_SPECIALISED";
default:
assert(0);
}
return "";
}
MetalDeviceKernel::~MetalDeviceKernel()
{
for (int i = 0; i < PSO_NUM; i++) {
pso[i].release();
}
}
bool MetalDeviceKernel::load(MetalDevice *device,
MetalKernelLoadDesc const &desc_in,
MD5Hash const &md5)
{
__block MetalKernelLoadDesc const desc(desc_in);
if (desc.kernel_index == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
/* skip megakernel */
return true;
}
bool use_binary_archive = true;
if (getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) {
use_binary_archive = false;
}
id<MTLBinaryArchive> archive = nil;
string metalbin_path;
if (use_binary_archive) {
NSProcessInfo *processInfo = [NSProcessInfo processInfo];
string osVersion = [[processInfo operatingSystemVersionString] UTF8String];
MD5Hash local_md5(md5);
local_md5.append(osVersion);
string metalbin_name = string(desc.function_name) + "." + local_md5.get_hex() +
to_string(desc.pso_index) + ".bin";
metalbin_path = path_cache_get(path_join("kernels", metalbin_name));
path_create_directories(metalbin_path);
    if (path_exists(metalbin_path)) {
if (@available(macOS 11.0, *)) {
MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())];
archive = [device->mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil];
[archiveDesc release];
}
}
}
NSString *entryPoint = [@(desc.function_name) copy];
NSError *error = NULL;
if (@available(macOS 11.0, *)) {
MTLFunctionDescriptor *func_desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
func_desc.name = entryPoint;
if (desc.constant_values) {
func_desc.constantValues = desc.constant_values;
}
pso[desc.pso_index].function = [device->mtlLibrary[desc.pso_index]
newFunctionWithDescriptor:func_desc
error:&error];
}
[entryPoint release];
if (pso[desc.pso_index].function == nil) {
NSString *err = [error localizedDescription];
string errors = [err UTF8String];
device->set_error(
string_printf("Error getting function \"%s\": %s", desc.function_name, errors.c_str()));
return false;
}
pso[desc.pso_index].function.label = [@(desc.function_name) copy];
__block MTLComputePipelineDescriptor *computePipelineStateDescriptor =
[[MTLComputePipelineDescriptor alloc] init];
computePipelineStateDescriptor.buffers[0].mutability = MTLMutabilityImmutable;
computePipelineStateDescriptor.buffers[1].mutability = MTLMutabilityImmutable;
computePipelineStateDescriptor.buffers[2].mutability = MTLMutabilityImmutable;
if (@available(macos 10.14, *)) {
computePipelineStateDescriptor.maxTotalThreadsPerThreadgroup = desc.threads_per_threadgroup;
}
computePipelineStateDescriptor.threadGroupSizeIsMultipleOfThreadExecutionWidth = true;
computePipelineStateDescriptor.computeFunction = pso[desc.pso_index].function;
if (@available(macOS 11.0, *)) {
/* Attach the additional functions to an MTLLinkedFunctions object */
if (desc.linked_functions) {
computePipelineStateDescriptor.linkedFunctions = [[MTLLinkedFunctions alloc] init];
computePipelineStateDescriptor.linkedFunctions.functions = desc.linked_functions;
}
computePipelineStateDescriptor.maxCallStackDepth = 1;
}
/* Create a new Compute pipeline state object */
MTLPipelineOption pipelineOptions = MTLPipelineOptionNone;
bool creating_new_archive = false;
if (@available(macOS 11.0, *)) {
if (use_binary_archive) {
if (!archive) {
        MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
        archiveDesc.url = nil;
        archive = [device->mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil];
        [archiveDesc release];
        creating_new_archive = true;
double starttime = time_dt();
if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor
error:&error]) {
NSString *errStr = [error localizedDescription];
metal_printf("Failed to add PSO to archive:\n%s\n",
errStr ? [errStr UTF8String] : "nil");
}
else {
double duration = time_dt() - starttime;
metal_printf("%2d | %-55s | %7.2fs\n",
desc.kernel_index,
device_kernel_as_string((DeviceKernel)desc.kernel_index),
duration);
if (desc.pso_index == PSO_GENERIC) {
this->load_duration = duration;
}
}
}
computePipelineStateDescriptor.binaryArchives = [NSArray arrayWithObjects:archive, nil];
pipelineOptions = MTLPipelineOptionFailOnBinaryArchiveMiss;
}
}
double starttime = time_dt();
MTLNewComputePipelineStateWithReflectionCompletionHandler completionHandler = ^(
id<MTLComputePipelineState> computePipelineState,
MTLComputePipelineReflection *reflection,
NSError *error) {
bool recreate_archive = false;
if (computePipelineState == nil && archive && !creating_new_archive) {
assert(0);
NSString *errStr = [error localizedDescription];
metal_printf(
"Failed to create compute pipeline state \"%s\" from archive - attempting recreation... "
"(error: %s)\n",
device_kernel_as_string((DeviceKernel)desc.kernel_index),
errStr ? [errStr UTF8String] : "nil");
computePipelineState = [device->mtlDevice
newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
options:MTLPipelineOptionNone
reflection:nullptr
error:&error];
recreate_archive = true;
}
double duration = time_dt() - starttime;
if (computePipelineState == nil) {
NSString *errStr = [error localizedDescription];
device->set_error(string_printf("Failed to create compute pipeline state \"%s\", error: \n",
device_kernel_as_string((DeviceKernel)desc.kernel_index)) +
(errStr ? [errStr UTF8String] : "nil"));
metal_printf("%2d | %-55s | %7.2fs | FAILED!\n",
desc.kernel_index,
device_kernel_as_string((DeviceKernel)desc.kernel_index),
duration);
return;
}
pso[desc.pso_index].pipeline = computePipelineState;
num_threads_per_block = round_down(computePipelineState.maxTotalThreadsPerThreadgroup,
computePipelineState.threadExecutionWidth);
num_threads_per_block = std::max(num_threads_per_block,
(int)computePipelineState.threadExecutionWidth);
if (!use_binary_archive) {
metal_printf("%2d | %-55s | %7.2fs\n",
desc.kernel_index,
device_kernel_as_string((DeviceKernel)desc.kernel_index),
duration);
if (desc.pso_index == PSO_GENERIC) {
this->load_duration = duration;
}
}
if (@available(macOS 11.0, *)) {
if (creating_new_archive || recreate_archive) {
if (![archive serializeToURL:[NSURL fileURLWithPath:@(metalbin_path.c_str())]
error:&error]) {
metal_printf("Failed to save binary archive, error:\n%s\n",
[[error localizedDescription] UTF8String]);
}
}
}
[computePipelineStateDescriptor release];
computePipelineStateDescriptor = nil;
if (device->use_metalrt && desc.linked_functions) {
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
if (@available(macOS 11.0, *)) {
          MTLIntersectionFunctionTableDescriptor *ift_desc =
              [[MTLIntersectionFunctionTableDescriptor alloc] init];
          ift_desc.functionCount = desc.intersector_functions[table].count;
          pso[desc.pso_index].intersection_func_table[table] = [pso[desc.pso_index].pipeline
              newIntersectionFunctionTableWithDescriptor:ift_desc];
          [ift_desc release];
          /* Finally write the function handles into this pipeline's table,
           * one entry per intersector function rather than a hard-coded count. */
          const int function_count = (int)[desc.intersector_functions[table] count];
          for (int i = 0; i < function_count; i++) {
            id<MTLFunctionHandle> handle = [pso[desc.pso_index].pipeline
                functionHandleWithFunction:desc.intersector_functions[table][i]];
            [pso[desc.pso_index].intersection_func_table[table] setFunction:handle atIndex:i];
          }
}
}
}
mark_loaded(desc.pso_index);
};
if (desc.pso_index == PSO_SPECIALISED) {
/* Asynchronous load */
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
NSError *error;
id<MTLComputePipelineState> pipeline = [device->mtlDevice
newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
options:pipelineOptions
reflection:nullptr
error:&error];
completionHandler(pipeline, nullptr, error);
});
}
else {
/* Block on load to ensure we continue with a valid kernel function */
id<MTLComputePipelineState> pipeline = [device->mtlDevice
newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
options:pipelineOptions
reflection:nullptr
error:&error];
completionHandler(pipeline, nullptr, error);
}
return true;
}
const MetalKernelPipeline &MetalDeviceKernel::get_pso() const
{
if (pso[PSO_SPECIALISED].loaded) {
return pso[PSO_SPECIALISED];
}
assert(pso[PSO_GENERIC].loaded);
return pso[PSO_GENERIC];
}
bool MetalDeviceKernels::load(MetalDevice *device, int kernel_type)
{
bool any_error = false;
MD5Hash md5;
/* Build the function constant table */
MTLFunctionConstantValues *constant_values = nullptr;
if (kernel_type == PSO_SPECIALISED) {
constant_values = [MTLFunctionConstantValues new];
# define KERNEL_FILM(_type, name) \
[constant_values setConstantValue:&data.film.name \
type:get_MTLDataType_##_type() \
atIndex:KernelData_film_##name]; \
md5.append((uint8_t *)&data.film.name, sizeof(data.film.name));
# define KERNEL_BACKGROUND(_type, name) \
[constant_values setConstantValue:&data.background.name \
type:get_MTLDataType_##_type() \
atIndex:KernelData_background_##name]; \
md5.append((uint8_t *)&data.background.name, sizeof(data.background.name));
# define KERNEL_INTEGRATOR(_type, name) \
[constant_values setConstantValue:&data.integrator.name \
type:get_MTLDataType_##_type() \
atIndex:KernelData_integrator_##name]; \
md5.append((uint8_t *)&data.integrator.name, sizeof(data.integrator.name));
# define KERNEL_BVH(_type, name) \
[constant_values setConstantValue:&data.bvh.name \
type:get_MTLDataType_##_type() \
atIndex:KernelData_bvh_##name]; \
md5.append((uint8_t *)&data.bvh.name, sizeof(data.bvh.name));
/* METAL_WIP: populate constant_values based on KernelData */
assert(0);
/*
const KernelData &data = device->launch_params.data;
# include "kernel/types/background.h"
# include "kernel/types/bvh.h"
# include "kernel/types/film.h"
# include "kernel/types/integrator.h"
*/
}
if (device->use_metalrt) {
if (@available(macOS 11.0, *)) {
/* create the id<MTLFunction> for each intersection function */
const char *function_names[] = {
"__anyhit__cycles_metalrt_visibility_test_tri",
"__anyhit__cycles_metalrt_visibility_test_box",
"__anyhit__cycles_metalrt_shadow_all_hit_tri",
"__anyhit__cycles_metalrt_shadow_all_hit_box",
"__anyhit__cycles_metalrt_local_hit_tri",
"__anyhit__cycles_metalrt_local_hit_box",
"__intersection__curve_ribbon",
"__intersection__curve_ribbon_shadow",
"__intersection__curve_all",
"__intersection__curve_all_shadow",
};
assert(sizeof(function_names) / sizeof(function_names[0]) == METALRT_FUNC_NUM);
MTLFunctionDescriptor *desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
if (kernel_type == PSO_SPECIALISED) {
desc.constantValues = constant_values;
}
for (int i = 0; i < METALRT_FUNC_NUM; i++) {
const char *function_name = function_names[i];
desc.name = [@(function_name) copy];
NSError *error = NULL;
rt_intersection_funcs[kernel_type][i] = [device->mtlLibrary[kernel_type]
newFunctionWithDescriptor:desc
error:&error];
if (rt_intersection_funcs[kernel_type][i] == nil) {
NSString *err = [error localizedDescription];
string errors = [err UTF8String];
device->set_error(string_printf(
"Error getting intersection function \"%s\": %s", function_name, errors.c_str()));
any_error = true;
break;
}
rt_intersection_funcs[kernel_type][i].label = [@(function_name) copy];
}
}
}
md5.append(device->source_used_for_compile[kernel_type]);
string hash = md5.get_hex();
if (loaded_md5[kernel_type] == hash) {
return true;
}
if (!any_error) {
NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
NSArray *function_list = nil;
if (device->use_metalrt) {
id<MTLFunction> box_intersect_default = nil;
id<MTLFunction> box_intersect_shadow = nil;
if (device->kernel_features & KERNEL_FEATURE_HAIR) {
/* Add curve intersection programs. */
if (device->kernel_features & KERNEL_FEATURE_HAIR_THICK) {
/* Slower programs for thick hair since that also slows down ribbons.
* Ideally this should not be needed. */
box_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL];
box_intersect_shadow = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL_SHADOW];
}
else {
box_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_RIBBON];
box_intersect_shadow =
rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_RIBBON_SHADOW];
}
}
table_functions[METALRT_TABLE_DEFAULT] = [NSArray
arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_TRI],
box_intersect_default ?
box_intersect_default :
rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_BOX],
nil];
table_functions[METALRT_TABLE_SHADOW] = [NSArray
arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_TRI],
box_intersect_shadow ?
box_intersect_shadow :
rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_BOX],
nil];
table_functions[METALRT_TABLE_LOCAL] = [NSArray
arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_TRI],
rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_BOX],
nil];
NSMutableSet *unique_functions = [NSMutableSet
setWithArray:table_functions[METALRT_TABLE_DEFAULT]];
[unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_SHADOW]];
[unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_LOCAL]];
function_list = [[NSArray arrayWithArray:[unique_functions allObjects]]
sortedArrayUsingComparator:^NSComparisonResult(id<MTLFunction> f1, id<MTLFunction> f2) {
return [f1.label compare:f2.label];
}];
unique_functions = nil;
}
metal_printf("Starting %s \"cycles_metal_...\" pipeline builds\n",
kernel_type_as_string(kernel_type));
tbb::task_arena local_arena(max_mtlcompiler_threads);
local_arena.execute([&]() {
tbb::parallel_for(int(0), int(DEVICE_KERNEL_NUM), [&](int i) {
/* skip megakernel */
if (i == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
return;
}
/* Only specialize kernels where it can make an impact. */
if (kernel_type == PSO_SPECIALISED) {
if (i < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
i > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
return;
}
}
MetalDeviceKernel &kernel = kernels_[i];
const std::string function_name = std::string("cycles_metal_") +
device_kernel_as_string((DeviceKernel)i);
int threads_per_threadgroup = device->max_threads_per_threadgroup;
if (i > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL && i < DEVICE_KERNEL_INTEGRATOR_RESET) {
/* Always use 512 for the sorting kernels */
threads_per_threadgroup = 512;
}
NSArray *kernel_function_list = nil;
if (i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK ||
i == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
kernel_function_list = function_list;
}
MetalKernelLoadDesc desc;
desc.pso_index = kernel_type;
desc.kernel_index = i;
desc.linked_functions = kernel_function_list;
desc.intersector_functions.defaults = table_functions[METALRT_TABLE_DEFAULT];
desc.intersector_functions.shadow = table_functions[METALRT_TABLE_SHADOW];
desc.intersector_functions.local = table_functions[METALRT_TABLE_LOCAL];
desc.constant_values = constant_values;
desc.threads_per_threadgroup = threads_per_threadgroup;
desc.function_name = function_name.c_str();
bool success = kernel.load(device, desc, md5);
any_error |= !success;
});
});
}
bool loaded = !any_error;
if (loaded) {
loaded_md5[kernel_type] = hash;
}
return loaded;
}
const MetalDeviceKernel &MetalDeviceKernels::get(DeviceKernel kernel) const
{
return kernels_[(int)kernel];
}
bool MetalDeviceKernels::available(DeviceKernel kernel) const
{
return kernels_[(int)kernel].get_pso().function != nil;
}
CCL_NAMESPACE_END
#endif /* WITH_METAL*/


@@ -1,99 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifdef WITH_METAL
# include "device/kernel.h"
# include "device/memory.h"
# include "device/queue.h"
# include "device/metal/util.h"
# include "kernel/device/metal/globals.h"
# define metal_printf VLOG(4) << string_printf
CCL_NAMESPACE_BEGIN
class MetalDevice;
/* Base class for Metal queues. */
class MetalDeviceQueue : public DeviceQueue {
public:
MetalDeviceQueue(MetalDevice *device);
~MetalDeviceQueue();
virtual int num_concurrent_states(const size_t) const override;
virtual int num_concurrent_busy_states() const override;
virtual void init_execution() override;
virtual bool enqueue(DeviceKernel kernel,
const int work_size,
DeviceKernelArguments const &args) override;
virtual bool synchronize() override;
virtual void zero_to_device(device_memory &mem) override;
virtual void copy_to_device(device_memory &mem) override;
virtual void copy_from_device(device_memory &mem) override;
virtual bool kernel_available(DeviceKernel kernel) const override;
protected:
void prepare_resources(DeviceKernel kernel);
id<MTLComputeCommandEncoder> get_compute_encoder(DeviceKernel kernel);
id<MTLBlitCommandEncoder> get_blit_encoder();
MetalDevice *metal_device;
MetalBufferPool temp_buffer_pool;
API_AVAILABLE(macos(11.0), ios(14.0))
MTLCommandBufferDescriptor *command_buffer_desc = nullptr;
id<MTLDevice> mtlDevice = nil;
id<MTLCommandQueue> mtlCommandQueue = nil;
id<MTLCommandBuffer> mtlCommandBuffer = nil;
id<MTLComputeCommandEncoder> mtlComputeEncoder = nil;
id<MTLBlitCommandEncoder> mtlBlitEncoder = nil;
API_AVAILABLE(macos(10.14), ios(14.0))
id<MTLSharedEvent> shared_event = nil;
API_AVAILABLE(macos(10.14), ios(14.0))
MTLSharedEventListener *shared_event_listener = nil;
dispatch_queue_t event_queue;
dispatch_semaphore_t wait_semaphore;
struct CopyBack {
void *host_pointer;
void *gpu_mem;
uint64_t size;
};
std::vector<CopyBack> copy_back_mem;
  uint64_t shared_event_id = 0;
uint64_t command_buffers_submitted = 0;
uint64_t command_buffers_completed = 0;
Stats &stats;
void close_compute_encoder();
void close_blit_encoder();
};
CCL_NAMESPACE_END
#endif /* WITH_METAL */


@@ -1,610 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_METAL
# include "device/metal/queue.h"
# include "device/metal/device_impl.h"
# include "device/metal/kernel.h"
# include "util/path.h"
# include "util/string.h"
# include "util/time.h"
CCL_NAMESPACE_BEGIN
/* MetalDeviceQueue */
MetalDeviceQueue::MetalDeviceQueue(MetalDevice *device)
: DeviceQueue(device), metal_device(device), stats(device->stats)
{
if (@available(macos 11.0, *)) {
command_buffer_desc = [[MTLCommandBufferDescriptor alloc] init];
command_buffer_desc.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus;
}
mtlDevice = device->mtlDevice;
mtlCommandQueue = [mtlDevice newCommandQueue];
if (@available(macos 10.14, *)) {
shared_event = [mtlDevice newSharedEvent];
shared_event_id = 1;
/* Shareable event listener */
event_queue = dispatch_queue_create("com.cycles.metal.event_queue", NULL);
shared_event_listener = [[MTLSharedEventListener alloc] initWithDispatchQueue:event_queue];
}
wait_semaphore = dispatch_semaphore_create(0);
}
MetalDeviceQueue::~MetalDeviceQueue()
{
/* Tidying up here isn't really practical - we should expect and require the work
* queue to be empty here. */
assert(mtlCommandBuffer == nil);
assert(command_buffers_submitted == command_buffers_completed);
if (@available(macos 10.14, *)) {
[shared_event_listener release];
[shared_event release];
}
if (@available(macos 11.0, *)) {
[command_buffer_desc release];
}
if (mtlCommandQueue) {
[mtlCommandQueue release];
mtlCommandQueue = nil;
}
}
int MetalDeviceQueue::num_concurrent_states(const size_t /*state_size*/) const
{
/* METAL_WIP */
/* TODO: compute automatically. */
/* TODO: must have at least num_threads_per_block. */
int result = 1048576;
if (metal_device->device_vendor == METAL_GPU_AMD) {
result *= 2;
}
else if (metal_device->device_vendor == METAL_GPU_APPLE) {
result *= 4;
}
return result;
}
int MetalDeviceQueue::num_concurrent_busy_states() const
{
/* METAL_WIP */
/* TODO: compute automatically. */
int result = 65536;
if (metal_device->device_vendor == METAL_GPU_AMD) {
result *= 2;
}
else if (metal_device->device_vendor == METAL_GPU_APPLE) {
result *= 4;
}
return result;
}
void MetalDeviceQueue::init_execution()
{
/* Synchronize all textures and memory copies before executing task. */
metal_device->load_texture_info();
synchronize();
}
bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
const int work_size,
DeviceKernelArguments const &args)
{
if (metal_device->have_error()) {
return false;
}
VLOG(3) << "Metal queue launch " << device_kernel_as_string(kernel) << ", work_size "
<< work_size;
const MetalDeviceKernel &metal_kernel = metal_device->kernels.get(kernel);
const MetalKernelPipeline &metal_kernel_pso = metal_kernel.get_pso();
id<MTLComputeCommandEncoder> mtlComputeCommandEncoder = get_compute_encoder(kernel);
/* Determine size requirement for argument buffer. */
size_t arg_buffer_length = 0;
for (size_t i = 0; i < args.count; i++) {
size_t size_in_bytes = args.sizes[i];
arg_buffer_length = round_up(arg_buffer_length, size_in_bytes) + size_in_bytes;
}
/* 256 is the Metal offset alignment for constant address space bindings */
arg_buffer_length = round_up(arg_buffer_length, 256);
/* Globals placed after "vanilla" arguments. */
size_t globals_offsets = arg_buffer_length;
arg_buffer_length += sizeof(KernelParamsMetal);
arg_buffer_length = round_up(arg_buffer_length, 256);
/* Metal ancillary bindless pointers. */
size_t metal_offsets = arg_buffer_length;
arg_buffer_length += metal_device->mtlAncillaryArgEncoder.encodedLength;
arg_buffer_length = round_up(arg_buffer_length, metal_device->mtlAncillaryArgEncoder.alignment);
/* Temporary buffer used to prepare arg_buffer */
uint8_t *init_arg_buffer = (uint8_t *)alloca(arg_buffer_length);
memset(init_arg_buffer, 0, arg_buffer_length);
/* Prepare the non-pointer "enqueue" arguments */
size_t bytes_written = 0;
for (size_t i = 0; i < args.count; i++) {
size_t size_in_bytes = args.sizes[i];
bytes_written = round_up(bytes_written, size_in_bytes);
if (args.types[i] != DeviceKernelArguments::POINTER) {
memcpy(init_arg_buffer + bytes_written, args.values[i], size_in_bytes);
}
bytes_written += size_in_bytes;
}
/* Prepare any non-pointer (i.e. plain-old-data) KernelParamsMetal data */
/* The plain-old-data is contiguous, continuing to the end of KernelParamsMetal */
size_t plain_old_launch_data_offset = offsetof(KernelParamsMetal, __integrator_state) +
sizeof(IntegratorStateGPU);
size_t plain_old_launch_data_size = sizeof(KernelParamsMetal) - plain_old_launch_data_offset;
memcpy(init_arg_buffer + globals_offsets + plain_old_launch_data_offset,
(uint8_t *)&metal_device->launch_params + plain_old_launch_data_offset,
plain_old_launch_data_size);
/* Allocate an argument buffer. */
MTLResourceOptions arg_buffer_options = MTLResourceStorageModeManaged;
if (@available(macOS 11.0, *)) {
if ([mtlDevice hasUnifiedMemory]) {
arg_buffer_options = MTLResourceStorageModeShared;
}
}
id<MTLBuffer> arg_buffer = temp_buffer_pool.get_buffer(
mtlDevice, mtlCommandBuffer, arg_buffer_length, arg_buffer_options, init_arg_buffer, stats);
/* Encode the pointer "enqueue" arguments */
bytes_written = 0;
for (size_t i = 0; i < args.count; i++) {
size_t size_in_bytes = args.sizes[i];
bytes_written = round_up(bytes_written, size_in_bytes);
if (args.types[i] == DeviceKernelArguments::POINTER) {
[metal_device->mtlBufferKernelParamsEncoder setArgumentBuffer:arg_buffer
offset:bytes_written];
if (MetalDevice::MetalMem *mmem = *(MetalDevice::MetalMem **)args.values[i]) {
[mtlComputeCommandEncoder useResource:mmem->mtlBuffer
usage:MTLResourceUsageRead | MTLResourceUsageWrite];
[metal_device->mtlBufferKernelParamsEncoder setBuffer:mmem->mtlBuffer offset:0 atIndex:0];
}
else {
if (@available(macos 12.0, *)) {
[metal_device->mtlBufferKernelParamsEncoder setBuffer:nil offset:0 atIndex:0];
}
}
}
bytes_written += size_in_bytes;
}
/* Encode KernelParamsMetal buffers */
[metal_device->mtlBufferKernelParamsEncoder setArgumentBuffer:arg_buffer offset:globals_offsets];
  /* This relies on the IntegratorStateGPU layout being a contiguous run of device_ptr values. */
const size_t pointer_block_end = offsetof(KernelParamsMetal, __integrator_state) +
sizeof(IntegratorStateGPU);
for (size_t offset = 0; offset < pointer_block_end; offset += sizeof(device_ptr)) {
int pointer_index = offset / sizeof(device_ptr);
MetalDevice::MetalMem *mmem = *(
MetalDevice::MetalMem **)((uint8_t *)&metal_device->launch_params + offset);
if (mmem && (mmem->mtlBuffer || mmem->mtlTexture)) {
[metal_device->mtlBufferKernelParamsEncoder setBuffer:mmem->mtlBuffer
offset:0
atIndex:pointer_index];
}
else {
if (@available(macos 12.0, *)) {
[metal_device->mtlBufferKernelParamsEncoder setBuffer:nil offset:0 atIndex:pointer_index];
}
}
}
bytes_written = globals_offsets + sizeof(KernelParamsMetal);
/* Encode ancillaries */
[metal_device->mtlAncillaryArgEncoder setArgumentBuffer:arg_buffer offset:metal_offsets];
[metal_device->mtlAncillaryArgEncoder setBuffer:metal_device->texture_bindings_2d
offset:0
atIndex:0];
[metal_device->mtlAncillaryArgEncoder setBuffer:metal_device->texture_bindings_3d
offset:0
atIndex:1];
if (@available(macos 12.0, *)) {
if (metal_device->use_metalrt) {
if (metal_device->bvhMetalRT) {
id<MTLAccelerationStructure> accel_struct = metal_device->bvhMetalRT->accel_struct;
[metal_device->mtlAncillaryArgEncoder setAccelerationStructure:accel_struct atIndex:2];
}
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
if (metal_kernel_pso.intersection_func_table[table]) {
[metal_kernel_pso.intersection_func_table[table] setBuffer:arg_buffer
offset:globals_offsets
atIndex:1];
[metal_device->mtlAncillaryArgEncoder
setIntersectionFunctionTable:metal_kernel_pso.intersection_func_table[table]
atIndex:3 + table];
[mtlComputeCommandEncoder useResource:metal_kernel_pso.intersection_func_table[table]
usage:MTLResourceUsageRead];
}
else {
[metal_device->mtlAncillaryArgEncoder setIntersectionFunctionTable:nil
atIndex:3 + table];
}
}
}
bytes_written = metal_offsets + metal_device->mtlAncillaryArgEncoder.encodedLength;
}
if (arg_buffer.storageMode == MTLStorageModeManaged) {
[arg_buffer didModifyRange:NSMakeRange(0, bytes_written)];
}
[mtlComputeCommandEncoder setBuffer:arg_buffer offset:0 atIndex:0];
[mtlComputeCommandEncoder setBuffer:arg_buffer offset:globals_offsets atIndex:1];
[mtlComputeCommandEncoder setBuffer:arg_buffer offset:metal_offsets atIndex:2];
if (metal_device->use_metalrt) {
if (@available(macos 12.0, *)) {
auto bvhMetalRT = metal_device->bvhMetalRT;
switch (kernel) {
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW:
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK:
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
break;
default:
bvhMetalRT = nil;
break;
}
if (bvhMetalRT) {
/* Mark all Accelerations resources as used */
[mtlComputeCommandEncoder useResource:bvhMetalRT->accel_struct usage:MTLResourceUsageRead];
[mtlComputeCommandEncoder useResources:bvhMetalRT->blas_array.data()
count:bvhMetalRT->blas_array.size()
usage:MTLResourceUsageRead];
}
}
}
[mtlComputeCommandEncoder setComputePipelineState:metal_kernel_pso.pipeline];
/* Compute kernel launch parameters. */
const int num_threads_per_block = metal_kernel.get_num_threads_per_block();
int shared_mem_bytes = 0;
switch (kernel) {
case DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY:
case DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY:
case DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY:
case DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY:
case DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY:
case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY:
case DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY:
case DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY:
/* See parallel_active_index.h for why this amount of shared memory is needed.
* Rounded up to 16 bytes for Metal */
shared_mem_bytes = round_up((num_threads_per_block + 1) * sizeof(int), 16);
[mtlComputeCommandEncoder setThreadgroupMemoryLength:shared_mem_bytes atIndex:0];
break;
default:
break;
}
MTLSize size_threadgroups_per_dispatch = MTLSizeMake(
divide_up(work_size, num_threads_per_block), 1, 1);
MTLSize size_threads_per_threadgroup = MTLSizeMake(num_threads_per_block, 1, 1);
[mtlComputeCommandEncoder dispatchThreadgroups:size_threadgroups_per_dispatch
threadsPerThreadgroup:size_threads_per_threadgroup];
[mtlCommandBuffer addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
NSString *kernel_name = metal_kernel_pso.function.label;
/* Enhanced command buffer errors are only available in 11.0+ */
if (@available(macos 11.0, *)) {
if (command_buffer.status == MTLCommandBufferStatusError && command_buffer.error != nil) {
printf("CommandBuffer Failed: %s\n", [kernel_name UTF8String]);
NSArray<id<MTLCommandBufferEncoderInfo>> *encoderInfos = [command_buffer.error.userInfo
valueForKey:MTLCommandBufferEncoderInfoErrorKey];
if (encoderInfos != nil) {
for (id<MTLCommandBufferEncoderInfo> encoderInfo : encoderInfos) {
NSLog(@"%@", encoderInfo);
}
}
id<MTLLogContainer> logs = command_buffer.logs;
for (id<MTLFunctionLog> log in logs) {
NSLog(@"%@", log);
}
}
else if (command_buffer.error) {
printf("CommandBuffer Failed: %s\n", [kernel_name UTF8String]);
}
}
}];
return !(metal_device->have_error());
}
bool MetalDeviceQueue::synchronize()
{
if (metal_device->have_error()) {
return false;
}
if (mtlComputeEncoder) {
close_compute_encoder();
}
close_blit_encoder();
if (mtlCommandBuffer) {
uint64_t shared_event_id = this->shared_event_id++;
if (@available(macos 10.14, *)) {
__block dispatch_semaphore_t block_sema = wait_semaphore;
[shared_event notifyListener:shared_event_listener
atValue:shared_event_id
block:^(id<MTLSharedEvent> sharedEvent, uint64_t value) {
dispatch_semaphore_signal(block_sema);
}];
[mtlCommandBuffer encodeSignalEvent:shared_event value:shared_event_id];
[mtlCommandBuffer commit];
dispatch_semaphore_wait(wait_semaphore, DISPATCH_TIME_FOREVER);
}
    for (const CopyBack &mmem : copy_back_mem) {
      memcpy((uchar *)mmem.host_pointer, (uchar *)mmem.gpu_mem, mmem.size);
    }
    copy_back_mem.clear();
    temp_buffer_pool.process_command_buffer_completion(mtlCommandBuffer);
    metal_device->flush_delayed_free_list();
    /* Release the command buffer only after the temporary buffer pool has
     * recycled the entries keyed on it. */
    [mtlCommandBuffer release];
    mtlCommandBuffer = nil;
}
return !(metal_device->have_error());
}
void MetalDeviceQueue::zero_to_device(device_memory &mem)
{
assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
if (mem.memory_size() == 0) {
return;
}
/* Allocate on demand. */
if (mem.device_pointer == 0) {
metal_device->mem_alloc(mem);
}
/* Zero memory on device. */
assert(mem.device_pointer != 0);
std::lock_guard<std::recursive_mutex> lock(metal_device->metal_mem_map_mutex);
MetalDevice::MetalMem &mmem = *metal_device->metal_mem_map.at(&mem);
if (mmem.mtlBuffer) {
id<MTLBlitCommandEncoder> blitEncoder = get_blit_encoder();
[blitEncoder fillBuffer:mmem.mtlBuffer range:NSMakeRange(mmem.offset, mmem.size) value:0];
}
else {
metal_device->mem_zero(mem);
}
}
void MetalDeviceQueue::copy_to_device(device_memory &mem)
{
if (mem.memory_size() == 0) {
return;
}
/* Allocate on demand. */
if (mem.device_pointer == 0) {
metal_device->mem_alloc(mem);
}
assert(mem.device_pointer != 0);
assert(mem.host_pointer != nullptr);
std::lock_guard<std::recursive_mutex> lock(metal_device->metal_mem_map_mutex);
auto result = metal_device->metal_mem_map.find(&mem);
if (result != metal_device->metal_mem_map.end()) {
if (mem.host_pointer == mem.shared_pointer) {
return;
}
MetalDevice::MetalMem &mmem = *result->second;
id<MTLBlitCommandEncoder> blitEncoder = get_blit_encoder();
id<MTLBuffer> buffer = temp_buffer_pool.get_buffer(mtlDevice,
mtlCommandBuffer,
mmem.size,
MTLResourceStorageModeShared,
mem.host_pointer,
stats);
[blitEncoder copyFromBuffer:buffer
sourceOffset:0
toBuffer:mmem.mtlBuffer
destinationOffset:mmem.offset
size:mmem.size];
}
else {
metal_device->mem_copy_to(mem);
}
}
void MetalDeviceQueue::copy_from_device(device_memory &mem)
{
assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
if (mem.memory_size() == 0) {
return;
}
assert(mem.device_pointer != 0);
assert(mem.host_pointer != nullptr);
std::lock_guard<std::recursive_mutex> lock(metal_device->metal_mem_map_mutex);
MetalDevice::MetalMem &mmem = *metal_device->metal_mem_map.at(&mem);
if (mmem.mtlBuffer) {
const size_t size = mem.memory_size();
if (mem.device_pointer) {
if ([mmem.mtlBuffer storageMode] == MTLStorageModeManaged) {
id<MTLBlitCommandEncoder> blitEncoder = get_blit_encoder();
[blitEncoder synchronizeResource:mmem.mtlBuffer];
}
if (mem.host_pointer != mmem.hostPtr) {
if (mtlCommandBuffer) {
copy_back_mem.push_back({mem.host_pointer, mmem.hostPtr, size});
}
else {
memcpy((uchar *)mem.host_pointer, (uchar *)mmem.hostPtr, size);
}
}
}
else {
memset((char *)mem.host_pointer, 0, size);
}
}
else {
metal_device->mem_copy_from(mem);
}
}
bool MetalDeviceQueue::kernel_available(DeviceKernel kernel) const
{
return metal_device->kernels.available(kernel);
}
void MetalDeviceQueue::prepare_resources(DeviceKernel kernel)
{
std::lock_guard<std::recursive_mutex> lock(metal_device->metal_mem_map_mutex);
/* declare resource usage */
for (auto &it : metal_device->metal_mem_map) {
device_memory *mem = it.first;
MTLResourceUsage usage = MTLResourceUsageRead;
if (mem->type != MEM_GLOBAL && mem->type != MEM_READ_ONLY && mem->type != MEM_TEXTURE) {
usage |= MTLResourceUsageWrite;
}
if (it.second->mtlBuffer) {
/* METAL_WIP - use array version (i.e. useResources) */
[mtlComputeEncoder useResource:it.second->mtlBuffer usage:usage];
}
else if (it.second->mtlTexture) {
/* METAL_WIP - use array version (i.e. useResources) */
[mtlComputeEncoder useResource:it.second->mtlTexture usage:usage | MTLResourceUsageSample];
}
}
/* ancillaries */
[mtlComputeEncoder useResource:metal_device->texture_bindings_2d usage:MTLResourceUsageRead];
[mtlComputeEncoder useResource:metal_device->texture_bindings_3d usage:MTLResourceUsageRead];
}
id<MTLComputeCommandEncoder> MetalDeviceQueue::get_compute_encoder(DeviceKernel kernel)
{
bool concurrent = (kernel < DEVICE_KERNEL_INTEGRATOR_NUM);
if (@available(macos 10.14, *)) {
if (mtlComputeEncoder) {
      /* Parenthesize the conditional: `==` binds tighter than `?:`, so the
       * unparenthesized form compared against the wrong operand. */
      if (mtlComputeEncoder.dispatchType ==
          (concurrent ? MTLDispatchTypeConcurrent : MTLDispatchTypeSerial)) {
/* declare usage of MTLBuffers etc */
prepare_resources(kernel);
return mtlComputeEncoder;
}
close_compute_encoder();
}
close_blit_encoder();
if (!mtlCommandBuffer) {
mtlCommandBuffer = [mtlCommandQueue commandBuffer];
[mtlCommandBuffer retain];
}
mtlComputeEncoder = [mtlCommandBuffer
computeCommandEncoderWithDispatchType:concurrent ? MTLDispatchTypeConcurrent :
MTLDispatchTypeSerial];
/* declare usage of MTLBuffers etc */
prepare_resources(kernel);
}
return mtlComputeEncoder;
}
id<MTLBlitCommandEncoder> MetalDeviceQueue::get_blit_encoder()
{
if (mtlBlitEncoder) {
return mtlBlitEncoder;
}
if (mtlComputeEncoder) {
close_compute_encoder();
}
if (!mtlCommandBuffer) {
mtlCommandBuffer = [mtlCommandQueue commandBuffer];
[mtlCommandBuffer retain];
}
mtlBlitEncoder = [mtlCommandBuffer blitCommandEncoder];
return mtlBlitEncoder;
}
void MetalDeviceQueue::close_compute_encoder()
{
[mtlComputeEncoder endEncoding];
mtlComputeEncoder = nil;
}
void MetalDeviceQueue::close_blit_encoder()
{
if (mtlBlitEncoder) {
[mtlBlitEncoder endEncoding];
mtlBlitEncoder = nil;
}
}
CCL_NAMESPACE_END
#endif /* WITH_METAL */


@@ -1,101 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifdef WITH_METAL
# include <Metal/Metal.h>
# include <string>
# include "device/metal/device.h"
# include "device/metal/kernel.h"
# include "device/queue.h"
# include "util/thread.h"
CCL_NAMESPACE_BEGIN
enum MetalGPUVendor {
METAL_GPU_UNKNOWN = 0,
METAL_GPU_APPLE = 1,
METAL_GPU_AMD = 2,
METAL_GPU_INTEL = 3,
};
/* Retains a named MTLDevice for device enumeration. */
struct MetalPlatformDevice {
MetalPlatformDevice(id<MTLDevice> device, const string &device_name)
: device_id(device), device_name(device_name)
{
[device_id retain];
}
~MetalPlatformDevice()
{
[device_id release];
}
id<MTLDevice> device_id;
string device_name;
};
/* Contains static Metal helper functions. */
struct MetalInfo {
static bool device_version_check(id<MTLDevice> device);
static void get_usable_devices(vector<MetalPlatformDevice> *usable_devices);
static MetalGPUVendor get_vendor_from_device_name(string const &device_name);
/* Platform information. */
static bool get_num_devices(uint32_t *num_platforms);
static uint32_t get_num_devices();
static bool get_device_name(id<MTLDevice> device_id, string *device_name);
static string get_device_name(id<MTLDevice> device_id);
};
/* Pool of MTLBuffers whose lifetime is linked to a single MTLCommandBuffer */
class MetalBufferPool {
struct MetalBufferListEntry {
MetalBufferListEntry(id<MTLBuffer> buffer, id<MTLCommandBuffer> command_buffer)
: buffer(buffer), command_buffer(command_buffer)
{
}
MetalBufferListEntry() = delete;
id<MTLBuffer> buffer;
id<MTLCommandBuffer> command_buffer;
};
std::vector<MetalBufferListEntry> buffer_free_list;
std::vector<MetalBufferListEntry> buffer_in_use_list;
thread_mutex buffer_mutex;
size_t total_temp_mem_size = 0;
public:
MetalBufferPool() = default;
~MetalBufferPool();
id<MTLBuffer> get_buffer(id<MTLDevice> device,
id<MTLCommandBuffer> command_buffer,
NSUInteger length,
MTLResourceOptions options,
const void *pointer,
Stats &stats);
void process_command_buffer_completion(id<MTLCommandBuffer> command_buffer);
};
CCL_NAMESPACE_END
#endif /* WITH_METAL */


@@ -1,218 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_METAL
# include "device/metal/util.h"
# include "device/metal/device_impl.h"
# include "util/md5.h"
# include "util/path.h"
# include "util/string.h"
# include "util/time.h"
# include <pwd.h>
# include <sys/shm.h>
# include <time.h>
CCL_NAMESPACE_BEGIN
MetalGPUVendor MetalInfo::get_vendor_from_device_name(string const &device_name)
{
if (device_name.find("Intel") != string::npos) {
return METAL_GPU_INTEL;
}
else if (device_name.find("AMD") != string::npos) {
return METAL_GPU_AMD;
}
else if (device_name.find("Apple") != string::npos) {
return METAL_GPU_APPLE;
}
return METAL_GPU_UNKNOWN;
}
bool MetalInfo::device_version_check(id<MTLDevice> device)
{
/* Metal Cycles doesn't work correctly on macOS versions older than 12.0 */
if (@available(macos 12.0, *)) {
MetalGPUVendor vendor = get_vendor_from_device_name([[device name] UTF8String]);
/* Metal Cycles works on Apple Silicon GPUs at present */
return (vendor == METAL_GPU_APPLE);
}
return false;
}
void MetalInfo::get_usable_devices(vector<MetalPlatformDevice> *usable_devices)
{
static bool first_time = true;
# define FIRST_VLOG(severity) \
if (first_time) \
VLOG(severity)
usable_devices->clear();
NSArray<id<MTLDevice>> *allDevices = MTLCopyAllDevices();
for (id<MTLDevice> device in allDevices) {
string device_name;
if (!get_device_name(device, &device_name)) {
FIRST_VLOG(2) << "Failed to get device name, ignoring.";
continue;
}
static const char *forceIntelStr = getenv("CYCLES_METAL_FORCE_INTEL");
bool forceIntel = forceIntelStr ? (atoi(forceIntelStr) != 0) : false;
if (forceIntel && device_name.find("Intel") == string::npos) {
FIRST_VLOG(2) << "CYCLES_METAL_FORCE_INTEL causing non-Intel device " << device_name
<< " to be ignored.";
continue;
}
if (!device_version_check(device)) {
FIRST_VLOG(2) << "Ignoring device " << device_name << " due to too old compiler version.";
continue;
}
FIRST_VLOG(2) << "Adding new device " << device_name << ".";
    usable_devices->push_back(MetalPlatformDevice(device, device_name));
  }
  /* Balance the retained array returned by MTLCopyAllDevices() (no ARC). */
  [allDevices release];
  first_time = false;
}
bool MetalInfo::get_num_devices(uint32_t *num_devices)
{
  NSArray<id<MTLDevice>> *allDevices = MTLCopyAllDevices();
  *num_devices = allDevices.count;
  /* Balance the retained array returned by MTLCopyAllDevices() (no ARC). */
  [allDevices release];
  return true;
}
uint32_t MetalInfo::get_num_devices()
{
uint32_t num_devices;
if (!get_num_devices(&num_devices)) {
return 0;
}
return num_devices;
}
bool MetalInfo::get_device_name(id<MTLDevice> device, string *platform_name)
{
*platform_name = [device.name UTF8String];
return true;
}
string MetalInfo::get_device_name(id<MTLDevice> device)
{
string platform_name;
if (!get_device_name(device, &platform_name)) {
return "";
}
return platform_name;
}
id<MTLBuffer> MetalBufferPool::get_buffer(id<MTLDevice> device,
id<MTLCommandBuffer> command_buffer,
NSUInteger length,
MTLResourceOptions options,
const void *pointer,
Stats &stats)
{
id<MTLBuffer> buffer;
MTLStorageMode storageMode = MTLStorageMode((options & MTLResourceStorageModeMask) >>
MTLResourceStorageModeShift);
MTLCPUCacheMode cpuCacheMode = MTLCPUCacheMode((options & MTLResourceCPUCacheModeMask) >>
MTLResourceCPUCacheModeShift);
buffer_mutex.lock();
for (auto entry = buffer_free_list.begin(); entry != buffer_free_list.end(); entry++) {
MetalBufferListEntry bufferEntry = *entry;
    /* Reuse the buffer if its length, storage mode and CPU cache mode all match. */
if (bufferEntry.buffer.length == length && storageMode == bufferEntry.buffer.storageMode &&
cpuCacheMode == bufferEntry.buffer.cpuCacheMode) {
buffer = bufferEntry.buffer;
buffer_free_list.erase(entry);
bufferEntry.command_buffer = command_buffer;
buffer_in_use_list.push_back(bufferEntry);
buffer_mutex.unlock();
/* Copy over data */
if (pointer) {
memcpy(buffer.contents, pointer, length);
if (bufferEntry.buffer.storageMode == MTLStorageModeManaged) {
[buffer didModifyRange:NSMakeRange(0, length)];
}
}
return buffer;
}
}
// NSLog(@"Creating buffer of length %lu (%lu)", length, frameCount);
if (pointer) {
buffer = [device newBufferWithBytes:pointer length:length options:options];
}
else {
buffer = [device newBufferWithLength:length options:options];
}
MetalBufferListEntry buffer_entry(buffer, command_buffer);
stats.mem_alloc(buffer.allocatedSize);
total_temp_mem_size += buffer.allocatedSize;
buffer_in_use_list.push_back(buffer_entry);
buffer_mutex.unlock();
return buffer;
}
void MetalBufferPool::process_command_buffer_completion(id<MTLCommandBuffer> command_buffer)
{
assert(command_buffer);
thread_scoped_lock lock(buffer_mutex);
  /* Return the buffers that were in flight on this command buffer to the free pool. */
for (auto entry = buffer_in_use_list.begin(); entry != buffer_in_use_list.end();) {
MetalBufferListEntry buffer_entry = *entry;
if (buffer_entry.command_buffer == command_buffer) {
entry = buffer_in_use_list.erase(entry);
buffer_entry.command_buffer = nil;
buffer_free_list.push_back(buffer_entry);
}
else {
entry++;
}
}
}
MetalBufferPool::~MetalBufferPool()
{
thread_scoped_lock lock(buffer_mutex);
  /* Release every buffer remaining in the free pool. */
  for (auto entry = buffer_free_list.begin(); entry != buffer_free_list.end();) {
    MetalBufferListEntry buffer_entry = *entry;
    id<MTLBuffer> buffer = buffer_entry.buffer;
total_temp_mem_size -= buffer.allocatedSize;
[buffer release];
entry = buffer_free_list.erase(entry);
}
}
CCL_NAMESPACE_END
#endif /* WITH_METAL */


@@ -124,20 +124,11 @@ class MultiDevice : public Device {
return BVH_LAYOUT_MULTI_OPTIX;
}
/* With multiple Metal devices, every device needs its own acceleration structure */
if (bvh_layout_mask == BVH_LAYOUT_METAL) {
return BVH_LAYOUT_MULTI_METAL;
}
/* When devices do not share a common BVH layout, fall back to creating one for each */
const BVHLayoutMask BVH_LAYOUT_OPTIX_EMBREE = (BVH_LAYOUT_OPTIX | BVH_LAYOUT_EMBREE);
if ((bvh_layout_mask_all & BVH_LAYOUT_OPTIX_EMBREE) == BVH_LAYOUT_OPTIX_EMBREE) {
return BVH_LAYOUT_MULTI_OPTIX_EMBREE;
}
const BVHLayoutMask BVH_LAYOUT_METAL_EMBREE = (BVH_LAYOUT_METAL | BVH_LAYOUT_EMBREE);
if ((bvh_layout_mask_all & BVH_LAYOUT_METAL_EMBREE) == BVH_LAYOUT_METAL_EMBREE) {
return BVH_LAYOUT_MULTI_METAL_EMBREE;
}
return bvh_layout_mask;
}
@@ -160,9 +151,7 @@ class MultiDevice : public Device {
}
assert(bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX ||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL ||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE ||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE);
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE);
BVHMulti *const bvh_multi = static_cast<BVHMulti *>(bvh);
bvh_multi->sub_bvhs.resize(devices.size());
@@ -185,14 +174,9 @@ class MultiDevice : public Device {
BVHParams params = bvh->params;
if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX)
params.bvh_layout = BVH_LAYOUT_OPTIX;
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL)
params.bvh_layout = BVH_LAYOUT_METAL;
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE)
params.bvh_layout = sub.device->info.type == DEVICE_OPTIX ? BVH_LAYOUT_OPTIX :
BVH_LAYOUT_EMBREE;
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE)
params.bvh_layout = sub.device->info.type == DEVICE_METAL ? BVH_LAYOUT_METAL :
BVH_LAYOUT_EMBREE;
/* Skip building a bottom level acceleration structure for non-instanced geometry on Embree
* (since they are put into the top level directly, see bvh_embree.cpp) */


@@ -28,7 +28,6 @@
# include "scene/mesh.h"
# include "scene/object.h"
# include "scene/pass.h"
# include "scene/pointcloud.h"
# include "scene/scene.h"
# include "util/debug.h"
@@ -42,19 +41,17 @@
# define __KERNEL_OPTIX__
# include "kernel/device/optix/globals.h"
# include <optix_denoiser_tiling.h>
CCL_NAMESPACE_BEGIN
OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
: device(device), queue(device), state(device, "__denoiser_state", true)
: device(device), queue(device), state(device, "__denoiser_state")
{
}
OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: CUDADevice(info, stats, profiler),
sbt_data(this, "__sbt", MEM_READ_ONLY),
launch_params(this, "__params", false),
launch_params(this, "__params"),
denoiser_(this)
{
/* Make the CUDA context current. */
@@ -211,15 +208,11 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
}
else {
module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3;
module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
}
module_options.boundValues = nullptr;
module_options.numBoundValues = 0;
# if OPTIX_ABI_VERSION >= 55
module_options.payloadTypes = nullptr;
module_options.numPayloadTypes = 0;
# endif
OptixPipelineCompileOptions pipeline_options = {};
/* Default to no motion blur and two-level graph, since it is the fastest option. */
@@ -234,18 +227,11 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
pipeline_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE;
if (kernel_features & KERNEL_FEATURE_HAIR) {
if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
# if OPTIX_ABI_VERSION >= 55
pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM;
# else
pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE;
# endif
}
else
pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
}
if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
}
/* Keep track of whether motion blur is enabled, so to enable/disable motion in BVH builds
* This is necessary since objects may be reported to have motion if the Vector pass is
@@ -338,13 +324,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
/* Built-in thick curve intersection. */
OptixBuiltinISOptions builtin_options = {};
# if OPTIX_ABI_VERSION >= 55
builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
builtin_options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE;
builtin_options.curveEndcapFlags = OPTIX_CURVE_ENDCAP_DEFAULT; /* Disable end-caps. */
# else
builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
# endif
builtin_options.usesMotionBlur = false;
optix_assert(optixBuiltinISModuleGet(
@@ -376,18 +356,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
}
}
/* Pointclouds */
if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
group_descs[PG_HITD_POINTCLOUD] = group_descs[PG_HITD];
group_descs[PG_HITD_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
group_descs[PG_HITD_POINTCLOUD].hitgroup.moduleIS = optix_module;
group_descs[PG_HITD_POINTCLOUD].hitgroup.entryFunctionNameIS = "__intersection__point";
group_descs[PG_HITS_POINTCLOUD] = group_descs[PG_HITS];
group_descs[PG_HITS_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
group_descs[PG_HITS_POINTCLOUD].hitgroup.moduleIS = optix_module;
group_descs[PG_HITS_POINTCLOUD].hitgroup.entryFunctionNameIS = "__intersection__point";
}
if (kernel_features & (KERNEL_FEATURE_SUBSURFACE | KERNEL_FEATURE_NODE_RAYTRACE)) {
/* Add hit group for local intersections. */
group_descs[PG_HITL].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
@@ -435,10 +403,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH);
trace_css = std::max(trace_css,
stack_size[PG_HITS_MOTION].cssIS + stack_size[PG_HITS_MOTION].cssAH);
trace_css = std::max(
trace_css, stack_size[PG_HITD_POINTCLOUD].cssIS + stack_size[PG_HITD_POINTCLOUD].cssAH);
trace_css = std::max(
trace_css, stack_size[PG_HITS_POINTCLOUD].cssIS + stack_size[PG_HITS_POINTCLOUD].cssAH);
OptixPipelineLinkOptions link_options = {};
link_options.maxTraceDepth = 1;
@@ -447,7 +411,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
}
else {
link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
}
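Both sides of this hunk follow the same pattern and differ only in the release-mode level (NONE on the removed side, LINEINFO on the added one). A sketch, assuming the debug branch mirrors the link options shown above (optimization off, full debug info):

/* Sketch: module and link debug levels are chosen together from one flag. */
if (use_debug) {
  module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0;
  module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
  link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
}
else {
  module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3;
  module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE; /* LINEINFO on the added side. */
  link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
}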
if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
@@ -464,10 +428,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
pipeline_groups.push_back(groups[PG_HITD_MOTION]);
pipeline_groups.push_back(groups[PG_HITS_MOTION]);
}
if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
}
pipeline_groups.push_back(groups[PG_CALL_SVM_AO]);
pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]);
@@ -507,10 +467,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
pipeline_groups.push_back(groups[PG_HITD_MOTION]);
pipeline_groups.push_back(groups[PG_HITS_MOTION]);
}
if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
}
optix_assert(optixPipelineCreate(context,
&pipeline_options,
@@ -551,7 +507,7 @@ class OptiXDevice::DenoiseContext {
: denoise_params(task.params),
render_buffers(task.render_buffers),
buffer_params(task.buffer_params),
guiding_buffer(device, "denoiser guiding passes buffer", true),
guiding_buffer(device, "denoiser guiding passes buffer"),
num_samples(task.num_samples)
{
num_input_passes = 1;
@@ -566,28 +522,14 @@ class OptiXDevice::DenoiseContext {
}
}
if (denoise_params.temporally_stable) {
prev_output.device_pointer = render_buffers->buffer.device_pointer;
const int num_guiding_passes = num_input_passes - 1;
prev_output.offset = buffer_params.get_pass_offset(PASS_DENOISING_PREVIOUS);
prev_output.stride = buffer_params.stride;
prev_output.pass_stride = buffer_params.pass_stride;
num_input_passes += 1;
use_pass_flow = true;
pass_motion = buffer_params.get_pass_offset(PASS_MOTION);
}
use_guiding_passes = (num_input_passes - 1) > 0;
if (use_guiding_passes) {
if (num_guiding_passes) {
if (task.allow_inplace_modification) {
guiding_params.device_pointer = render_buffers->buffer.device_pointer;
guiding_params.pass_albedo = pass_denoising_albedo;
guiding_params.pass_normal = pass_denoising_normal;
guiding_params.pass_flow = pass_motion;
guiding_params.stride = buffer_params.stride;
guiding_params.pass_stride = buffer_params.pass_stride;
@@ -602,10 +544,6 @@ class OptiXDevice::DenoiseContext {
guiding_params.pass_normal = guiding_params.pass_stride;
guiding_params.pass_stride += 3;
}
if (use_pass_flow) {
guiding_params.pass_flow = guiding_params.pass_stride;
guiding_params.pass_stride += 2;
}
guiding_params.stride = buffer_params.width;
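The offsets above implement a simple packing scheme: each enabled guiding pass claims the next few floats of the per-pixel stride. A self-contained sketch (PASS_UNUSED as in Cycles; the function name is illustrative):

/* Sketch: pack enabled guiding passes into one interleaved float buffer. */
struct GuidingLayout {
  int pass_albedo = PASS_UNUSED, pass_normal = PASS_UNUSED, pass_flow = PASS_UNUSED;
  int pass_stride = 0; /* Per-pixel footprint in floats. */
};
static GuidingLayout pack_guiding_passes(bool use_albedo, bool use_normal, bool use_flow)
{
  GuidingLayout l;
  if (use_albedo) { l.pass_albedo = l.pass_stride; l.pass_stride += 3; } /* RGB */
  if (use_normal) { l.pass_normal = l.pass_stride; l.pass_stride += 3; } /* XYZ */
  if (use_flow)   { l.pass_flow   = l.pass_stride; l.pass_stride += 2; } /* 2D motion */
  return l; /* Row stride is then width * pass_stride floats. */
}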
@@ -623,16 +561,6 @@ class OptiXDevice::DenoiseContext {
RenderBuffers *render_buffers = nullptr;
const BufferParams &buffer_params;
/* Previous output. */
struct {
device_ptr device_pointer = 0;
int offset = PASS_UNUSED;
int stride = -1;
int pass_stride = -1;
} prev_output;
/* Device-side storage of the guiding passes. */
device_only_memory<float> guiding_buffer;
@@ -642,7 +570,6 @@ class OptiXDevice::DenoiseContext {
/* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
int pass_albedo = PASS_UNUSED;
int pass_normal = PASS_UNUSED;
int pass_flow = PASS_UNUSED;
int stride = -1;
int pass_stride = -1;
@@ -650,10 +577,8 @@ class OptiXDevice::DenoiseContext {
/* Number of input passes. Including the color and extra auxiliary passes. */
int num_input_passes = 0;
bool use_guiding_passes = false;
bool use_pass_albedo = false;
bool use_pass_normal = false;
bool use_pass_flow = false;
int num_samples = 0;
@@ -662,7 +587,6 @@ class OptiXDevice::DenoiseContext {
/* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
int pass_denoising_albedo = PASS_UNUSED;
int pass_denoising_normal = PASS_UNUSED;
int pass_motion = PASS_UNUSED;
/* For passes which don't need the albedo channel for denoising we replace the actual albedo with
* the (0.5, 0.5, 0.5). This flag indicates that the real albedo pass has been replaced with
@@ -729,24 +653,22 @@ bool OptiXDevice::denoise_filter_guiding_preprocess(DenoiseContext &context)
const int work_size = buffer_params.width * buffer_params.height;
DeviceKernelArguments args(&context.guiding_params.device_pointer,
&context.guiding_params.pass_stride,
&context.guiding_params.pass_albedo,
&context.guiding_params.pass_normal,
&context.guiding_params.pass_flow,
&context.render_buffers->buffer.device_pointer,
&buffer_params.offset,
&buffer_params.stride,
&buffer_params.pass_stride,
&context.pass_sample_count,
&context.pass_denoising_albedo,
&context.pass_denoising_normal,
&context.pass_motion,
&buffer_params.full_x,
&buffer_params.full_y,
&buffer_params.width,
&buffer_params.height,
&context.num_samples);
void *args[] = {const_cast<device_ptr *>(&context.guiding_params.device_pointer),
const_cast<int *>(&context.guiding_params.pass_stride),
const_cast<int *>(&context.guiding_params.pass_albedo),
const_cast<int *>(&context.guiding_params.pass_normal),
&context.render_buffers->buffer.device_pointer,
const_cast<int *>(&buffer_params.offset),
const_cast<int *>(&buffer_params.stride),
const_cast<int *>(&buffer_params.pass_stride),
const_cast<int *>(&context.pass_sample_count),
const_cast<int *>(&context.pass_denoising_albedo),
const_cast<int *>(&context.pass_denoising_normal),
const_cast<int *>(&buffer_params.full_x),
const_cast<int *>(&buffer_params.full_y),
const_cast<int *>(&buffer_params.width),
const_cast<int *>(&buffer_params.height),
const_cast<int *>(&context.num_samples)};
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, work_size, args);
}
@@ -757,11 +679,11 @@ bool OptiXDevice::denoise_filter_guiding_set_fake_albedo(DenoiseContext &context
const int work_size = buffer_params.width * buffer_params.height;
DeviceKernelArguments args(&context.guiding_params.device_pointer,
&context.guiding_params.pass_stride,
&context.guiding_params.pass_albedo,
&buffer_params.width,
&buffer_params.height);
void *args[] = {const_cast<device_ptr *>(&context.guiding_params.device_pointer),
const_cast<int *>(&context.guiding_params.pass_stride),
const_cast<int *>(&context.guiding_params.pass_albedo),
const_cast<int *>(&buffer_params.width),
const_cast<int *>(&buffer_params.height)};
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO, work_size, args);
}
@@ -786,7 +708,7 @@ void OptiXDevice::denoise_pass(DenoiseContext &context, PassType pass_type)
return;
}
}
else if (context.use_guiding_passes && !context.albedo_replaced_with_fake) {
else if (!context.albedo_replaced_with_fake) {
context.albedo_replaced_with_fake = true;
if (!denoise_filter_guiding_set_fake_albedo(context)) {
LOG(ERROR) << "Error replacing real albedo with the fake one.";
@@ -857,15 +779,15 @@ bool OptiXDevice::denoise_filter_color_preprocess(DenoiseContext &context, const
const int work_size = buffer_params.width * buffer_params.height;
DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
&buffer_params.full_x,
&buffer_params.full_y,
&buffer_params.width,
&buffer_params.height,
&buffer_params.offset,
&buffer_params.stride,
&buffer_params.pass_stride,
&pass.denoised_offset);
void *args[] = {&context.render_buffers->buffer.device_pointer,
const_cast<int *>(&buffer_params.full_x),
const_cast<int *>(&buffer_params.full_y),
const_cast<int *>(&buffer_params.width),
const_cast<int *>(&buffer_params.height),
const_cast<int *>(&buffer_params.offset),
const_cast<int *>(&buffer_params.stride),
const_cast<int *>(&buffer_params.pass_stride),
const_cast<int *>(&pass.denoised_offset)};
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, work_size, args);
}
@@ -877,20 +799,20 @@ bool OptiXDevice::denoise_filter_color_postprocess(DenoiseContext &context,
const int work_size = buffer_params.width * buffer_params.height;
DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
&buffer_params.full_x,
&buffer_params.full_y,
&buffer_params.width,
&buffer_params.height,
&buffer_params.offset,
&buffer_params.stride,
&buffer_params.pass_stride,
&context.num_samples,
&pass.noisy_offset,
&pass.denoised_offset,
&context.pass_sample_count,
&pass.num_components,
&pass.use_compositing);
void *args[] = {&context.render_buffers->buffer.device_pointer,
const_cast<int *>(&buffer_params.full_x),
const_cast<int *>(&buffer_params.full_y),
const_cast<int *>(&buffer_params.width),
const_cast<int *>(&buffer_params.height),
const_cast<int *>(&buffer_params.offset),
const_cast<int *>(&buffer_params.stride),
const_cast<int *>(&buffer_params.pass_stride),
const_cast<int *>(&context.num_samples),
const_cast<int *>(&pass.noisy_offset),
const_cast<int *>(&pass.denoised_offset),
const_cast<int *>(&context.pass_sample_count),
const_cast<int *>(&pass.num_components),
const_cast<bool *>(&pass.use_compositing)};
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, work_size, args);
}
@@ -914,8 +836,7 @@ bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context)
{
const bool recreate_denoiser = (denoiser_.optix_denoiser == nullptr) ||
(denoiser_.use_pass_albedo != context.use_pass_albedo) ||
(denoiser_.use_pass_normal != context.use_pass_normal) ||
(denoiser_.use_pass_flow != context.use_pass_flow);
(denoiser_.use_pass_normal != context.use_pass_normal);
if (!recreate_denoiser) {
return true;
}
@@ -929,14 +850,8 @@ bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context)
OptixDenoiserOptions denoiser_options = {};
denoiser_options.guideAlbedo = context.use_pass_albedo;
denoiser_options.guideNormal = context.use_pass_normal;
OptixDenoiserModelKind model = OPTIX_DENOISER_MODEL_KIND_HDR;
if (context.use_pass_flow) {
model = OPTIX_DENOISER_MODEL_KIND_TEMPORAL;
}
const OptixResult result = optixDenoiserCreate(
this->context, model, &denoiser_options, &denoiser_.optix_denoiser);
this->context, OPTIX_DENOISER_MODEL_KIND_HDR, &denoiser_options, &denoiser_.optix_denoiser);
if (result != OPTIX_SUCCESS) {
set_error("Failed to create OptiX denoiser");
@@ -946,7 +861,6 @@ bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context)
/* OptiX denoiser handle was created with the requested number of input passes. */
denoiser_.use_pass_albedo = context.use_pass_albedo;
denoiser_.use_pass_normal = context.use_pass_normal;
denoiser_.use_pass_flow = context.use_pass_flow;
/* OptiX denoiser has been created, but it needs configuration. */
denoiser_.is_configured = false;
@@ -956,33 +870,35 @@ bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context)
bool OptiXDevice::denoise_configure_if_needed(DenoiseContext &context)
{
/* Limit the maximum tile size the denoiser can be invoked with. */
const int2 tile_size = make_int2(min(context.buffer_params.width, 4096),
min(context.buffer_params.height, 4096));
if (denoiser_.is_configured &&
(denoiser_.configured_size.x == tile_size.x && denoiser_.configured_size.y == tile_size.y)) {
if (denoiser_.is_configured && (denoiser_.configured_size.x == context.buffer_params.width &&
denoiser_.configured_size.y == context.buffer_params.height)) {
return true;
}
const BufferParams &buffer_params = context.buffer_params;
OptixDenoiserSizes sizes = {};
optix_assert(optixDenoiserComputeMemoryResources(
denoiser_.optix_denoiser, tile_size.x, tile_size.y, &denoiser_.sizes));
denoiser_.optix_denoiser, buffer_params.width, buffer_params.height, &sizes));
/* Denoiser is invoked on whole images only, so no overlap needed (would be used for tiling). */
denoiser_.scratch_size = sizes.withoutOverlapScratchSizeInBytes;
denoiser_.scratch_offset = sizes.stateSizeInBytes;
/* Allocate denoiser state if tile size has changed since last setup. */
denoiser_.state.alloc_to_device(denoiser_.sizes.stateSizeInBytes +
denoiser_.sizes.withOverlapScratchSizeInBytes);
denoiser_.state.alloc_to_device(denoiser_.scratch_offset + denoiser_.scratch_size);
/* Initialize denoiser state for the current tile size. */
const OptixResult result = optixDenoiserSetup(
denoiser_.optix_denoiser,
0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called
on a stream that is not the default stream */
tile_size.x + denoiser_.sizes.overlapWindowSizeInPixels * 2,
tile_size.y + denoiser_.sizes.overlapWindowSizeInPixels * 2,
buffer_params.width,
buffer_params.height,
denoiser_.state.device_pointer,
denoiser_.sizes.stateSizeInBytes,
denoiser_.state.device_pointer + denoiser_.sizes.stateSizeInBytes,
denoiser_.sizes.withOverlapScratchSizeInBytes);
denoiser_.scratch_offset,
denoiser_.state.device_pointer + denoiser_.scratch_offset,
denoiser_.scratch_size);
if (result != OPTIX_SUCCESS) {
set_error("Failed to set up OptiX denoiser");
return false;
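Both sides keep the denoiser state and scratch in one allocation laid out as [state][scratch]. A sketch of the sizing and setup on the added side (OptiX SDK calls as used above; `state` is the Cycles device_only_memory buffer):

/* Sketch: size and initialize denoiser state + scratch in a single device buffer. */
OptixDenoiserSizes sizes = {};
optix_assert(optixDenoiserComputeMemoryResources(denoiser, width, height, &sizes));
const size_t scratch_offset = sizes.stateSizeInBytes;               /* State comes first. */
const size_t scratch_size = sizes.withoutOverlapScratchSizeInBytes; /* Whole-image invoke. */
state.alloc_to_device(scratch_offset + scratch_size);
optix_assert(optixDenoiserSetup(denoiser,
                                0, /* Default stream; see the driver workaround above. */
                                width,
                                height,
                                state.device_pointer,
                                scratch_offset,
                                state.device_pointer + scratch_offset,
                                scratch_size));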
@@ -991,7 +907,8 @@ bool OptiXDevice::denoise_configure_if_needed(DenoiseContext &context)
cuda_assert(cuCtxSynchronize());
denoiser_.is_configured = true;
denoiser_.configured_size = tile_size;
denoiser_.configured_size.x = buffer_params.width;
denoiser_.configured_size.y = buffer_params.height;
return true;
}
@@ -1006,10 +923,8 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
OptixImage2D color_layer = {0};
OptixImage2D albedo_layer = {0};
OptixImage2D normal_layer = {0};
OptixImage2D flow_layer = {0};
OptixImage2D output_layer = {0};
OptixImage2D prev_output_layer = {0};
/* Color pass. */
{
@@ -1025,19 +940,6 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
color_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
}
/* Previous output. */
if (context.prev_output.offset != PASS_UNUSED) {
const int64_t pass_stride_in_bytes = context.prev_output.pass_stride * sizeof(float);
prev_output_layer.data = context.prev_output.device_pointer +
context.prev_output.offset * sizeof(float);
prev_output_layer.width = width;
prev_output_layer.height = height;
prev_output_layer.rowStrideInBytes = pass_stride_in_bytes * context.prev_output.stride;
prev_output_layer.pixelStrideInBytes = pass_stride_in_bytes;
prev_output_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
}
/* Optional albedo and normal passes. */
if (context.num_input_passes > 1) {
const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;
@@ -1061,47 +963,34 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
normal_layer.pixelStrideInBytes = pixel_stride_in_bytes;
normal_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
}
if (context.use_pass_flow) {
flow_layer.data = d_guiding_buffer + context.guiding_params.pass_flow * sizeof(float);
flow_layer.width = width;
flow_layer.height = height;
flow_layer.rowStrideInBytes = row_stride_in_bytes;
flow_layer.pixelStrideInBytes = pixel_stride_in_bytes;
flow_layer.format = OPTIX_PIXEL_FORMAT_FLOAT2;
}
}
/* Denoise in-place of the noisy input in the render buffers. */
output_layer = color_layer;
OptixDenoiserGuideLayer guide_layers = {};
guide_layers.albedo = albedo_layer;
guide_layers.normal = normal_layer;
guide_layers.flow = flow_layer;
OptixDenoiserLayer image_layers = {};
image_layers.input = color_layer;
image_layers.previousOutput = prev_output_layer;
image_layers.output = output_layer;
/* Finally run denoising. */
OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */
optix_assert(optixUtilDenoiserInvokeTiled(denoiser_.optix_denoiser,
denoiser_.queue.stream(),
&params,
denoiser_.state.device_pointer,
denoiser_.sizes.stateSizeInBytes,
&guide_layers,
&image_layers,
1,
denoiser_.state.device_pointer +
denoiser_.sizes.stateSizeInBytes,
denoiser_.sizes.withOverlapScratchSizeInBytes,
denoiser_.sizes.overlapWindowSizeInPixels,
denoiser_.configured_size.x,
denoiser_.configured_size.y));
OptixDenoiserLayer image_layers = {};
image_layers.input = color_layer;
image_layers.output = output_layer;
OptixDenoiserGuideLayer guide_layers = {};
guide_layers.albedo = albedo_layer;
guide_layers.normal = normal_layer;
optix_assert(optixDenoiserInvoke(denoiser_.optix_denoiser,
denoiser_.queue.stream(),
&params,
denoiser_.state.device_pointer,
denoiser_.scratch_offset,
&guide_layers,
&image_layers,
1,
0,
0,
denoiser_.state.device_pointer + denoiser_.scratch_offset,
denoiser_.scratch_size));
return true;
}
@@ -1111,13 +1000,6 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
const OptixBuildInput &build_input,
uint16_t num_motion_steps)
{
/* Allocate and build acceleration structures only one at a time, to prevent parallel builds
* from running out of memory (since both original and compacted acceleration structure memory
* may be allocated at the same time for the duration of this function). The builds would
* otherwise happen on the same CUDA stream anyway. */
static thread_mutex mutex;
thread_scoped_lock lock(mutex);
const CUDAContextScope scope(this);
const bool use_fast_trace_bvh = (bvh->params.bvh_type == BVH_TYPE_STATIC);
@@ -1143,14 +1025,13 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
optix_assert(optixAccelComputeMemoryUsage(context, &options, &build_input, 1, &sizes));
/* Allocate required output buffers. */
device_only_memory<char> temp_mem(this, "optix temp as build mem", true);
device_only_memory<char> temp_mem(this, "optix temp as build mem");
temp_mem.alloc_to_device(align_up(sizes.tempSizeInBytes, 8) + 8);
if (!temp_mem.device_pointer) {
/* Make sure temporary memory allocation succeeded. */
return false;
}
/* Acceleration structure memory has to be allocated on the device (not allowed on the host). */
device_only_memory<char> &out_data = *bvh->as_data;
if (operation == OPTIX_BUILD_OPERATION_BUILD) {
assert(out_data.device == this);
@@ -1199,13 +1080,12 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
/* There is no point compacting if the size does not change. */
if (compacted_size < sizes.outputSizeInBytes) {
device_only_memory<char> compacted_data(this, "optix compacted as", false);
device_only_memory<char> compacted_data(this, "optix compacted as");
compacted_data.alloc_to_device(compacted_size);
if (!compacted_data.device_pointer) {
if (!compacted_data.device_pointer)
/* Do not compact if memory allocation for compacted acceleration structure fails.
* Can just use the uncompacted one then, so succeed here regardless. */
return !have_error();
}
optix_assert(optixAccelCompact(
context, NULL, out_handle, compacted_data.device_pointer, compacted_size, &out_handle));
@@ -1216,8 +1096,6 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
std::swap(out_data.device_size, compacted_data.device_size);
std::swap(out_data.device_pointer, compacted_data.device_pointer);
/* Original acceleration structure memory is freed when 'compacted_data' goes out of scope.
*/
}
}
@@ -1300,27 +1178,20 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
int ka = max(k0 - 1, curve.first_key);
int kb = min(k1 + 1, curve.first_key + curve.num_keys - 1);
index_data[i] = i * 4;
float4 *const v = vertex_data.data() + step * num_vertices + index_data[i];
# if OPTIX_ABI_VERSION >= 55
v[0] = make_float4(keys[ka].x, keys[ka].y, keys[ka].z, curve_radius[ka]);
v[1] = make_float4(keys[k0].x, keys[k0].y, keys[k0].z, curve_radius[k0]);
v[2] = make_float4(keys[k1].x, keys[k1].y, keys[k1].z, curve_radius[k1]);
v[3] = make_float4(keys[kb].x, keys[kb].y, keys[kb].z, curve_radius[kb]);
# else
const float4 px = make_float4(keys[ka].x, keys[k0].x, keys[k1].x, keys[kb].x);
const float4 py = make_float4(keys[ka].y, keys[k0].y, keys[k1].y, keys[kb].y);
const float4 pz = make_float4(keys[ka].z, keys[k0].z, keys[k1].z, keys[kb].z);
const float4 pw = make_float4(
curve_radius[ka], curve_radius[k0], curve_radius[k1], curve_radius[kb]);
/* Convert Catmull-Rom data to B-spline. */
/* Convert Catmull-Rom data to Bezier spline. */
static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f;
static const float4 cr2bsp1 = make_float4(-2, 11, -4, +1) / 6.f;
static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f;
static const float4 cr2bsp3 = make_float4(-2, +5, -4, +7) / 6.f;
index_data[i] = i * 4;
float4 *const v = vertex_data.data() + step * num_vertices + index_data[i];
v[0] = make_float4(
dot(cr2bsp0, px), dot(cr2bsp0, py), dot(cr2bsp0, pz), dot(cr2bsp0, pw));
v[1] = make_float4(
@@ -1329,7 +1200,6 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
dot(cr2bsp2, px), dot(cr2bsp2, py), dot(cr2bsp2, pz), dot(cr2bsp2, pw));
v[3] = make_float4(
dot(cr2bsp3, px), dot(cr2bsp3, py), dot(cr2bsp3, pz), dot(cr2bsp3, pw));
# endif
}
else {
BoundBox bounds = BoundBox::empty;
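For reference, the cr2bsp rows above form the uniform cubic Catmull-Rom to B-spline basis change (feeding the OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE input on the pre-55 ABI), applied componentwise to positions and radii:

\begin{pmatrix} v_0 \\ v_1 \\ v_2 \\ v_3 \end{pmatrix}
= \frac{1}{6}
\begin{pmatrix}
 7 & -4 &  5 & -2 \\
-2 & 11 & -4 &  1 \\
 1 & -4 & 11 & -2 \\
-2 &  5 & -4 &  7
\end{pmatrix}
\begin{pmatrix} p_{ka} \\ p_{k0} \\ p_{k1} \\ p_{kb} \end{pmatrix}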
@@ -1371,11 +1241,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
OptixBuildInput build_input = {};
if (hair->curve_shape == CURVE_THICK) {
build_input.type = OPTIX_BUILD_INPUT_TYPE_CURVES;
# if OPTIX_ABI_VERSION >= 55
build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
# else
build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
# endif
build_input.curveArray.numPrimitives = num_segments;
build_input.curveArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
build_input.curveArray.numVertices = num_vertices;
@@ -1389,7 +1255,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
}
else {
/* Disable visibility test any-hit program, since it is already checked during
* intersection. Those trace calls that require any-hit can force it with a ray flag. */
* intersection. Those trace calls that require anyhit can force it with a ray flag. */
build_flags |= OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT;
build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
@@ -1469,86 +1335,6 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
build_input.triangleArray.numSbtRecords = 1;
build_input.triangleArray.primitiveIndexOffset = mesh->prim_offset;
if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
progress.set_error("Failed to build OptiX acceleration structure");
}
}
else if (geom->geometry_type == Geometry::POINTCLOUD) {
/* Build BLAS for points primitives. */
PointCloud *const pointcloud = static_cast<PointCloud *const>(geom);
const size_t num_points = pointcloud->num_points();
if (num_points == 0) {
return;
}
size_t num_motion_steps = 1;
Attribute *motion_points = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if (motion_blur && pointcloud->get_use_motion_blur() && motion_points) {
num_motion_steps = pointcloud->get_motion_steps();
}
device_vector<OptixAabb> aabb_data(this, "optix temp aabb data", MEM_READ_ONLY);
aabb_data.alloc(num_points * num_motion_steps);
/* Get AABBs for each motion step. */
for (size_t step = 0; step < num_motion_steps; ++step) {
/* The center step for motion vertices is not stored in the attribute. */
const float3 *points = pointcloud->get_points().data();
const float *radius = pointcloud->get_radius().data();
size_t center_step = (num_motion_steps - 1) / 2;
if (step != center_step) {
size_t attr_offset = (step > center_step) ? step - 1 : step;
/* Technically this is a float4 array, but sizeof(float3) == sizeof(float4). */
points = motion_points->data_float3() + attr_offset * num_points;
}
for (size_t i = 0; i < num_points; ++i) {
const PointCloud::Point point = pointcloud->get_point(i);
BoundBox bounds = BoundBox::empty;
point.bounds_grow(points, radius, bounds);
const size_t index = step * num_points + i;
aabb_data[index].minX = bounds.min.x;
aabb_data[index].minY = bounds.min.y;
aabb_data[index].minZ = bounds.min.z;
aabb_data[index].maxX = bounds.max.x;
aabb_data[index].maxY = bounds.max.y;
aabb_data[index].maxZ = bounds.max.z;
}
}
/* Upload AABB data to GPU. */
aabb_data.copy_to_device();
vector<device_ptr> aabb_ptrs;
aabb_ptrs.reserve(num_motion_steps);
for (size_t step = 0; step < num_motion_steps; ++step) {
aabb_ptrs.push_back(aabb_data.device_pointer + step * num_points * sizeof(OptixAabb));
}
/* Disable visibility test any-hit program, since it is already checked during
* intersection. Those trace calls that require any-hit can force it with a ray flag.
* For those, force a single any-hit call, so shadow record-all behavior works correctly. */
unsigned int build_flags = OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT |
OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
OptixBuildInput build_input = {};
build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
# if OPTIX_ABI_VERSION < 23
build_input.aabbArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
build_input.aabbArray.numPrimitives = num_points;
build_input.aabbArray.strideInBytes = sizeof(OptixAabb);
build_input.aabbArray.flags = &build_flags;
build_input.aabbArray.numSbtRecords = 1;
build_input.aabbArray.primitiveIndexOffset = pointcloud->prim_offset;
# else
build_input.customPrimitiveArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
build_input.customPrimitiveArray.numPrimitives = num_points;
build_input.customPrimitiveArray.strideInBytes = sizeof(OptixAabb);
build_input.customPrimitiveArray.flags = &build_flags;
build_input.customPrimitiveArray.numSbtRecords = 1;
build_input.customPrimitiveArray.primitiveIndexOffset = pointcloud->prim_offset;
# endif
if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
progress.set_error("Failed to build OptiX acceleration structure");
}
@@ -1636,22 +1422,9 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
instance.sbtOffset = PG_HITD_MOTION - PG_HITD;
}
}
else if (ob->get_geometry()->geometry_type == Geometry::POINTCLOUD) {
/* Use the hit group that has an intersection program for point clouds. */
instance.sbtOffset = PG_HITD_POINTCLOUD - PG_HITD;
/* Also skip point clouds in local trace calls. */
instance.visibilityMask |= 4;
}
# if OPTIX_ABI_VERSION < 55
/* Cannot disable any-hit program for thick curves, since it needs to filter out end-caps. */
else
# endif
{
else {
/* Can disable __anyhit__kernel_optix_visibility_test by default (except for thick curves,
 * since it needs to filter out end-caps there).
 * It is enabled where necessary (when the visibility mask exceeds 8 bits, or when other
 * any-hit programs such as __anyhit__kernel_optix_shadow_all_hit are required) via
 * OPTIX_RAY_FLAG_ENFORCE_ANYHIT. */
@@ -1721,6 +1494,9 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
cuMemcpyHtoD(motion_transform_gpu, &motion_transform, motion_transform_size);
delete[] reinterpret_cast<uint8_t *>(&motion_transform);
/* Disable instance transform if object uses motion transform already. */
instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
/* Get traversable handle to motion transform. */
optixConvertPointerToTraversableHandle(context,
motion_transform_gpu,
@@ -1734,6 +1510,10 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
/* Set transform matrix. */
memcpy(instance.transform, &ob->get_tfm(), sizeof(instance.transform));
}
else {
/* Disable instance transform if geometry already has it applied to vertex data. */
instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
}
}
}


@@ -44,8 +44,6 @@ enum {
PG_HITV, /* __VOLUME__ hit group. */
PG_HITD_MOTION,
PG_HITS_MOTION,
PG_HITD_POINTCLOUD,
PG_HITS_POINTCLOUD,
PG_CALL_SVM_AO,
PG_CALL_SVM_BEVEL,
NUM_PROGRAM_GROUPS
@@ -54,9 +52,9 @@ enum {
static const int MISS_PROGRAM_GROUP_OFFSET = PG_MISS;
static const int NUM_MIS_PROGRAM_GROUPS = 1;
static const int HIT_PROGAM_GROUP_OFFSET = PG_HITD;
static const int NUM_HIT_PROGRAM_GROUPS = 8;
static const int NUM_HIT_PROGRAM_GROUPS = 6;
static const int CALLABLE_PROGRAM_GROUPS_BASE = PG_CALL_SVM_AO;
static const int NUM_CALLABLE_PROGRAM_GROUPS = 2;
static const int NUM_CALLABLE_PROGRAM_GROUPS = 3;
/* List of OptiX pipelines. */
enum { PIP_SHADE_RAYTRACE, PIP_INTERSECT, NUM_PIPELINES };
@@ -100,11 +98,11 @@ class OptiXDevice : public CUDADevice {
/* OptiX denoiser state and scratch buffers, stored in a single memory buffer.
* The memory layout is as follows: [denoiser state][scratch buffer]. */
device_only_memory<unsigned char> state;
OptixDenoiserSizes sizes = {};
size_t scratch_offset = 0;
size_t scratch_size = 0;
bool use_pass_albedo = false;
bool use_pass_normal = false;
bool use_pass_flow = false;
};
Denoiser denoiser_;


@@ -47,9 +47,7 @@ static bool is_optix_specific_kernel(DeviceKernel kernel)
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
}
bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
const int work_size,
DeviceKernelArguments const &args)
bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[])
{
if (!is_optix_specific_kernel(kernel)) {
return CUDADeviceQueue::enqueue(kernel, work_size, args);
@@ -71,7 +69,7 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
cuda_device_assert(
cuda_device_,
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, path_index_array),
args.values[0], // &d_path_index
args[0], // &d_path_index
sizeof(device_ptr),
cuda_stream_));
@@ -80,7 +78,7 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
cuda_device_assert(
cuda_device_,
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
args.values[1], // &d_render_buffer
args[1], // &d_render_buffer
sizeof(device_ptr),
cuda_stream_));
}
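On the removed side, OptiX-specific kernels pass their first two arguments through the persistent launch-params block rather than as CUDA kernel arguments, so the copies must be queued on the same stream as the launch. A condensed sketch (identifiers as above; error handling elided, and the trailing launch is an assumption based on the surrounding code):

/* Sketch: patch the two device pointers into KernelParamsOptiX before launching. */
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, path_index_array),
                  args.values[0], /* &d_path_index */
                  sizeof(device_ptr),
                  cuda_stream_);
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
                  args.values[1], /* &d_render_buffer */
                  sizeof(device_ptr),
                  cuda_stream_);
/* ...then optixLaunch() is issued on cuda_stream_, ordering it after the copies. */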


@@ -31,9 +31,7 @@ class OptiXDeviceQueue : public CUDADeviceQueue {
virtual void init_execution() override;
virtual bool enqueue(DeviceKernel kernel,
const int work_size,
DeviceKernelArguments const &args) override;
virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) override;
};
CCL_NAMESPACE_END


@@ -19,7 +19,6 @@
#include "device/kernel.h"
#include "device/graphics_interop.h"
#include "util/debug.h"
#include "util/log.h"
#include "util/map.h"
#include "util/string.h"
@@ -32,74 +31,6 @@ class device_memory;
struct KernelWorkTile;
/* Container for device kernel arguments with type correctness ensured by API. */
struct DeviceKernelArguments {
enum Type {
POINTER,
INT32,
FLOAT32,
BOOLEAN,
KERNEL_FILM_CONVERT,
};
static const int MAX_ARGS = 18;
Type types[MAX_ARGS];
void *values[MAX_ARGS];
size_t sizes[MAX_ARGS];
size_t count = 0;
DeviceKernelArguments()
{
}
template<class T> DeviceKernelArguments(const T *arg)
{
add(arg);
}
template<class T, class... Args> DeviceKernelArguments(const T *first, Args... args)
{
add(first);
add(args...);
}
void add(const KernelFilmConvert *value)
{
add(KERNEL_FILM_CONVERT, value, sizeof(KernelFilmConvert));
}
void add(const device_ptr *value)
{
add(POINTER, value, sizeof(device_ptr));
}
void add(const int32_t *value)
{
add(INT32, value, sizeof(int32_t));
}
void add(const float *value)
{
add(FLOAT32, value, sizeof(float));
}
void add(const bool *value)
{
add(BOOLEAN, value, 4);
}
void add(const Type type, const void *value, size_t size)
{
assert(count < MAX_ARGS);
types[count] = type;
values[count] = (void *)value;
sizes[count] = size;
count++;
}
template<typename T, typename... Args> void add(const T *first, Args... args)
{
add(first);
add(args...);
}
};
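A usage sketch for the container being removed here, mirroring the call sites earlier in this diff: arguments are captured as typed pointers, so each pointee must stay alive until enqueue() consumes the list (the kernel name below is only a placeholder):

/* Sketch: typed argument capture; pointers must outlive the enqueue() call. */
const int work_size = buffer_params.width * buffer_params.height;
DeviceKernelArguments args(&buffer.device_pointer,  /* POINTER */
                           &buffer_params.width,    /* INT32 */
                           &buffer_params.height);  /* INT32 */
queue->enqueue(kernel, work_size, args);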
/* Abstraction of a command queue for a device.
* Provides an API to schedule kernel execution in a specific queue with the minimal possible
* overhead from the driver side.
@@ -135,9 +66,7 @@ class DeviceQueue {
* - int: pass pointer to the int
* - device memory: pass pointer to device_memory.device_pointer
* Return false if there was an error executing this or a previous kernel. */
virtual bool enqueue(DeviceKernel kernel,
const int work_size,
DeviceKernelArguments const &args) = 0;
virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) = 0;
/* Wait until all enqueued kernels have finished execution.
* Return false if there was an error executing any of the enqueued kernels. */


@@ -31,7 +31,7 @@ struct Node;
struct NodeType;
struct Transform;
/* NOTE: in the following macros we use "type const &" instead of "const type &"
/* Note: in the following macros we use "type const &" instead of "const type &"
* to avoid issues when pasting a pointer type. */
#define NODE_SOCKET_API_BASE_METHODS(type_, name, string_name) \
const SocketType *get_##name##_socket() const \
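The const placement matters once a pointer type is pasted into the macro; a minimal illustration (hypothetical macros, not part of this header):

/* With type_ = Foo *, textual pasting yields:
 *   const type_ &  ->  const Foo * &   (reference to pointer-to-const Foo -- wrong)
 *   type_ const &  ->  Foo * const &   (reference to const pointer -- intended) */
#define PARAM_CONST_FIRST(type_) void set_value(const type_ &value);
#define PARAM_CONST_AFTER(type_) void set_value(type_ const &value);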

Some files were not shown because too many files have changed in this diff Show More