cleanups

avoid some unnecessary changes
add comment in ModifierTypeInfo
2020-03-08 12:39:19 +01:00 · 2020-03-08 12:29:24 +01:00 · 2020-03-08 12:24:51 +01:00 · 2020-03-08 12:16:33 +01:00 · 2020-03-08 12:13:44 +01:00 · 2020-03-08 12:09:49 +01:00
1679 changed files with 44193 additions and 63531 deletions
--- a/.arcconfig
+++ b/.arcconfig
@@ -1,6 +1,7 @@
 {
 	"project_id" : "Blender",
 	"conduit_uri" : "https://developer.blender.org/",
+	"phabricator.uri" : "https://developer.blender.org/",
 	"git.default-relative-commit" : "origin/master",
 	"arc.land.update.default" : "rebase",
 	"arc.land.onto.default" : "master"
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -180,6 +180,15 @@ option(WITH_BULLET        "Enable Bullet (Physics Engine)" ON)
 option(WITH_SYSTEM_BULLET "Use the systems bullet library (currently unsupported due to missing features in upstream!)" )
 mark_as_advanced(WITH_SYSTEM_BULLET)
 option(WITH_OPENCOLORIO   "Enable OpenColorIO color management" ON)
+if(APPLE)
+  # There's no OpenXR runtime in sight for macOS, neither is code well
+  # tested there -> disable it by default.
+  option(WITH_XR_OPENXR   "Enable VR features through the OpenXR specification" OFF)
+  mark_as_advanced(WITH_XR_OPENXR)
+else()
+  # Disabled until there's more than just the build system stuff. Should be enabled soon.
+  option(WITH_XR_OPENXR   "Enable VR features through the OpenXR specification" OFF)
+endif()

 # Compositor
 option(WITH_COMPOSITOR         "Enable the tile based nodal compositor" ON)
@@ -633,6 +642,9 @@ set_and_warn_dependency(WITH_TBB WITH_OPENIMAGEDENOISE  OFF)
 set_and_warn_dependency(WITH_TBB WITH_OPENVDB           OFF)
 set_and_warn_dependency(WITH_TBB WITH_MOD_FLUID         OFF)

+# OpenVDB uses 'half' type from OpenEXR & fails to link without OpenEXR enabled.
+set_and_warn_dependency(WITH_IMAGE_OPENEXR WITH_OPENVDB OFF)
+
 # auto enable openimageio for cycles
 if(WITH_CYCLES)
  set(WITH_OPENIMAGEIO ON)
@@ -1695,6 +1707,7 @@ if(FIRST_RUN)
  info_cfg_option(WITH_CYCLES)
  info_cfg_option(WITH_FREESTYLE)
  info_cfg_option(WITH_OPENCOLORIO)
+  info_cfg_option(WITH_XR_OPENXR)
  info_cfg_option(WITH_OPENIMAGEDENOISE)
  info_cfg_option(WITH_OPENVDB)
  info_cfg_option(WITH_ALEMBIC)
--- a/build_files/build_environment/CMakeLists.txt
+++ b/build_files/build_environment/CMakeLists.txt
@@ -99,6 +99,7 @@ else()
 endif()
 include(cmake/openimagedenoise.cmake)
 include(cmake/embree.cmake)
+include(cmake/xr_openxr.cmake)

 if(WITH_WEBP)
  include(cmake/webp.cmake)
--- a/build_files/build_environment/cmake/harvest.cmake
+++ b/build_files/build_environment/cmake/harvest.cmake
@@ -161,6 +161,8 @@ harvest(opensubdiv/include opensubdiv/include "*.h")
 harvest(opensubdiv/lib opensubdiv/lib "*.a")
 harvest(openvdb/include/openvdb openvdb/include/openvdb "*.h")
 harvest(openvdb/lib openvdb/lib "*.a")
+harvest(xr_openxr_sdk/include/openxr xr_openxr_sdk/include/openxr "*.h")
+harvest(xr_openxr_sdk/lib xr_openxr_sdk/lib "*.a")
 harvest(osl/bin osl/bin "oslc")
 harvest(osl/include osl/include "*.h")
 harvest(osl/lib osl/lib "*.a")
--- a/build_files/build_environment/cmake/versions.cmake
+++ b/build_files/build_environment/cmake/versions.cmake
@@ -318,3 +318,7 @@ set(LIBGLU_HASH 151aef599b8259efe9acd599c96ea2a3)
 set(MESA_VERSION 18.3.1)
 set(MESA_URI ftp://ftp.freedesktop.org/pub/mesa//mesa-${MESA_VERSION}.tar.xz)
 set(MESA_HASH d60828056d77bfdbae0970f9b15fb1be)
+
+set(XR_OPENXR_SDK_VERSION 1.0.6)
+set(XR_OPENXR_SDK_URI https://github.com/KhronosGroup/OpenXR-SDK/archive/release-${XR_OPENXR_SDK_VERSION}.tar.gz)
+set(XR_OPENXR_SDK_HASH 21daea7c3bfec365298d779a0e19caa1)
--- a/build_files/build_environment/cmake/xr_openxr.cmake
+++ b/build_files/build_environment/cmake/xr_openxr.cmake
@@ -0,0 +1,58 @@
+# ***** BEGIN GPL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+# ***** END GPL LICENSE BLOCK *****
+
+
+# Keep flags in sync with install_deps.sh ones in compile_XR_OpenXR_SDK()
+set(XR_OPENXR_SDK_EXTRA_ARGS
+  -DBUILD_FORCE_GENERATION=OFF
+  -DBUILD_LOADER=ON
+  -DDYNAMIC_LOADER=OFF
+)
+
+if(UNIX AND NOT APPLE)
+  list(APPEND XR_OPENXR_SDK_EXTRA_ARGS
+    -DBUILD_WITH_WAYLAND_HEADERS=OFF
+    -DBUILD_WITH_XCB_HEADERS=OFF
+    -DBUILD_WITH_XLIB_HEADERS=ON
+  )
+endif()
+
+ExternalProject_Add(external_xr_openxr_sdk
+  URL ${XR_OPENXR_SDK_URI}
+  DOWNLOAD_DIR ${DOWNLOAD_DIR}
+  URL_HASH MD5=${XR_OPENXR_SDK_HASH}
+  PREFIX ${BUILD_DIR}/xr_openxr_sdk
+  CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/xr_openxr_sdk ${DEFAULT_CMAKE_FLAGS} ${XR_OPENXR_SDK_EXTRA_ARGS}
+  INSTALL_DIR ${LIBDIR}/xr_openxr_sdk
+)
+
+if(WIN32)
+  if(BUILD_MODE STREQUAL Release)
+    ExternalProject_Add_Step(external_xr_openxr_sdk after_install
+      COMMAND ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/xr_openxr_sdk/include/openxr ${HARVEST_TARGET}/xr_openxr_sdk/include/openxr
+      COMMAND ${CMAKE_COMMAND} -E copy_directory ${LIBDIR}/xr_openxr_sdk/lib ${HARVEST_TARGET}/xr_openxr_sdk/lib
+      DEPENDEES install
+    )
+  endif()
+  if(BUILD_MODE STREQUAL Debug)
+    ExternalProject_Add_Step(external_xr_openxr_sdk after_install
+      COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/xr_openxr_sdk/lib/openxr_loader.lib ${HARVEST_TARGET}/xr_openxr_sdk/lib/openxr_loader_d.lib
+      DEPENDEES install
+    )
+  endif()
+endif()
--- a/build_files/build_environment/install_deps.sh
+++ b/build_files/build_environment/install_deps.sh
@@ -52,16 +52,19 @@ getopt \
 -o s:i:t:h \
 --long source:,install:,tmp:,info:,threads:,help,show-deps,no-sudo,no-build,no-confirm,\
 with-all,with-opencollada,with-jack,with-embree,with-oidn,\
-ver-ocio:,ver-oiio:,ver-llvm:,ver-osl:,ver-osd:,ver-openvdb:,\
+ver-ocio:,ver-oiio:,ver-llvm:,ver-osl:,ver-osd:,ver-openvdb:,ver-xr-openxr:,\
 force-all,force-python,force-numpy,force-boost,\
 force-ocio,force-openexr,force-oiio,force-llvm,force-osl,force-osd,force-openvdb,\
 force-ffmpeg,force-opencollada,force-alembic,force-embree,force-oidn,force-usd,\
+force-xr-openxr,\
 build-all,build-python,build-numpy,build-boost,\
 build-ocio,build-openexr,build-oiio,build-llvm,build-osl,build-osd,build-openvdb,\
 build-ffmpeg,build-opencollada,build-alembic,build-embree,build-oidn,build-usd,\
+build-xr-openxr,\
 skip-python,skip-numpy,skip-boost,\
 skip-ocio,skip-openexr,skip-oiio,skip-llvm,skip-osl,skip-osd,skip-openvdb,\
-skip-ffmpeg,skip-opencollada,skip-alembic,skip-embree,skip-oidn,skip-usd \
+skip-ffmpeg,skip-opencollada,skip-alembic,skip-embree,skip-oidn,skip-usd,\
+skip-xr-openxr \
 -- "$@" \
 )

@@ -169,6 +172,9 @@ ARGUMENTS_INFO="\"COMMAND LINE ARGUMENTS:
    --ver-openvdb=<ver>
        Force version of OpenVDB library.

+    --ver-xr-openxr=<ver>
+        Force version of OpenXR-SDK.
+
    Note about the --ver-foo options:
        It may not always work as expected (some libs are actually checked out from a git rev...), yet it might help
        to fix some build issues (like LLVM mismatch with the version used by your graphic system).
@@ -224,6 +230,9 @@ ARGUMENTS_INFO="\"COMMAND LINE ARGUMENTS:
    --build-usd
        Force the build of Universal Scene Description.

+    --build-xr-openxr
+        Force the build of OpenXR-SDK.
+
    Note about the --build-foo options:
        * They force the script to prefer building dependencies rather than using available packages.
          This may make things simpler and allow working around some distribution bugs, but on the other hand it will
@@ -285,6 +294,9 @@ ARGUMENTS_INFO="\"COMMAND LINE ARGUMENTS:
    --force-usd
        Force the rebuild of Universal Scene Description.

+    --force-xr-openxr
+        Force the rebuild of OpenXR-SDK.
+
    Note about the --force-foo options:
        * They obviously only have an effect if those libraries are built by this script
          (i.e. if there is no available and satisfactory package)!
@@ -337,7 +349,10 @@ ARGUMENTS_INFO="\"COMMAND LINE ARGUMENTS:
        Unconditionally skip FFMpeg installation/building.

    --skip-usd
-        Unconditionally skip Universal Scene Description installation/building.\""
+        Unconditionally skip Universal Scene Description installation/building.
+
+    --skip-xr-openxr
+        Unconditionally skip OpenXR-SDK installation/building.\""

 # ----------------------------------------------------------------------------
 # Main Vars
@@ -454,6 +469,11 @@ FFMPEG_FORCE_REBUILD=false
 FFMPEG_SKIP=false
 _ffmpeg_list_sep=";"

+XR_OPENXR_VERSION="1.0.6"
+XR_OPENXR_FORCE_BUILD=false
+XR_OPENXR_FORCE_REBUILD=false
+XR_OPENXR_SKIP=false
+
 # FFMPEG optional libs.
 VORBIS_USE=false
 VORBIS_DEV=""
@@ -624,6 +644,11 @@ while true; do
      OPENVDB_VERSION_MIN=$OPENVDB_VERSION
      shift; shift; continue
    ;;
+    --ver-xr-openxr)
+      XR_OPENXR_VERSION="$2"
+      XR_OPENXR_VERSION_MIN=$XR_OPENXR_VERSION
+      shift; shift; continue
+    ;;
    --build-all)
      PYTHON_FORCE_BUILD=true
      NUMPY_FORCE_BUILD=true
@@ -641,6 +666,7 @@ while true; do
      FFMPEG_FORCE_BUILD=true
      ALEMBIC_FORCE_BUILD=true
      USD_FORCE_BUILD=true
+      XR_OPENXR_FORCE_BUILD=true
      shift; continue
    ;;
    --build-python)
@@ -695,6 +721,9 @@ while true; do
    --build-usd)
      USD_FORCE_BUILD=true; shift; continue
    ;;
+    --build-xr-openxr)
+      XR_OPENXR_FORCE_BUILD=true; shift; continue
+    ;;
    --force-all)
      PYTHON_FORCE_REBUILD=true
      NUMPY_FORCE_REBUILD=true
@@ -712,6 +741,7 @@ while true; do
      FFMPEG_FORCE_REBUILD=true
      ALEMBIC_FORCE_REBUILD=true
      USD_FORCE_REBUILD=true
+      XR_OPENXR_FORCE_REBUILD=true
      shift; continue
    ;;
    --force-python)
@@ -764,6 +794,9 @@ while true; do
    --force-usd)
      USD_FORCE_REBUILD=true; shift; continue
    ;;
+    --force-xr-openxr)
+      XR_OPENXR_FORCE_REBUILD=true; shift; continue
+    ;;
    --skip-python)
      PYTHON_SKIP=true; shift; continue
    ;;
@@ -812,6 +845,9 @@ while true; do
    --skip-usd)
      USD_SKIP=true; shift; continue
    ;;
+    --skip-xr-openxr)
+      XR_OPENXR_SKIP=true; shift; continue
+    ;;
    --)
      # no more arguments to parse
      break
@@ -940,6 +976,12 @@ OIDN_SOURCE=( "https://github.com/OpenImageDenoise/oidn/releases/download/v${OID

 FFMPEG_SOURCE=( "http://ffmpeg.org/releases/ffmpeg-$FFMPEG_VERSION.tar.bz2" )

+XR_OPENXR_USE_REPO=false
+XR_OPENXR_SOURCE=("https://github.com/KhronosGroup/OpenXR-SDK/archive/release-${XR_OPENXR_VERSION}.tar.gz")
+#~ XR_OPENXR_SOURCE_REPO=("https://github.com/KhronosGroup/OpenXR-SDK-Source.git")
+#~ XR_OPENXR_REPO_UID="5292e57fda47561e672fba0a4b6e545c0f25dd8d"
+#~ XR_OPENXR_REPO_BRANCH="master"
+
 # C++11 is required now
 CXXFLAGS_BACK=$CXXFLAGS
 CXXFLAGS="$CXXFLAGS -std=c++11"
@@ -982,7 +1024,8 @@ You may also want to build them yourself (optional ones are [between brackets]):
    * [Embree $EMBREE_VERSION] (from $EMBREE_SOURCE).
    * [OpenImageDenoise $OIDN_VERSION] (from $OIDN_SOURCE).
    * [Alembic $ALEMBIC_VERSION] (from $ALEMBIC_SOURCE).
-    * [Universal Scene Description $USD_VERSION] (from $USD_SOURCE).\""
+    * [Universal Scene Description $USD_VERSION] (from $USD_SOURCE).
+    * [OpenXR-SDK $XR_OPENXR_VERSION] (from $XR_OPENXR_SOURCE).\""

 if [ "$DO_SHOW_DEPS" = true ]; then
  PRINT ""
@@ -3058,6 +3101,116 @@ compile_FFmpeg() {
  fi
 }

+# ----------------------------------------------------------------------------
+# Build OpenXR SDK
+
+_init_xr_openxr_sdk() {
+  _src=$SRC/XR-OpenXR-SDK-$XR_OPENXR_VERSION
+  _git=true
+  _inst=$INST/xr-openxr-sdk-$XR_OPENXR_VERSION
+  _inst_shortcut=$INST/xr-openxr-sdk
+}
+
+_update_deps_xr_openxr_sdk() {
+  :
+}
+
+clean_XR_OpenXR_SDK() {
+  _init_xr_openxr_sdk
+  _clean
+  _update_deps_xr_openxr_sdk
+}
+
+compile_XR_OpenXR_SDK() {
+  if [ "$NO_BUILD" = true ]; then
+    WARNING "--no-build enabled, OpenXR will not be compiled!"
+    return
+  fi
+
+  # To be changed each time we make edits that would modify the compiled result!
+  xr_openxr_magic=0
+  _init_xr_openxr_sdk
+
+  # Clean install if needed! The key must match the one given to magic_compile_set below.
+  magic_compile_check xr-openxr-$XR_OPENXR_VERSION $xr_openxr_magic
+  if [ $? -eq 1 -o "$XR_OPENXR_FORCE_REBUILD" = true ]; then
+    clean_XR_OpenXR_SDK
+  fi
+
+  if [ ! -d $_inst ]; then
+    INFO "Building XR-OpenXR-SDK-$XR_OPENXR_VERSION"
+    _is_building=true
+
+    # Rebuild dependencies as well!
+    _update_deps_xr_openxr_sdk
+
+    prepare_opt
+
+    if [ ! -d $_src ]; then
+      mkdir -p $SRC
+
+      if [ "$XR_OPENXR_USE_REPO" = true ]; then
+        git clone $XR_OPENXR_SOURCE_REPO $_src
+      else
+        download XR_OPENXR_SOURCE[@] "$_src.tar.gz"
+        INFO "Unpacking XR-OpenXR-SDK-$XR_OPENXR_VERSION"
+        tar -C $SRC --transform "s,(.*/?)OpenXR-SDK-[^/]*(.*),\1XR-OpenXR-SDK-$XR_OPENXR_VERSION\2,x" \
+            -xf $_src.tar.gz
+      fi
+    fi
+
+    cd $_src
+
+    if [ "$XR_OPENXR_USE_REPO" = true ]; then
+      git pull origin $XR_OPENXR_REPO_BRANCH
+
+      # Stick to same rev as windows' libs...
+      git checkout $XR_OPENXR_REPO_UID
+      git reset --hard
+    fi
+
+    # Always refresh the whole build!
+    if [ -d build ]; then
+      rm -rf build
+    fi
+    mkdir build
+    cd build
+
+    # Keep flags in sync with XR_OPENXR_SDK_EXTRA_ARGS in xr_openxr.cmake!
+    cmake_d="-D CMAKE_BUILD_TYPE=Release"
+    cmake_d="$cmake_d -D CMAKE_INSTALL_PREFIX=$_inst"
+    cmake_d="$cmake_d -D BUILD_FORCE_GENERATION=OFF"
+    cmake_d="$cmake_d -D BUILD_LOADER=ON"
+    cmake_d="$cmake_d -D DYNAMIC_LOADER=OFF"
+    cmake_d="$cmake_d -D BUILD_WITH_WAYLAND_HEADERS=OFF"
+    cmake_d="$cmake_d -D BUILD_WITH_XCB_HEADERS=OFF"
+    cmake_d="$cmake_d -D BUILD_WITH_XLIB_HEADERS=ON"
+
+    cmake $cmake_d ..
+
+    make -j$THREADS && make install
+    make clean
+
+    if [ -d $_inst ]; then
+      _create_inst_shortcut
+    else
+      ERROR "XR-OpenXR-SDK-$XR_OPENXR_VERSION failed to compile, exiting"
+      exit 1
+    fi
+
+    magic_compile_set xr-openxr-$XR_OPENXR_VERSION $xr_openxr_magic
+
+    cd $CWD
+    INFO "Done compiling XR-OpenXR-SDK-$XR_OPENXR_VERSION!"
+    _is_building=false
+  else
+    INFO "Own XR-OpenXR-SDK-$XR_OPENXR_VERSION is up to date, nothing to do!"
+    INFO "If you want to force rebuild of this lib, use the --force-xr-openxr option."
+  fi
+
+  run_ldconfig "xr-openxr-sdk"
+}
+

 # ----------------------------------------------------------------------------
 # Install on DEB-like
@@ -3602,6 +3755,18 @@ install_DEB() {
      compile_FFmpeg
    fi
  fi
+
+  PRINT ""
+  if [ "$XR_OPENXR_SKIP" = true ]; then
+    WARNING "Skipping OpenXR-SDK installation, as requested..."
+  elif [ "$XR_OPENXR_FORCE_BUILD" = true ]; then
+    INFO "Forced OpenXR-SDK building, as requested..."
+    compile_XR_OpenXR_SDK
+  else
+    # No package currently!
+    PRINT ""
+    compile_XR_OpenXR_SDK
+  fi
 }


@@ -4208,6 +4373,17 @@ install_RPM() {
      compile_FFmpeg
    fi
  fi
+
+  PRINT ""
+  if [ "$XR_OPENXR_SKIP" = true ]; then
+    WARNING "Skipping OpenXR-SDK installation, as requested..."
+  elif [ "$XR_OPENXR_FORCE_BUILD" = true ]; then
+    INFO "Forced OpenXR-SDK building, as requested..."
+    compile_XR_OpenXR_SDK
+  else
+    # No package currently!
+    compile_XR_OpenXR_SDK
+  fi
 }


@@ -4709,6 +4885,17 @@ install_ARCH() {
      compile_FFmpeg
    fi
  fi
+
+  PRINT ""
+  if [ "$XR_OPENXR_SKIP" = true ]; then
+    WARNING "Skipping OpenXR-SDK installation, as requested..."
+  elif [ "$XR_OPENXR_FORCE_BUILD" = true ]; then
+    INFO "Forced OpenXR-SDK building, as requested..."
+    compile_XR_OpenXR_SDK
+  else
+    # No package currently!
+    compile_XR_OpenXR_SDK
+  fi
 }


@@ -4906,6 +5093,17 @@ install_OTHER() {
    INFO "Forced FFMpeg building, as requested..."
    compile_FFmpeg
  fi
+
+  PRINT ""
+  if [ "$XR_OPENXR_SKIP" = true ]; then
+    WARNING "Skipping OpenXR-SDK installation, as requested..."
+  elif [ "$XR_OPENXR_FORCE_BUILD" = true ]; then
+    INFO "Forced OpenXR-SDK building, as requested..."
+    compile_XR_OpenXR_SDK
+  else
+    # No package currently!
+    compile_XR_OpenXR_SDK
+  fi
 }

 # ----------------------------------------------------------------------------
@@ -5174,6 +5372,17 @@ print_info() {
    fi
  fi

+  if [ "$XR_OPENXR_SKIP" = false ]; then
+    _1="-D WITH_XR_OPENXR=ON"
+    PRINT "  $_1"
+    _buildargs="$_buildargs $_1"
+    if [ -d $INST/xr-openxr-sdk ]; then
+      _1="-D XR_OPENXR_SDK_ROOT_DIR=$INST/xr-openxr-sdk"
+      PRINT "  $_1"
+      _buildargs="$_buildargs $_1"
+    fi
+  fi
+
  PRINT ""
  PRINT "Or even simpler, just run (in your blender-source dir):"
  PRINT "  make -j$THREADS BUILD_CMAKE_ARGS=\"$_buildargs\""
--- a/build_files/cmake/Modules/FindLLVM.cmake
+++ b/build_files/cmake/Modules/FindLLVM.cmake
@@ -49,7 +49,6 @@ if(NOT LLVM_ROOT_DIR)
          OUTPUT_VARIABLE LLVM_ROOT_DIR
          OUTPUT_STRIP_TRAILING_WHITESPACE)
  set(LLVM_ROOT_DIR ${LLVM_ROOT_DIR} CACHE PATH "Path to the LLVM installation")
-  set(LLVM_INCLUDE_DIRS ${LLVM_ROOT_DIR}/include CACHE PATH "Path to the LLVM include directory")
 endif()
 if(NOT LLVM_LIBPATH)
  execute_process(COMMAND ${LLVM_CONFIG} --libdir
--- a/build_files/cmake/Modules/FindXR-OpenXR-SDK.cmake
+++ b/build_files/cmake/Modules/FindXR-OpenXR-SDK.cmake
@@ -0,0 +1,73 @@
+# - Find OpenXR-SDK libraries
+# Find the native OpenXR-SDK includes and libraries
+#
+# Note that there is a distinction between the OpenXR standard and the SDK. The
+# latter provides utilities to use the standard but is not part of it. Most
+# importantly, it contains C headers and a loader library, which manages
+# dynamic linking to OpenXR runtimes like Monado, Windows Mixed Reality or
+# Oculus. See the repository for more details:
+# https://github.com/KhronosGroup/OpenXR-SDK
+#
+# This module defines
+#  XR_OPENXR_SDK_INCLUDE_DIRS, where to find OpenXR-SDK headers, Set when
+#                           XR_OPENXR_SDK_INCLUDE_DIR is found.
+#  XR_OPENXR_SDK_LIBRARIES, libraries to link against to use OpenXR.
+#  XR_OPENXR_SDK_ROOT_DIR, the base directory to search for OpenXR-SDK.
+#                        This can also be an environment variable.
+#  XR_OPENXR_SDK_FOUND, if false, do not try to use OpenXR-SDK.
+#
+# also defined, but not for general use are
+#  XR_OPENXR_SDK_LOADER_LIBRARY, where to find the OpenXR-SDK loader library.
+
+#=============================================================================
+# Distributed under the OSI-approved BSD License (the "License");
+# see accompanying file Copyright.txt for details.
+#
+# This software is distributed WITHOUT ANY WARRANTY; without even the
+# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the License for more information.
+#=============================================================================
+
+# If XR_OPENXR_SDK_ROOT_DIR was defined in the environment, use it (quoted so an unset variable compares as "").
+IF(NOT XR_OPENXR_SDK_ROOT_DIR AND NOT "$ENV{XR_OPENXR_SDK_ROOT_DIR}" STREQUAL "")
+  SET(XR_OPENXR_SDK_ROOT_DIR $ENV{XR_OPENXR_SDK_ROOT_DIR})
+ENDIF()
+
+SET(_xr_openxr_sdk_SEARCH_DIRS
+  ${XR_OPENXR_SDK_ROOT_DIR}
+  /opt/lib/xr-openxr-sdk
+)
+
+FIND_PATH(XR_OPENXR_SDK_INCLUDE_DIR
+  NAMES
+    openxr/openxr.h
+  HINTS
+    ${_xr_openxr_sdk_SEARCH_DIRS}
+  PATH_SUFFIXES
+    include
+)
+
+FIND_LIBRARY(XR_OPENXR_SDK_LOADER_LIBRARY
+  NAMES
+    openxr_loader
+  HINTS
+    ${_xr_openxr_sdk_SEARCH_DIRS}
+  PATH_SUFFIXES
+    lib64 lib
+)
+
+# handle the QUIETLY and REQUIRED arguments and set XR_OPENXR_SDK_FOUND to TRUE if
+# all listed variables are TRUE
+INCLUDE(FindPackageHandleStandardArgs)
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(XR_OPENXR_SDK DEFAULT_MSG
+    XR_OPENXR_SDK_LOADER_LIBRARY XR_OPENXR_SDK_INCLUDE_DIR)
+
+IF(XR_OPENXR_SDK_FOUND)
+  SET(XR_OPENXR_SDK_LIBRARIES ${XR_OPENXR_SDK_LOADER_LIBRARY})
+  SET(XR_OPENXR_SDK_INCLUDE_DIRS ${XR_OPENXR_SDK_INCLUDE_DIR})
+ENDIF(XR_OPENXR_SDK_FOUND)
+
+MARK_AS_ADVANCED(
+  XR_OPENXR_SDK_INCLUDE_DIR
+  XR_OPENXR_SDK_LOADER_LIBRARY
+)
--- a/build_files/cmake/config/blender_full.cmake
+++ b/build_files/cmake/config/blender_full.cmake
@@ -61,3 +61,6 @@ if(UNIX AND NOT APPLE)
  set(WITH_X11_XINPUT          ON  CACHE BOOL "" FORCE)
  set(WITH_X11_XF86VMODE       ON  CACHE BOOL "" FORCE)
 endif()
+if(NOT APPLE)
+  set(WITH_XR_OPENXR           ON  CACHE BOOL "" FORCE)
+endif()
--- a/build_files/cmake/config/blender_lite.cmake
+++ b/build_files/cmake/config/blender_lite.cmake
@@ -44,6 +44,7 @@ set(WITH_OPENAL              OFF CACHE BOOL "" FORCE)
 set(WITH_OPENCOLLADA         OFF CACHE BOOL "" FORCE)
 set(WITH_OPENCOLORIO         OFF CACHE BOOL "" FORCE)
 set(WITH_OPENIMAGEDENOISE    OFF CACHE BOOL "" FORCE)
+set(WITH_XR_OPENXR           OFF CACHE BOOL "" FORCE)
 set(WITH_OPENIMAGEIO         OFF CACHE BOOL "" FORCE)
 set(WITH_OPENMP              OFF CACHE BOOL "" FORCE)
 set(WITH_OPENSUBDIV          OFF CACHE BOOL "" FORCE)
--- a/build_files/cmake/config/blender_release.cmake
+++ b/build_files/cmake/config/blender_release.cmake
@@ -65,3 +65,6 @@ if(UNIX AND NOT APPLE)
  set(WITH_X11_XINPUT          ON  CACHE BOOL "" FORCE)
  set(WITH_X11_XF86VMODE       ON  CACHE BOOL "" FORCE)
 endif()
+if(NOT APPLE)
+  set(WITH_XR_OPENXR              ON  CACHE BOOL "" FORCE)
+endif()
--- a/build_files/cmake/macros.cmake
+++ b/build_files/cmake/macros.cmake
@@ -122,7 +122,7 @@ function(target_link_libraries_optimized
  )

  foreach(_LIB ${LIBS})
-    target_link_libraries(${TARGET} optimized "${_LIB}")
+    target_link_libraries(${TARGET} INTERFACE optimized "${_LIB}")
  endforeach()
 endfunction()

@@ -132,7 +132,7 @@ function(target_link_libraries_debug
  )

  foreach(_LIB ${LIBS})
-    target_link_libraries(${TARGET} debug "${_LIB}")
+    target_link_libraries(${TARGET} INTERFACE debug "${_LIB}")
  endforeach()
 endfunction()

@@ -170,6 +170,7 @@ function(blender_include_dirs_sys
 endfunction()

 function(blender_source_group
+  name
  sources
  )

@@ -205,6 +206,13 @@ function(blender_source_group
      source_group("${GROUP_ID}" FILES ${_SRC})
    endforeach()
  endif()
+
+  # if enabled, set the FOLDER property for visual studio projects
+  if(WINDOWS_USE_VISUAL_STUDIO_PROJECT_FOLDERS)
+    get_filename_component(FolderDir ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY)
+    string(REPLACE ${CMAKE_SOURCE_DIR} "" FolderDir ${FolderDir})
+    set_target_properties(${name} PROPERTIES FOLDER ${FolderDir})
+  endif()
 endfunction()


@@ -295,11 +303,11 @@ function(blender_add_lib__impl
        set(next_library_mode "${library_lower}")
      else()
        if("${next_library_mode}" STREQUAL "optimized")
-          target_link_libraries(${name} optimized ${library})
+          target_link_libraries(${name} INTERFACE optimized ${library})
        elseif("${next_library_mode}" STREQUAL "debug")
-          target_link_libraries(${name} debug ${library})
+          target_link_libraries(${name} INTERFACE debug ${library})
        else()
-          target_link_libraries(${name} ${library})
+          target_link_libraries(${name} INTERFACE ${library})
        endif()
        set(next_library_mode "")
      endif()
@@ -308,14 +316,7 @@ function(blender_add_lib__impl

  # works fine without having the includes
  # listed is helpful for IDE's (QtCreator/MSVC)
-  blender_source_group("${sources}")
-
-  # if enabled, set the FOLDER property for visual studio projects
-  if(WINDOWS_USE_VISUAL_STUDIO_PROJECT_FOLDERS)
-    get_filename_component(FolderDir ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY)
-    string(REPLACE ${CMAKE_SOURCE_DIR} "" FolderDir ${FolderDir})
-    set_target_properties(${name} PROPERTIES FOLDER ${FolderDir})
-  endif()
+  blender_source_group("${name}" "${sources}")

  list_assert_duplicates("${sources}")
  list_assert_duplicates("${includes}")
@@ -946,7 +947,7 @@ function(data_to_c_simple
  set_source_files_properties(${_file_to} PROPERTIES GENERATED TRUE)
 endfunction()

-# macro for converting pixmap directory to a png and then a c file
+# Function for converting pixmap directory to a '.png' and then a '.c' file.
 function(data_to_c_simple_icons
  path_from icon_prefix icon_names
  list_to_add
@@ -1159,12 +1160,12 @@ macro(blender_precompile_headers target cpp header)
 endmacro()

 macro(set_and_warn_dependency
-    _dependency _setting _val)
-    # when $_dependency is disabled, forces $_setting = $_val
-    if(NOT ${${_dependency}} AND ${${_setting}})
-      message(STATUS "'${_dependency}' is disabled: forcing 'set(${_setting} ${_val})'")
-      set(${_setting} ${_val})
-    endif()
+  _dependency _setting _val)
+  # when $_dependency is disabled, forces $_setting = $_val
+  if(NOT ${${_dependency}} AND ${${_setting}})
+    message(STATUS "'${_dependency}' is disabled: forcing 'set(${_setting} ${_val})'")
+    set(${_setting} ${_val})
+  endif()
 endmacro()

 macro(without_system_libs_begin)
--- a/build_files/cmake/platform/platform_apple.cmake
+++ b/build_files/cmake/platform/platform_apple.cmake
@@ -411,11 +411,21 @@ if(WITH_OPENMP)

    # Copy libomp.dylib to allow executables like datatoc and tests to work.
    execute_process(
-        COMMAND mkdir -p ${CMAKE_BINARY_DIR}/Resources/lib
-        COMMAND cp -p ${LIBDIR}/openmp/lib/libomp.dylib ${CMAKE_BINARY_DIR}/Resources/lib/libomp.dylib)
+      COMMAND mkdir -p ${CMAKE_BINARY_DIR}/Resources/lib
+      COMMAND cp -p ${LIBDIR}/openmp/lib/libomp.dylib ${CMAKE_BINARY_DIR}/Resources/lib/libomp.dylib
+    )
    execute_process(
-        COMMAND mkdir -p ${CMAKE_BINARY_DIR}/bin/Resources/lib
-        COMMAND cp -p ${LIBDIR}/openmp/lib/libomp.dylib ${CMAKE_BINARY_DIR}/bin/Resources/lib/libomp.dylib)
+      COMMAND mkdir -p ${CMAKE_BINARY_DIR}/bin/Resources/lib
+      COMMAND cp -p ${LIBDIR}/openmp/lib/libomp.dylib ${CMAKE_BINARY_DIR}/bin/Resources/lib/libomp.dylib
+    )
+  endif()
+endif()
+
+if(WITH_XR_OPENXR)
+  find_package(XR-OpenXR-SDK)
+  if(NOT XR_OPENXR_SDK_FOUND)
+    message(WARNING "OpenXR-SDK was not found, disabling WITH_XR_OPENXR")
+    set(WITH_XR_OPENXR OFF)
  endif()
 endif()

--- a/build_files/cmake/platform/platform_unix.cmake
+++ b/build_files/cmake/platform/platform_unix.cmake
@@ -428,6 +428,14 @@ if(WITH_TBB)
  find_package_wrapper(TBB)
 endif()

+if(WITH_XR_OPENXR)
+  find_package(XR-OpenXR-SDK)
+  if(NOT XR_OPENXR_SDK_FOUND)
+    message(WARNING "OpenXR-SDK not found, disabling WITH_XR_OPENXR")
+    set(WITH_XR_OPENXR OFF)
+  endif()
+endif()
+
 if(EXISTS ${LIBDIR})
  without_system_libs_end()
 endif()
--- a/build_files/cmake/platform/platform_win32.cmake
+++ b/build_files/cmake/platform/platform_win32.cmake
@@ -671,8 +671,8 @@ if(WITH_USD)
    set(USD_DEBUG_LIB ${LIBDIR}/usd/lib/libusd_m_d.lib)
    set(USD_LIBRARY_DIR ${LIBDIR}/usd/lib/usd)
    set(USD_LIBRARIES
-        debug ${USD_DEBUG_LIB}
-        optimized ${USD_RELEASE_LIB}
+      debug ${USD_DEBUG_LIB}
+      optimized ${USD_RELEASE_LIB}
    )
  endif()
 endif()
@@ -713,3 +713,15 @@ if(WINDOWS_PYTHON_DEBUG)
 </Project>")
  endif()
 endif()
+
+if(WITH_XR_OPENXR)
+  if(EXISTS ${LIBDIR}/xr_openxr_sdk)
+    set(XR_OPENXR_SDK ${LIBDIR}/xr_openxr_sdk)
+    set(XR_OPENXR_SDK_LIBPATH ${LIBDIR}/xr_openxr_sdk/lib)
+    set(XR_OPENXR_SDK_INCLUDE_DIR ${XR_OPENXR_SDK}/include)
+    set(XR_OPENXR_SDK_LIBRARIES optimized ${XR_OPENXR_SDK_LIBPATH}/openxr_loader.lib debug ${XR_OPENXR_SDK_LIBPATH}/openxr_loader_d.lib)
+  else()
+    message(WARNING "OpenXR-SDK was not found, disabling WITH_XR_OPENXR")
+    set(WITH_XR_OPENXR OFF)
+  endif()
+endif()
--- a/doc/python_api/examples/bpy.msgbus.1.py
+++ b/doc/python_api/examples/bpy.msgbus.1.py
@@ -0,0 +1,44 @@
+"""
+The message bus system can be used to receive notifications when properties of
+Blender datablocks are changed via the data API.
+
+
+Limitations
+-----------
+
+The message bus system is triggered by updates via the RNA system. This means
+that the following updates will result in a notification on the message bus:
+
+- Changes via the Python API, for example ``some_object.location.x += 3``.
+- Changes via the sliders, fields, and buttons in the user interface.
+
+The following updates do **not** trigger message bus notifications:
+
+- Moving objects in the 3D Viewport.
+- Changes performed by the animation system.
+
+
+Example Use
+-----------
+
+Below is an example of subscription to changes in the active object's location.
+"""
+
+import bpy
+
+# Any Python object can act as the subscription's owner.
+owner = object()
+
+subscribe_to = bpy.context.object.location
+
+def msgbus_callback(*args):
+    # This will print:
+    # Something changed! (1, 2, 3)
+    print("Something changed!", args)
+
+bpy.msgbus.subscribe_rna(
+    key=subscribe_to,
+    owner=owner,
+    args=(1, 2, 3),
+    notify=msgbus_callback,
+)
--- a/doc/python_api/examples/bpy.msgbus.2.py
+++ b/doc/python_api/examples/bpy.msgbus.2.py
@@ -0,0 +1,6 @@
+"""
+Some properties are converted to Python objects when you retrieve them. This
+needs to be avoided in order to create the subscription, by using
+``datablock.path_resolve("property_name", False)``:
+"""
+subscribe_to = bpy.context.object.path_resolve("name", False)
--- a/doc/python_api/examples/bpy.msgbus.3.py
+++ b/doc/python_api/examples/bpy.msgbus.3.py
@@ -0,0 +1,5 @@
+"""
+It is also possible to create subscriptions on a property of all instances of a
+certain type:
+"""
+subscribe_to = (bpy.types.Object, "location")
--- a/doc/python_api/examples/bpy.types.AddonPreferences.1.py
+++ b/doc/python_api/examples/bpy.types.AddonPreferences.1.py
@@ -6,7 +6,7 @@ bl_info = {
    "location": "SpaceBar Search -> Add-on Preferences Example",
    "description": "Example Add-on",
    "warning": "",
-    "wiki_url": "",
+    "doc_url": "",
    "tracker_url": "",
    "category": "Object",
 }
--- a/doc/python_api/rst/info_quickstart.rst
+++ b/doc/python_api/rst/info_quickstart.rst
@@ -255,9 +255,9 @@ Examples:
   >>> bpy.ops.object.scale_apply()
   {'FINISHED'}

-.. note::
+.. tip::

-   The menu item: :menuselection:`Help --> Operator Cheat Sheet`
+   The :ref:`Operator Cheat Sheet <blender_manual:bpy.ops.wm.operator_cheat_sheet>`
   gives a list of all operators and their default values in Python syntax, along with the generated docs.
   This is a good way to get an overview of all Blender's operators.

--- a/doc/python_api/sphinx_doc_gen.py
+++ b/doc/python_api/sphinx_doc_gen.py
@@ -1756,6 +1756,7 @@ def write_rst_contents(basepath):
    app_modules = (
        "bpy.context",  # note: not actually a module
        "bpy.data",     # note: not actually a module
+        "bpy.msgbus",   # note: not actually a module
        "bpy.ops",
        "bpy.types",

@@ -1846,6 +1847,29 @@ def write_rst_ops_index(basepath):
        file.close()


+def write_rst_msgbus(basepath):
+    """
+    Write the rst files of bpy.msgbus module
+    """
+    if 'bpy.msgbus' in EXCLUDE_MODULES:
+        return
+
+    # Write the index.
+    filepath = os.path.join(basepath, "bpy.msgbus.rst")
+    file = open(filepath, "w", encoding="utf-8")
+    fw = file.write
+    fw(title_string("Message Bus (bpy.msgbus)", "="))
+    write_example_ref("", fw, "bpy.msgbus")
+    fw(".. toctree::\n")
+    fw("   :glob:\n\n")
+    fw("   bpy.msgbus.*\n\n")
+    file.close()
+
+    # Write the contents.
+    pymodule2sphinx(basepath, 'bpy.msgbus', bpy.msgbus, 'Message Bus')
+    EXAMPLE_SET_USED.add("bpy.msgbus")
+
+
 def write_rst_data(basepath):
    '''
    Write the rst file of bpy.data module
@@ -2000,6 +2024,7 @@ def rna2sphinx(basepath):
    write_rst_bpy(basepath)                 # bpy, disabled by default
    write_rst_types_index(basepath)         # bpy.types
    write_rst_ops_index(basepath)           # bpy.ops
+    write_rst_msgbus(basepath)              # bpy.msgbus
    pyrna2sphinx(basepath)                  # bpy.types.* and bpy.ops.*
    write_rst_data(basepath)                # bpy.data
    write_rst_importable_modules(basepath)
--- a/extern/draco/CMakeLists.txt
+++ b/extern/draco/CMakeLists.txt
@@ -24,6 +24,21 @@ set(CMAKE_CXX_STANDARD 14)
 add_subdirectory(dracoenc)

 # Build blender-draco-exporter module.
-add_library(extern_draco SHARED src/draco-compressor.cpp src/draco-compressor.h)
-target_include_directories(extern_draco PUBLIC dracoenc/src)
-target_link_libraries(extern_draco PUBLIC dracoenc)
+set(SRC
+  src/draco-compressor.cpp
+  src/draco-compressor.h
+)
+
+set(INC
+  dracoenc/src
+)
+
+set(LIB
+  dracoenc
+)
+
+add_library(extern_draco SHARED ${SRC})
+target_include_directories(extern_draco PUBLIC ${INC})
+target_link_libraries(extern_draco PUBLIC ${LIB})
+
+blender_source_group(extern_draco "${SRC}")
--- a/extern/mantaflow/helper/util/rcmatrix.h
+++ b/extern/mantaflow/helper/util/rcmatrix.h
@@ -17,16 +17,13 @@

 // link to omp & tbb for now
 #if OPENMP == 1 || TBB == 1
-#  define MANTA_ENABLE_PARALLEL 0
+#  define MANTA_ENABLE_PARALLEL 1
 // allow the preconditioner to be computed in parallel? (can lead to slightly non-deterministic
 // results)
 #  define MANTA_ENABLE_PARALLEL_PC 0
-// use c++11 code?
-#  define MANTA_USE_CPP11 1
 #else
 #  define MANTA_ENABLE_PARALLEL 0
 #  define MANTA_ENABLE_PARALLEL_PC 0
-#  define MANTA_USE_CPP11 0
 #endif

 #if MANTA_ENABLE_PARALLEL == 1
@@ -503,11 +500,7 @@ template<class N, class T> struct RCMatrix {
      for (Iterator it = row_begin(i); it; ++it)
        result.fix_element(it.index(), i, it.value());
    }
-#if MANTA_USE_CPP11 == 1
-    return std::move(result);
-#else
    return result;
-#endif
  }

  RCMatrix getKtK() const
@@ -532,12 +525,7 @@ template<class N, class T> struct RCMatrix {
        }
      }
    }
-    parallel_end
-#if MANTA_USE_CPP11 == 1
-        return std::move(result);
-#else
-        return result;
-#endif
+    parallel_end return result;
  }

  RCMatrix operator*(const RCMatrix &m) const
@@ -561,12 +549,7 @@ template<class N, class T> struct RCMatrix {
        }
      }
    }
-    parallel_end
-#if MANTA_USE_CPP11 == 1
-        return std::move(result);
-#else
-        return result;
-#endif
+    parallel_end return result;
  }

  RCMatrix sqrt() const
@@ -581,12 +564,7 @@ template<class N, class T> struct RCMatrix {
        result.set_element(i, j, std::sqrt(it_A.value()));
      }
    }
-    parallel_end
-#if MANTA_USE_CPP11 == 1
-        return std::move(result);
-#else
-        return result;
-#endif
+    parallel_end return result;
  }

  RCMatrix operator*(const double k) const
@@ -601,12 +579,7 @@ template<class N, class T> struct RCMatrix {
        result.add_to_element(i, j, it_A.value() * k);
      }
    }
-    parallel_end
-#if MANTA_USE_CPP11 == 1
-        return std::move(result);
-#else
-        return result;
-#endif
+    parallel_end return result;
  }

  RCMatrix applyKernel(const RCMatrix &kernel, const int nx, const int ny) const
@@ -640,12 +613,7 @@ template<class N, class T> struct RCMatrix {
        }
      }
    }
-    parallel_end
-#if MANTA_USE_CPP11 == 1
-        return std::move(result);
-#else
-        return result;
-#endif
+    parallel_end return result;
  }

  RCMatrix applyHorizontalKernel(const RCMatrix &kernel, const int nx, const int ny) const
@@ -679,12 +647,7 @@ template<class N, class T> struct RCMatrix {
        }
      }
    }
-    parallel_end
-#if MANTA_USE_CPP11 == 1
-        return std::move(result);
-#else
-        return result;
-#endif
+    parallel_end return result;
  }

  RCMatrix applyVerticalKernel(const RCMatrix &kernel, const int nx, const int ny) const
@@ -718,12 +681,7 @@ template<class N, class T> struct RCMatrix {
        }
      }
    }
-    parallel_end
-#if MANTA_USE_CPP11 == 1
-        return std::move(result);
-#else
-        return result;
-#endif
+    parallel_end return result;
  }

  RCMatrix applySeparableKernel(const RCMatrix &kernelH,
@@ -747,11 +705,7 @@ template<class N, class T> struct RCMatrix {
  {
    std::vector<T> result(n, 0.0);
    multiply(rhs, result);
-#if MANTA_USE_CPP11 == 1
-    return std::move(result);
-#else
    return result;
-#endif
  }
  void multiply(const std::vector<T> &rhs, std::vector<T> &result) const
  {
@@ -832,11 +786,7 @@ template<class N, class T> struct RCMatrix {
    for (N i = 0; i < result.size(); i++) {
      result[i] = std::abs(result[i] - rhs[i]);
    }
-#if MANTA_USE_CPP11 == 1
-    return std::move(result);
-#else
    return result;
-#endif
  }
  T norm() const
  {
@@ -1024,11 +974,7 @@ template<class N, class T> struct RCFixedMatrix {
  {
    std::vector<T> result(n, 0.0);
    multiply(rhs, result);
-#if MANTA_USE_CPP11 == 1
-    return std::move(result);
-#else
    return result;
-#endif
  }
  void multiply(const std::vector<T> &rhs, std::vector<T> &result) const
  {
@@ -1064,12 +1010,7 @@ template<class N, class T> struct RCFixedMatrix {
        }
      }
    }
-    parallel_end
-#if MANTA_USE_CPP11 == 1
-        return std::move(result);
-#else
-        return result;
-#endif
+    parallel_end return result;
  }

  RCMatrix<N, T> toRCMatrix() const
@@ -1087,12 +1028,7 @@ template<class N, class T> struct RCFixedMatrix {
        result.matrix[i]->value[j] = value[rowstart[i] + j];
      }
    }
-    parallel_end
-#if MANTA_USE_CPP11 == 1
-        return std::move(result);
-#else
-        return result;
-#endif
+    parallel_end return result;
  }
 };

--- a/extern/mantaflow/preprocessed/gitinfo.h
+++ b/extern/mantaflow/preprocessed/gitinfo.h
@@ -1,3 +1,3 @@


-#define MANTA_GIT_VERSION "commit ce000bcbd7004e6549ac2f118755fcdc1f679bc3"
+#define MANTA_GIT_VERSION "commit 1d52e96ad602f1974dfee75bef293bc397e4b41b"
--- a/extern/mantaflow/preprocessed/plugin/initplugins.cpp
+++ b/extern/mantaflow/preprocessed/plugin/initplugins.cpp
@@ -1829,13 +1829,15 @@ struct KnUpdateFlagsObs : public KernelBase {
                   const MACGrid *fractions,
                   const Grid<Real> &phiObs,
                   const Grid<Real> *phiOut,
-                   const Grid<Real> *phiIn)
-      : KernelBase(&flags, 1),
+                   const Grid<Real> *phiIn,
+                   int boundaryWidth)
+      : KernelBase(&flags, boundaryWidth),
        flags(flags),
        fractions(fractions),
        phiObs(phiObs),
        phiOut(phiOut),
-        phiIn(phiIn)
+        phiIn(phiIn),
+        boundaryWidth(boundaryWidth)
  {
    runMessage();
    run();
@@ -1847,7 +1849,8 @@ struct KnUpdateFlagsObs : public KernelBase {
                 const MACGrid *fractions,
                 const Grid<Real> &phiObs,
                 const Grid<Real> *phiOut,
-                 const Grid<Real> *phiIn) const
+                 const Grid<Real> *phiIn,
+                 int boundaryWidth) const
  {

    bool isObs = false;
@@ -1910,6 +1913,11 @@ struct KnUpdateFlagsObs : public KernelBase {
    return phiIn;
  }
  typedef Grid<Real> type4;
+  inline int &getArg5()
+  {
+    return boundaryWidth;
+  }
+  typedef int type5;
  void runMessage()
  {
    debMsg("Executing kernel KnUpdateFlagsObs ", 3);
@@ -1923,15 +1931,15 @@ struct KnUpdateFlagsObs : public KernelBase {
    const int _maxY = maxY;
    if (maxZ > 1) {
      for (int k = __r.begin(); k != (int)__r.end(); k++)
-        for (int j = 1; j < _maxY; j++)
-          for (int i = 1; i < _maxX; i++)
-            op(i, j, k, flags, fractions, phiObs, phiOut, phiIn);
+        for (int j = boundaryWidth; j < _maxY; j++)
+          for (int i = boundaryWidth; i < _maxX; i++)
+            op(i, j, k, flags, fractions, phiObs, phiOut, phiIn, boundaryWidth);
    }
    else {
      const int k = 0;
      for (int j = __r.begin(); j != (int)__r.end(); j++)
-        for (int i = 1; i < _maxX; i++)
-          op(i, j, k, flags, fractions, phiObs, phiOut, phiIn);
+        for (int i = boundaryWidth; i < _maxX; i++)
+          op(i, j, k, flags, fractions, phiObs, phiOut, phiIn, boundaryWidth);
    }
  }
  void run()
@@ -1939,13 +1947,14 @@ struct KnUpdateFlagsObs : public KernelBase {
    if (maxZ > 1)
      tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
    else
-      tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
+      tbb::parallel_for(tbb::blocked_range<IndexInt>(boundaryWidth, maxY), *this);
  }
  FlagGrid &flags;
  const MACGrid *fractions;
  const Grid<Real> &phiObs;
  const Grid<Real> *phiOut;
  const Grid<Real> *phiIn;
+  int boundaryWidth;
 };

 //! update obstacle and outflow flags from levelsets
@@ -1954,9 +1963,10 @@ void setObstacleFlags(FlagGrid &flags,
                      const Grid<Real> &phiObs,
                      const MACGrid *fractions = NULL,
                      const Grid<Real> *phiOut = NULL,
-                      const Grid<Real> *phiIn = NULL)
+                      const Grid<Real> *phiIn = NULL,
+                      int boundaryWidth = 1)
 {
-  KnUpdateFlagsObs(flags, fractions, phiObs, phiOut, phiIn);
+  KnUpdateFlagsObs(flags, fractions, phiObs, phiOut, phiIn, boundaryWidth);
 }
 static PyObject *_W_18(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
 {
@@ -1973,8 +1983,9 @@ static PyObject *_W_18(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
      const MACGrid *fractions = _args.getPtrOpt<MACGrid>("fractions", 2, NULL, &_lock);
      const Grid<Real> *phiOut = _args.getPtrOpt<Grid<Real>>("phiOut", 3, NULL, &_lock);
      const Grid<Real> *phiIn = _args.getPtrOpt<Grid<Real>>("phiIn", 4, NULL, &_lock);
+      int boundaryWidth = _args.getOpt<int>("boundaryWidth", 5, 1, &_lock);
      _retval = getPyNone();
-      setObstacleFlags(flags, phiObs, fractions, phiOut, phiIn);
+      setObstacleFlags(flags, phiObs, fractions, phiOut, phiIn, boundaryWidth);
      _args.check();
    }
    pbFinalizePlugin(parent, "setObstacleFlags", !noTiming);
--- a/intern/cycles/app/CMakeLists.txt
+++ b/intern/cycles/app/CMakeLists.txt
@@ -51,14 +51,16 @@ endif()

 # Common configuration.

-link_directories(${OPENIMAGEIO_LIBPATH}
-                 ${BOOST_LIBPATH}
-                 ${PNG_LIBPATH}
-                 ${JPEG_LIBPATH}
-                 ${ZLIB_LIBPATH}
-                 ${TIFF_LIBPATH}
-                 ${OPENEXR_LIBPATH}
-                 ${OPENJPEG_LIBPATH})
+link_directories(
+  ${OPENIMAGEIO_LIBPATH}
+  ${BOOST_LIBPATH}
+  ${PNG_LIBPATH}
+  ${JPEG_LIBPATH}
+  ${ZLIB_LIBPATH}
+  ${TIFF_LIBPATH}
+  ${OPENEXR_LIBPATH}
+  ${OPENJPEG_LIBPATH}
+)

 if(WITH_OPENCOLORIO)
  link_directories(${OPENCOLORIO_LIBPATH})
--- a/intern/cycles/blender/addon/init.py
+++ b/intern/cycles/blender/addon/init.py
@@ -22,7 +22,7 @@ bl_info = {
    "blender": (2, 80, 0),
    "description": "Cycles renderer integration",
    "warning": "",
-    "wiki_url": "https://docs.blender.org/manual/en/latest/render/cycles/",
+    "doc_url": "https://docs.blender.org/manual/en/latest/render/cycles/",
    "tracker_url": "",
    "support": 'OFFICIAL',
    "category": "Render"}
--- a/intern/cycles/blender/addon/engine.py
+++ b/intern/cycles/blender/addon/engine.py
@@ -255,6 +255,7 @@ def list_render_passes(srl):
    if crl.pass_debug_bvh_traversed_instances: yield ("Debug BVH Traversed Instances", "X",   'VALUE')
    if crl.pass_debug_bvh_intersections:       yield ("Debug BVH Intersections",       "X",   'VALUE')
    if crl.pass_debug_ray_bounces:             yield ("Debug Ray Bounces",             "X",   'VALUE')
+    if crl.pass_debug_sample_count:            yield ("Debug Sample Count",            "X",   'VALUE')
    if crl.use_pass_volume_direct:             yield ("VolumeDir",                     "RGB", 'COLOR')
    if crl.use_pass_volume_indirect:           yield ("VolumeInd",                     "RGB", 'COLOR')

--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -112,6 +112,7 @@ enum_use_layer_samples = (
 enum_sampling_pattern = (
    ('SOBOL', "Sobol", "Use Sobol random sampling pattern"),
    ('CORRELATED_MUTI_JITTER', "Correlated Multi-Jitter", "Use Correlated Multi-Jitter random sampling pattern"),
+    ('PROGRESSIVE_MUTI_JITTER', "Progressive Multi-Jitter", "Use Progressive Multi-Jitter random sampling pattern"),
 )

 enum_integrator = (
@@ -349,6 +350,25 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
        default=0.01,
    )

+    use_adaptive_sampling: BoolProperty(
+        name="Use Adaptive Sampling",
+        description="Automatically reduce the number of samples per pixel based on estimated noise level",
+        default=False,
+    )
+
+    adaptive_threshold: FloatProperty(
+        name="Adaptive Sampling Threshold",
+        description="Noise level step to stop sampling at, lower values reduce noise at the cost of render time. Zero for automatic setting based on number of AA samples",
+        min=0.0, max=1.0,
+        default=0.0,
+    )
+    adaptive_min_samples: IntProperty(
+        name="Adaptive Min Samples",
+        description="Minimum AA samples for adaptive sampling, to discover noisy features before stopping sampling. Zero for automatic setting based on number of AA samples",
+        min=0, max=4096,
+        default=0,
+    )
+
    min_light_bounces: IntProperty(
            name="Min Light Bounces",
            description="Minimum number of light bounces. Setting this higher reduces noise in the first bounces, "
@@ -1297,7 +1317,12 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
        default=False,
        update=update_render_passes,
    )
-
+    pass_debug_sample_count: BoolProperty(
+        name="Debug Sample Count",
+        description="Number of samples/camera rays per pixel",
+        default=False,
+        update=update_render_passes,
+    )
    use_pass_volume_direct: BoolProperty(
        name="Volume Direct",
        description="Deliver direct volumetric scattering pass",
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -230,6 +230,32 @@ class CYCLES_RENDER_PT_sampling_sub_samples(CyclesButtonsPanel, Panel):
        draw_samples_info(layout, context)


+class CYCLES_RENDER_PT_sampling_adaptive(CyclesButtonsPanel, Panel):
+    bl_label = "Adaptive Sampling"
+    bl_parent_id = "CYCLES_RENDER_PT_sampling"
+    bl_options = {'DEFAULT_CLOSED'}
+
+    def draw_header(self, context):
+        layout = self.layout
+        scene = context.scene
+        cscene = scene.cycles
+
+        layout.prop(cscene, "use_adaptive_sampling", text="")
+
+    def draw(self, context):
+        layout = self.layout
+        layout.use_property_split = True
+        layout.use_property_decorate = False
+
+        scene = context.scene
+        cscene = scene.cycles
+
+        layout.active = cscene.use_adaptive_sampling
+
+        col = layout.column(align=True)
+        col.prop(cscene, "adaptive_min_samples", text="Min Samples")
+        col.prop(cscene, "adaptive_threshold", text="Noise Threshold")
+
 class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
    bl_label = "Advanced"
    bl_parent_id = "CYCLES_RENDER_PT_sampling"
@@ -247,7 +273,9 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
        row.prop(cscene, "seed")
        row.prop(cscene, "use_animated_seed", text="", icon='TIME')

-        layout.prop(cscene, "sampling_pattern", text="Pattern")
+        col = layout.column(align=True)
+        col.active = not(cscene.use_adaptive_sampling)
+        col.prop(cscene, "sampling_pattern", text="Pattern")

        layout.prop(cscene, "use_square_samples")

@@ -813,6 +841,8 @@ class CYCLES_RENDER_PT_passes_data(CyclesButtonsPanel, Panel):
        col.prop(cycles_view_layer, "denoising_store_passes", text="Denoising Data")
        col = flow.column()
        col.prop(cycles_view_layer, "pass_debug_render_time", text="Render Time")
+        col = flow.column()
+        col.prop(cycles_view_layer, "pass_debug_sample_count", text="Sample Count")

        layout.separator()

@@ -2238,6 +2268,7 @@ classes = (
    CYCLES_PT_integrator_presets,
    CYCLES_RENDER_PT_sampling,
    CYCLES_RENDER_PT_sampling_sub_samples,
+    CYCLES_RENDER_PT_sampling_adaptive,
    CYCLES_RENDER_PT_sampling_advanced,
    CYCLES_RENDER_PT_light_paths,
    CYCLES_RENDER_PT_light_paths_max_bounces,
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -470,7 +470,8 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
  b_rlay_name = b_view_layer.name();

  /* add passes */
-  vector<Pass> passes = sync->sync_render_passes(b_rlay, b_view_layer);
+  vector<Pass> passes = sync->sync_render_passes(
+      b_rlay, b_view_layer, session_params.adaptive_sampling);
  buffer_params.passes = passes;

  PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles");
--- a/intern/cycles/blender/blender_session.h
+++ b/intern/cycles/blender/blender_session.h
@@ -17,15 +17,19 @@
 #ifndef __BLENDER_SESSION_H__
 #define __BLENDER_SESSION_H__

+#include "RNA_blender_cpp.h"
+
 #include "device/device.h"
+
+#include "render/bake.h"
 #include "render/scene.h"
 #include "render/session.h"
-#include "render/bake.h"

 #include "util/util_vector.h"

 CCL_NAMESPACE_BEGIN

+class BlenderSync;
 class ImageMetaData;
 class Scene;
 class Session;
--- a/intern/cycles/blender/blender_shader.cpp
+++ b/intern/cycles/blender/blender_shader.cpp
@@ -327,6 +327,7 @@ static ShaderNode *add_node(Scene *scene,
    BL::ShaderNodeVectorRotate b_vector_rotate_node(b_node);
    VectorRotateNode *vector_rotate_node = new VectorRotateNode();
    vector_rotate_node->type = (NodeVectorRotateType)b_vector_rotate_node.rotation_type();
+    vector_rotate_node->invert = b_vector_rotate_node.invert();
    node = vector_rotate_node;
  }
  else if (b_node.is_a(&RNA_ShaderNodeVectorTransform)) {
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -296,6 +296,16 @@ void BlenderSync::sync_integrator()
  integrator->sample_all_lights_indirect = get_boolean(cscene, "sample_all_lights_indirect");
  integrator->light_sampling_threshold = get_float(cscene, "light_sampling_threshold");

+  if (RNA_boolean_get(&cscene, "use_adaptive_sampling")) {
+    integrator->sampling_pattern = SAMPLING_PATTERN_PMJ;
+    integrator->adaptive_min_samples = get_int(cscene, "adaptive_min_samples");
+    integrator->adaptive_threshold = get_float(cscene, "adaptive_threshold");
+  }
+  else {
+    integrator->adaptive_min_samples = INT_MAX;
+    integrator->adaptive_threshold = 0.0f;
+  }
+
  int diffuse_samples = get_int(cscene, "diffuse_samples");
  int glossy_samples = get_int(cscene, "glossy_samples");
  int transmission_samples = get_int(cscene, "transmission_samples");
@@ -312,6 +322,8 @@ void BlenderSync::sync_integrator()
    integrator->mesh_light_samples = mesh_light_samples * mesh_light_samples;
    integrator->subsurface_samples = subsurface_samples * subsurface_samples;
    integrator->volume_samples = volume_samples * volume_samples;
+    const double min_samples_d = integrator->adaptive_min_samples; /* Avoids int overflow. */
+    integrator->adaptive_min_samples = (int)min(min_samples_d * min_samples_d, (double)INT_MAX);
  }
  else {
    integrator->diffuse_samples = diffuse_samples;
@@ -484,6 +496,8 @@ PassType BlenderSync::get_pass_type(BL::RenderPass &b_pass)
  MAP_PASS("Debug Ray Bounces", PASS_RAY_BOUNCES);
 #endif
  MAP_PASS("Debug Render Time", PASS_RENDER_TIME);
+  MAP_PASS("AdaptiveAuxBuffer", PASS_ADAPTIVE_AUX_BUFFER);
+  MAP_PASS("Debug Sample Count", PASS_SAMPLE_COUNT);
  if (string_startswith(name, cryptomatte_prefix)) {
    return PASS_CRYPTOMATTE;
  }
@@ -519,7 +533,9 @@ int BlenderSync::get_denoising_pass(BL::RenderPass &b_pass)
  return -1;
 }

-vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_view_layer)
+vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay,
+                                             BL::ViewLayer &b_view_layer,
+                                             bool adaptive_sampling)
 {
  vector<Pass> passes;

@@ -595,6 +611,10 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLa
    b_engine.add_pass("Debug Render Time", 1, "X", b_view_layer.name().c_str());
    Pass::add(PASS_RENDER_TIME, passes, "Debug Render Time");
  }
+  if (get_boolean(crp, "pass_debug_sample_count")) {
+    b_engine.add_pass("Debug Sample Count", 1, "X", b_view_layer.name().c_str());
+    Pass::add(PASS_SAMPLE_COUNT, passes, "Debug Sample Count");
+  }
  if (get_boolean(crp, "use_pass_volume_direct")) {
    b_engine.add_pass("VolumeDir", 3, "RGB", b_view_layer.name().c_str());
    Pass::add(PASS_VOLUME_DIRECT, passes, "VolumeDir");
@@ -641,6 +661,13 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLa
                                                        CRYPT_ACCURATE);
  }

+  if (adaptive_sampling) {
+    Pass::add(PASS_ADAPTIVE_AUX_BUFFER, passes);
+    if (!get_boolean(crp, "pass_debug_sample_count")) {
+      Pass::add(PASS_SAMPLE_COUNT, passes);
+    }
+  }
+
  RNA_BEGIN (&crp, b_aov, "aovs") {
    bool is_color = (get_enum(b_aov, "type") == 1);
    string name = get_string(b_aov, "name");
@@ -880,6 +907,8 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
  params.use_profiling = params.device.has_profiling && !b_engine.is_preview() && background &&
                         BlenderSession::print_render_stats;

+  params.adaptive_sampling = RNA_boolean_get(&cscene, "use_adaptive_sampling");
+
  return params;
 }

--- a/intern/cycles/blender/blender_sync.h
+++ b/intern/cycles/blender/blender_sync.h
@@ -71,7 +71,9 @@ class BlenderSync {
                 int height,
                 void **python_thread_state);
  void sync_view_layer(BL::SpaceView3D &b_v3d, BL::ViewLayer &b_view_layer);
-  vector<Pass> sync_render_passes(BL::RenderLayer &b_render_layer, BL::ViewLayer &b_view_layer);
+  vector<Pass> sync_render_passes(BL::RenderLayer &b_render_layer,
+                                  BL::ViewLayer &b_view_layer,
+                                  bool adaptive_sampling);
  void sync_integrator();
  void sync_camera(BL::RenderSettings &b_render,
                   BL::Object &b_override,
--- a/intern/cycles/device/cuda/device_cuda.h
+++ b/intern/cycles/device/cuda/device_cuda.h
@@ -82,6 +82,17 @@ class CUDADevice : public Device {
  device_vector<TextureInfo> texture_info;
  bool need_texture_info;

+  /* Kernels */
+  struct {
+    bool loaded;
+
+    CUfunction adaptive_stopping;
+    CUfunction adaptive_filter_x;
+    CUfunction adaptive_filter_y;
+    CUfunction adaptive_scale_samples;
+    int adaptive_num_threads_per_block;
+  } functions;
+
  static bool have_precompiled_kernels();

  virtual bool show_samples() const;
@@ -114,6 +125,8 @@ class CUDADevice : public Device {

  virtual bool load_kernels(const DeviceRequestedFeatures &requested_features);

+  void load_functions();
+
  void reserve_local_memory(const DeviceRequestedFeatures &requested_features);

  void init_host_memory();
@@ -197,6 +210,15 @@ class CUDADevice : public Device {

  void denoise(RenderTile &rtile, DenoisingTask &denoising);

+  void adaptive_sampling_filter(uint filter_sample,
+                                WorkTile *wtile,
+                                CUdeviceptr d_wtile,
+                                CUstream stream = 0);
+  void adaptive_sampling_post(RenderTile &rtile,
+                              WorkTile *wtile,
+                              CUdeviceptr d_wtile,
+                              CUstream stream = 0);
+
  void path_trace(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles);

  void film_convert(DeviceTask &task,
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -208,6 +208,8 @@ CUDADevice::CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool
  map_host_used = 0;
  can_map_host = 0;

+  functions.loaded = false;
+
  /* Intialize CUDA. */
  if (cuda_error(cuInit(0)))
    return;
@@ -531,9 +533,42 @@ bool CUDADevice::load_kernels(const DeviceRequestedFeatures &requested_features)
    reserve_local_memory(requested_features);
  }

+  load_functions();
+
  return (result == CUDA_SUCCESS);
 }

+void CUDADevice::load_functions()
+{
+  /* TODO: load all functions here. */
+  if (functions.loaded) {
+    return;
+  }
+  functions.loaded = true;
+
+  cuda_assert(cuModuleGetFunction(
+      &functions.adaptive_stopping, cuModule, "kernel_cuda_adaptive_stopping"));
+  cuda_assert(cuModuleGetFunction(
+      &functions.adaptive_filter_x, cuModule, "kernel_cuda_adaptive_filter_x"));
+  cuda_assert(cuModuleGetFunction(
+      &functions.adaptive_filter_y, cuModule, "kernel_cuda_adaptive_filter_y"));
+  cuda_assert(cuModuleGetFunction(
+      &functions.adaptive_scale_samples, cuModule, "kernel_cuda_adaptive_scale_samples"));
+
+  cuda_assert(cuFuncSetCacheConfig(functions.adaptive_stopping, CU_FUNC_CACHE_PREFER_L1));
+  cuda_assert(cuFuncSetCacheConfig(functions.adaptive_filter_x, CU_FUNC_CACHE_PREFER_L1));
+  cuda_assert(cuFuncSetCacheConfig(functions.adaptive_filter_y, CU_FUNC_CACHE_PREFER_L1));
+  cuda_assert(cuFuncSetCacheConfig(functions.adaptive_scale_samples, CU_FUNC_CACHE_PREFER_L1));
+
+  int unused_min_blocks;
+  cuda_assert(cuOccupancyMaxPotentialBlockSize(&unused_min_blocks,
+                                               &functions.adaptive_num_threads_per_block,
+                                               functions.adaptive_scale_samples,
+                                               NULL,
+                                               0,
+                                               0));
+}
+
 void CUDADevice::reserve_local_memory(const DeviceRequestedFeatures &requested_features)
 {
  if (use_split_kernel()) {
@@ -1666,6 +1701,80 @@ void CUDADevice::denoise(RenderTile &rtile, DenoisingTask &denoising)
  denoising.run_denoising(&rtile);
 }

+void CUDADevice::adaptive_sampling_filter(uint filter_sample,
+                                          WorkTile *wtile,
+                                          CUdeviceptr d_wtile,
+                                          CUstream stream)
+{
+  const int num_threads_per_block = functions.adaptive_num_threads_per_block;
+
+  /* These are a series of tiny kernels because there is no grid synchronization
+   * from within a kernel, so multiple kernel launches it is. */
+  uint total_work_size = wtile->h * wtile->w;
+  void *args2[] = {&d_wtile, &filter_sample, &total_work_size};
+  uint num_blocks = divide_up(total_work_size, num_threads_per_block);
+  cuda_assert(cuLaunchKernel(functions.adaptive_stopping,
+                             num_blocks,
+                             1,
+                             1,
+                             num_threads_per_block,
+                             1,
+                             1,
+                             0,
+                             stream,
+                             args2,
+                             0));
+  total_work_size = wtile->h;
+  num_blocks = divide_up(total_work_size, num_threads_per_block);
+  cuda_assert(cuLaunchKernel(functions.adaptive_filter_x,
+                             num_blocks,
+                             1,
+                             1,
+                             num_threads_per_block,
+                             1,
+                             1,
+                             0,
+                             stream,
+                             args2,
+                             0));
+  total_work_size = wtile->w;
+  num_blocks = divide_up(total_work_size, num_threads_per_block);
+  cuda_assert(cuLaunchKernel(functions.adaptive_filter_y,
+                             num_blocks,
+                             1,
+                             1,
+                             num_threads_per_block,
+                             1,
+                             1,
+                             0,
+                             stream,
+                             args2,
+                             0));
+}
+
+void CUDADevice::adaptive_sampling_post(RenderTile &rtile,
+                                        WorkTile *wtile,
+                                        CUdeviceptr d_wtile,
+                                        CUstream stream)
+{
+  const int num_threads_per_block = functions.adaptive_num_threads_per_block;
+  uint total_work_size = wtile->h * wtile->w;
+
+  void *args[] = {&d_wtile, &rtile.start_sample, &rtile.sample, &total_work_size};
+  uint num_blocks = divide_up(total_work_size, num_threads_per_block);
+  cuda_assert(cuLaunchKernel(functions.adaptive_scale_samples,
+                             num_blocks,
+                             1,
+                             1,
+                             num_threads_per_block,
+                             1,
+                             1,
+                             0,
+                             stream,
+                             args,
+                             0));
+}
+
 void CUDADevice::path_trace(DeviceTask &task,
                            RenderTile &rtile,
                            device_vector<WorkTile> &work_tiles)
@@ -1715,6 +1824,9 @@ void CUDADevice::path_trace(DeviceTask &task,
  }

  uint step_samples = divide_up(min_blocks * num_threads_per_block, wtile->w * wtile->h);
+  if (task.adaptive_sampling.use) {
+    step_samples = task.adaptive_sampling.align_static_samples(step_samples);
+  }

  /* Render all samples. */
  int start_sample = rtile.start_sample;
@@ -1736,6 +1848,12 @@ void CUDADevice::path_trace(DeviceTask &task,
    cuda_assert(
        cuLaunchKernel(cuPathTrace, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));

+    /* Run the adaptive sampling kernels at selected samples aligned to step samples. */
+    uint filter_sample = sample + wtile->num_samples - 1;
+    if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(filter_sample)) {
+      adaptive_sampling_filter(filter_sample, wtile, d_work_tiles);
+    }
+
    cuda_assert(cuCtxSynchronize());

    /* Update progress. */
@@ -1747,6 +1865,14 @@ void CUDADevice::path_trace(DeviceTask &task,
        break;
    }
  }
+
+  /* Finalize adaptive sampling. */
+  if (task.adaptive_sampling.use) {
+    CUdeviceptr d_work_tiles = (CUdeviceptr)work_tiles.device_pointer;
+    adaptive_sampling_post(rtile, wtile, d_work_tiles);
+    cuda_assert(cuCtxSynchronize());
+    task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples);
+  }
 }

 void CUDADevice::film_convert(DeviceTask &task,
@@ -2144,7 +2270,7 @@ void CUDADevice::thread_run(DeviceTask *task)
 {
  CUDAContextScope scope(this);

-  if (task->type == DeviceTask::RENDER || task->type == DeviceTask::DENOISE) {
+  if (task->type == DeviceTask::RENDER) {
    DeviceRequestedFeatures requested_features;
    if (use_split_kernel()) {
      if (split_kernel == NULL) {
@@ -2159,7 +2285,7 @@ void CUDADevice::thread_run(DeviceTask *task)
    RenderTile tile;
    DenoisingTask denoising(this, *task);

-    while (task->acquire_tile(this, tile)) {
+    while (task->acquire_tile(this, tile, task->tile_types)) {
      if (tile.task == RenderTile::PATH_TRACE) {
        if (use_split_kernel()) {
          device_only_memory<uchar> void_buffer(this, "void_buffer");
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -29,16 +29,19 @@
 #include "device/device_intern.h"
 #include "device/device_split_kernel.h"

+// clang-format off
 #include "kernel/kernel.h"
 #include "kernel/kernel_compat_cpu.h"
 #include "kernel/kernel_types.h"
 #include "kernel/split/kernel_split_data.h"
 #include "kernel/kernel_globals.h"
+#include "kernel/kernel_adaptive_sampling.h"

 #include "kernel/filter/filter.h"

 #include "kernel/osl/osl_shader.h"
 #include "kernel/osl/osl_globals.h"
+// clang-format on

 #include "render/buffers.h"
 #include "render/coverage.h"
@@ -317,6 +320,10 @@ class CPUDevice : public Device {
    REGISTER_SPLIT_KERNEL(next_iteration_setup);
    REGISTER_SPLIT_KERNEL(indirect_subsurface);
    REGISTER_SPLIT_KERNEL(buffer_update);
+    REGISTER_SPLIT_KERNEL(adaptive_stopping);
+    REGISTER_SPLIT_KERNEL(adaptive_filter_x);
+    REGISTER_SPLIT_KERNEL(adaptive_filter_y);
+    REGISTER_SPLIT_KERNEL(adaptive_adjust_samples);
 #undef REGISTER_SPLIT_KERNEL
 #undef KERNEL_FUNCTIONS
  }
@@ -511,7 +518,7 @@ class CPUDevice : public Device {

  void thread_run(DeviceTask *task)
  {
-    if (task->type == DeviceTask::RENDER || task->type == DeviceTask::DENOISE)
+    if (task->type == DeviceTask::RENDER)
      thread_render(*task);
    else if (task->type == DeviceTask::SHADER)
      thread_shader(*task);
@@ -823,6 +830,49 @@ class CPUDevice : public Device {
    return true;
  }

+  bool adaptive_sampling_filter(KernelGlobals *kg, RenderTile &tile)
+  {
+    WorkTile wtile; /* Tile description handed to the filter kernels, copied from 'tile'. */
+    wtile.x = tile.x;
+    wtile.y = tile.y;
+    wtile.w = tile.w;
+    wtile.h = tile.h;
+    wtile.offset = tile.offset;
+    wtile.stride = tile.stride;
+    wtile.buffer = (float *)tile.buffer;
+
+    bool any = false; /* Set once any filter call below returns true. */
+    for (int y = tile.y; y < tile.y + tile.h; ++y) { /* One x-filter call per tile row. */
+      any |= kernel_do_adaptive_filter_x(kg, y, &wtile);
+    }
+    for (int x = tile.x; x < tile.x + tile.w; ++x) { /* One y-filter call per tile column. */
+      any |= kernel_do_adaptive_filter_y(kg, x, &wtile);
+    }
+    return (!any); /* True only if no filter call returned true. */
+  }
+
+  void adaptive_sampling_post(const RenderTile &tile, KernelGlobals *kg)
+  {
+    float *render_buffer = (float *)tile.buffer; /* Indexed in units of pass_stride floats. */
+    for (int y = tile.y; y < tile.y + tile.h; y++) {
+      for (int x = tile.x; x < tile.x + tile.w; x++) {
+        int index = tile.offset + x + y * tile.stride;
+        ccl_global float *buffer = render_buffer + index * kernel_data.film.pass_stride;
+        if (buffer[kernel_data.film.pass_sample_count] < 0.0f) { /* Negative count: flip sign. */
+          buffer[kernel_data.film.pass_sample_count] = -buffer[kernel_data.film.pass_sample_count];
+          float sample_multiplier = tile.sample / max((float)tile.start_sample + 1.0f,
+                                                      buffer[kernel_data.film.pass_sample_count]);
+          if (sample_multiplier != 1.0f) { /* Only adjust when the factor differs from 1. */
+            kernel_adaptive_post_adjust(kg, buffer, sample_multiplier);
+          }
+        }
+        else { /* NOTE(review): the factor below is infinite when tile.sample == 1 - confirm. */
+          kernel_adaptive_post_adjust(kg, buffer, tile.sample / (tile.sample - 1.0f));
+        }
+      }
+    }
+  }
+
  void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
  {
    const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE;
@@ -855,14 +905,27 @@ class CPUDevice : public Device {
          path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
        }
      }
-
      tile.sample = sample + 1;

+      if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(sample)) {
+        const bool stop = adaptive_sampling_filter(kg, tile);
+        if (stop) {
+          const int num_progress_samples = end_sample - sample;
+          tile.sample = end_sample;
+          task.update_progress(&tile, tile.w * tile.h * num_progress_samples);
+          break;
+        }
+      }
+
      task.update_progress(&tile, tile.w * tile.h);
    }
    if (use_coverage) {
      coverage.finalize();
    }
+
+    if (task.adaptive_sampling.use) {
+      adaptive_sampling_post(tile, kg);
+    }
  }

  void denoise(DenoisingTask &denoising, RenderTile &tile)
@@ -927,7 +990,7 @@ class CPUDevice : public Device {
    DenoisingTask denoising(this, task);
    denoising.profiler = &kg->profiler;

-    while (task.acquire_tile(this, tile)) {
+    while (task.acquire_tile(this, tile, task.tile_types)) {
      if (tile.task == RenderTile::PATH_TRACE) {
        if (use_split_kernel) {
          device_only_memory<uchar> void_buffer(this, "void_buffer");
--- a/intern/cycles/device/device_intern.h
+++ b/intern/cycles/device/device_intern.h
@@ -17,9 +17,15 @@
 #ifndef __DEVICE_INTERN_H__
 #define __DEVICE_INTERN_H__

+#include "util/util_string.h"
+#include "util/util_vector.h"
+
 CCL_NAMESPACE_BEGIN

 class Device;
+class DeviceInfo;
+class Profiler;
+class Stats;

 Device *device_cpu_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
 bool device_opencl_init();
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -183,6 +183,14 @@ class MultiDevice : public Device {
    return true;
  }

+  virtual void *osl_memory()
+  {
+    if (devices.size() != 1) { /* Forward only for exactly one sub-device; never front() on empty. */
+      return NULL;
+    }
+    return devices.front().device->osl_memory();
+  }
+
  void mem_alloc(device_memory &mem)
  {
    device_ptr key = unique_key++;
@@ -482,11 +490,24 @@ class MultiDevice : public Device {

  void task_add(DeviceTask &task)
  {
-    list<SubDevice> &task_devices = denoising_devices.empty() ||
-                                            (task.type != DeviceTask::DENOISE &&
-                                             task.type != DeviceTask::DENOISE_BUFFER) ?
-                                        devices :
-                                        denoising_devices;
+    list<SubDevice> task_devices = devices;
+    if (!denoising_devices.empty()) {
+      if (task.type == DeviceTask::DENOISE_BUFFER) {
+        /* Denoising tasks should be redirected to the denoising devices entirely. */
+        task_devices = denoising_devices;
+      }
+      else if (task.type == DeviceTask::RENDER && (task.tile_types & RenderTile::DENOISE)) {
+        const uint tile_types = task.tile_types;
+        /* For normal rendering tasks only redirect the denoising part to the denoising devices.
+         * Do not need to split the task here, since they all run through 'acquire_tile'. */
+        task.tile_types = RenderTile::DENOISE;
+        foreach (SubDevice &sub, denoising_devices) {
+          sub.device->task_add(task);
+        }
+        /* Rendering itself should still be executed on the rendering devices. */
+        task.tile_types = tile_types ^ RenderTile::DENOISE;
+      }
+    }

    list<DeviceTask> tasks;
    task.split(tasks, task_devices.size());
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -186,14 +186,15 @@ class OptiXDevice : public CUDADevice {
  OptixTraversableHandle tlas_handle = 0;

  OptixDenoiser denoiser = NULL;
-  pair<int2, CUdeviceptr> denoiser_state = {};
+  device_only_memory<unsigned char> denoiser_state;
  int denoiser_input_passes = 0;

 public:
  OptiXDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_)
      : CUDADevice(info_, stats_, profiler_, background_),
        sbt_data(this, "__sbt", MEM_READ_ONLY),
-        launch_params(this, "__params")
+        launch_params(this, "__params"),
+        denoiser_state(this, "__denoiser_state")
  {
    // Store number of CUDA streams in device info
    info.cpu_threads = DebugFlags().optix.cuda_streams;
@@ -255,13 +256,10 @@ class OptiXDevice : public CUDADevice {
      cuMemFree(mem);
    }

-    if (denoiser_state.second) {
-      cuMemFree(denoiser_state.second);
-    }
-
    sbt_data.free();
    texture_info.free();
    launch_params.free();
+    denoiser_state.free();

    // Unload modules
    if (optix_module != NULL)
@@ -571,9 +569,14 @@ class OptiXDevice : public CUDADevice {
    if (have_error())
      return;  // Abort early if there was an error previously

-    if (task.type == DeviceTask::RENDER || task.type == DeviceTask::DENOISE) {
+    if (task.type == DeviceTask::RENDER) {
+      if (thread_index != 0) {
+        // Only execute denoising in a single thread (see also 'task_add')
+        task.tile_types &= ~RenderTile::DENOISE;
+      }
+
      RenderTile tile;
-      while (task.acquire_tile(this, tile)) {
+      while (task.acquire_tile(this, tile, task.tile_types)) {
        if (tile.task == RenderTile::PATH_TRACE)
          launch_render(task, tile, thread_index);
        else if (tile.task == RenderTile::DENOISE)
@@ -624,7 +627,11 @@ class OptiXDevice : public CUDADevice {

    const int end_sample = rtile.start_sample + rtile.num_samples;
    // Keep this number reasonable to avoid running into TDRs
-    const int step_samples = (info.display_device ? 8 : 32);
+    int step_samples = (info.display_device ? 8 : 32);
+    if (task.adaptive_sampling.use) {
+      step_samples = task.adaptive_sampling.align_static_samples(step_samples);
+    }
+
    // Offset into launch params buffer so that streams use separate data
    device_ptr launch_params_ptr = launch_params.device_pointer +
                                   thread_index * launch_params.data_elements;
@@ -635,10 +642,9 @@ class OptiXDevice : public CUDADevice {
      // Copy work tile information to device
      wtile.num_samples = min(step_samples, end_sample - sample);
      wtile.start_sample = sample;
-      check_result_cuda(cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParams, tile),
-                                          &wtile,
-                                          sizeof(wtile),
-                                          cuda_stream[thread_index]));
+      device_ptr d_wtile_ptr = launch_params_ptr + offsetof(KernelParams, tile);
+      check_result_cuda(
+          cuMemcpyHtoDAsync(d_wtile_ptr, &wtile, sizeof(wtile), cuda_stream[thread_index]));

      OptixShaderBindingTable sbt_params = {};
      sbt_params.raygenRecord = sbt_data.device_pointer + PG_RGEN * sizeof(SbtRecord);
@@ -663,6 +669,12 @@ class OptiXDevice : public CUDADevice {
                                     wtile.h,
                                     1));

+      // Run the adaptive sampling kernels at selected samples aligned to step samples.
+      uint filter_sample = wtile.start_sample + wtile.num_samples - 1;
+      if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(filter_sample)) {
+        adaptive_sampling_filter(filter_sample, &wtile, d_wtile_ptr, cuda_stream[thread_index]);
+      }
+
      // Wait for launch to finish
      check_result_cuda(cuStreamSynchronize(cuda_stream[thread_index]));

@@ -674,6 +686,14 @@ class OptiXDevice : public CUDADevice {
      if (task.get_cancel() && !task.need_finish_queue)
        return;  // Cancel rendering
    }
+
+    // Finalize adaptive sampling
+    if (task.adaptive_sampling.use) {
+      device_ptr d_wtile_ptr = launch_params_ptr + offsetof(KernelParams, tile);
+      adaptive_sampling_post(rtile, &wtile, d_wtile_ptr, cuda_stream[thread_index]);
+      check_result_cuda(cuStreamSynchronize(cuda_stream[thread_index]));
+      task.update_progress(&rtile, rtile.w * rtile.h * wtile.num_samples);
+    }
  }

  bool launch_denoise(DeviceTask &task, RenderTile &rtile)
@@ -813,32 +833,26 @@ class OptiXDevice : public CUDADevice {
      check_result_optix_ret(
          optixDenoiserComputeMemoryResources(denoiser, rect_size.x, rect_size.y, &sizes));

-      auto &state = denoiser_state.second;
-      auto &state_size = denoiser_state.first;
      const size_t scratch_size = sizes.recommendedScratchSizeInBytes;
      const size_t scratch_offset = sizes.stateSizeInBytes;

      // Allocate denoiser state if tile size has changed since last setup
-      if (state_size.x != rect_size.x || state_size.y != rect_size.y || recreate_denoiser) {
-        // Free existing state before allocating new one
-        if (state) {
-          cuMemFree(state);
-          state = 0;
-        }
-
-        check_result_cuda_ret(cuMemAlloc(&state, scratch_offset + scratch_size));
+      if (recreate_denoiser || (denoiser_state.data_width != rect_size.x ||
+                                denoiser_state.data_height != rect_size.y)) {
+        denoiser_state.alloc_to_device(scratch_offset + scratch_size);

        // Initialize denoiser state for the current tile size
        check_result_optix_ret(optixDenoiserSetup(denoiser,
                                                  0,
                                                  rect_size.x,
                                                  rect_size.y,
-                                                  state,
+                                                  denoiser_state.device_pointer,
                                                  scratch_offset,
-                                                  state + scratch_offset,
+                                                  denoiser_state.device_pointer + scratch_offset,
                                                  scratch_size));

-        state_size = rect_size;
+        denoiser_state.data_width = rect_size.x;
+        denoiser_state.data_height = rect_size.y;
      }

      // Set up input and output layer information
@@ -880,14 +894,14 @@ class OptiXDevice : public CUDADevice {
      check_result_optix_ret(optixDenoiserInvoke(denoiser,
                                                 0,
                                                 &params,
-                                                 state,
+                                                 denoiser_state.device_pointer,
                                                 scratch_offset,
                                                 input_layers,
                                                 task.denoising.optix_input_passes,
                                                 overlap_offset.x,
                                                 overlap_offset.y,
                                                 output_layers,
-                                                 state + scratch_offset,
+                                                 denoiser_state.device_pointer + scratch_offset,
                                                 scratch_size));

 #  if OPTIX_DENOISER_NO_PIXEL_STRIDE
@@ -1459,7 +1473,7 @@ class OptiXDevice : public CUDADevice {
      return;
    }

-    if (task.type == DeviceTask::DENOISE || task.type == DeviceTask::DENOISE_BUFFER) {
+    if (task.type == DeviceTask::DENOISE_BUFFER) {
      // Execute denoising in a single thread (e.g. to avoid race conditions during creation)
      task_pool.push(new OptiXDeviceTask(this, task, 0));
      return;
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -55,6 +55,10 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device)
  kernel_next_iteration_setup = NULL;
  kernel_indirect_subsurface = NULL;
  kernel_buffer_update = NULL;
+  kernel_adaptive_stopping = NULL;
+  kernel_adaptive_filter_x = NULL;
+  kernel_adaptive_filter_y = NULL;
+  kernel_adaptive_adjust_samples = NULL;
 }

 DeviceSplitKernel::~DeviceSplitKernel()
@@ -83,6 +87,10 @@ DeviceSplitKernel::~DeviceSplitKernel()
  delete kernel_next_iteration_setup;
  delete kernel_indirect_subsurface;
  delete kernel_buffer_update;
+  delete kernel_adaptive_stopping;
+  delete kernel_adaptive_filter_x;
+  delete kernel_adaptive_filter_y;
+  delete kernel_adaptive_adjust_samples;
 }

 bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures &requested_features)
@@ -114,6 +122,10 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures &requested_fe
  LOAD_KERNEL(next_iteration_setup);
  LOAD_KERNEL(indirect_subsurface);
  LOAD_KERNEL(buffer_update);
+  LOAD_KERNEL(adaptive_stopping);
+  LOAD_KERNEL(adaptive_filter_x);
+  LOAD_KERNEL(adaptive_filter_y);
+  LOAD_KERNEL(adaptive_adjust_samples);

 #undef LOAD_KERNEL

@@ -202,13 +214,21 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
    /* initial guess to start rolling average */
    const int initial_num_samples = 1;
    /* approx number of samples per second */
-    int samples_per_second = (avg_time_per_sample > 0.0) ?
-                                 int(double(time_multiplier) / avg_time_per_sample) + 1 :
-                                 initial_num_samples;
+    const int samples_per_second = (avg_time_per_sample > 0.0) ?
+                                       int(double(time_multiplier) / avg_time_per_sample) + 1 :
+                                       initial_num_samples;

    RenderTile subtile = tile;
    subtile.start_sample = tile.sample;
-    subtile.num_samples = min(samples_per_second,
+    subtile.num_samples = samples_per_second;
+
+    if (task->adaptive_sampling.use) {
+      subtile.num_samples = task->adaptive_sampling.align_dynamic_samples(subtile.start_sample,
+                                                                          subtile.num_samples);
+    }
+
+    /* Don't go beyond requested number of samples. */
+    subtile.num_samples = min(subtile.num_samples,
                              tile.start_sample + tile.num_samples - tile.sample);

    if (device->have_error()) {
@@ -302,6 +322,23 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
      }
    }

+    int filter_sample = tile.sample + subtile.num_samples - 1;
+    if (task->adaptive_sampling.use && task->adaptive_sampling.need_filter(filter_sample)) {
+      size_t buffer_size[2];
+      buffer_size[0] = round_up(tile.w, local_size[0]);
+      buffer_size[1] = round_up(tile.h, local_size[1]);
+      kernel_adaptive_stopping->enqueue(
+          KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data);
+      buffer_size[0] = round_up(tile.h, local_size[0]);
+      buffer_size[1] = round_up(1, local_size[1]);
+      kernel_adaptive_filter_x->enqueue(
+          KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data);
+      buffer_size[0] = round_up(tile.w, local_size[0]);
+      buffer_size[1] = round_up(1, local_size[1]);
+      kernel_adaptive_filter_y->enqueue(
+          KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data);
+    }
+
    double time_per_sample = ((time_dt() - start_time) / subtile.num_samples);

    if (avg_time_per_sample == 0.0) {
@@ -324,6 +361,28 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
    }
  }

+  if (task->adaptive_sampling.use) {
+    /* Reset the start samples. */
+    RenderTile subtile = tile;
+    subtile.start_sample = tile.start_sample;
+    subtile.num_samples = tile.sample - tile.start_sample;
+    enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size),
+                                   subtile,
+                                   num_global_elements,
+                                   kgbuffer,
+                                   kernel_data,
+                                   split_data,
+                                   ray_state,
+                                   queue_index,
+                                   use_queues_flag,
+                                   work_pool_wgs);
+    size_t buffer_size[2];
+    buffer_size[0] = round_up(tile.w, local_size[0]);
+    buffer_size[1] = round_up(tile.h, local_size[1]);
+    kernel_adaptive_adjust_samples->enqueue(
+        KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data);
+  }
+
  return true;
 }

--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -75,6 +75,10 @@ class DeviceSplitKernel {
  SplitKernelFunction *kernel_next_iteration_setup;
  SplitKernelFunction *kernel_indirect_subsurface;
  SplitKernelFunction *kernel_buffer_update;
+  SplitKernelFunction *kernel_adaptive_stopping;
+  SplitKernelFunction *kernel_adaptive_filter_x;
+  SplitKernelFunction *kernel_adaptive_filter_y;
+  SplitKernelFunction *kernel_adaptive_adjust_samples;

  /* Global memory variables [porting]; These memory is used for
   * co-operation between different kernels; Data written by one
--- a/intern/cycles/device/device_task.cpp
+++ b/intern/cycles/device/device_task.cpp
@@ -68,7 +68,7 @@ int DeviceTask::get_subtask_count(int num, int max_size)
  if (type == SHADER) {
    num = min(shader_w, num);
  }
-  else if (type == RENDER || type == DENOISE) {
+  else if (type == RENDER) {
  }
  else {
    num = min(h, num);
@@ -94,7 +94,7 @@ void DeviceTask::split(list<DeviceTask> &tasks, int num, int max_size)
      tasks.push_back(task);
    }
  }
-  else if (type == RENDER || type == DENOISE) {
+  else if (type == RENDER) {
    for (int i = 0; i < num; i++)
      tasks.push_back(*this);
  }
@@ -136,4 +136,59 @@ void DeviceTask::update_progress(RenderTile *rtile, int pixel_samples)
  }
 }

+/* Adaptive Sampling */
+
+AdaptiveSampling::AdaptiveSampling() /* Starts enabled, with the default step size. */
+    : use(true), adaptive_step(ADAPTIVE_SAMPLE_STEP), min_samples(0)
+{
+}
+
+/* Round a fixed batch size so that sample batches line up with the adaptive filter steps. */
+int AdaptiveSampling::align_static_samples(int samples) const
+{
+  if (samples > adaptive_step) {
+    /* Round down to a multiple of adaptive_step. */
+    samples -= samples % adaptive_step;
+  }
+  else if (samples > 0 && samples < adaptive_step) {
+    /* Shrink to a divisor of adaptive_step; this stops at 1 at the latest. */
+    while (adaptive_step % samples != 0) {
+      samples--;
+    }
+  }
+  /* Non-positive requests skip the divisor search (it would take a modulo by zero)
+   * and are handled by the clamp below. */
+
+  return max(samples, 1);
+}
+
+/* Align a dynamically suggested batch of 'samples' that starts at sample 'offset', so
+ * that the batch ends on a multiple of adaptive_step once it reaches past one step. */
+int AdaptiveSampling::align_dynamic_samples(int offset, int samples) const
+{
+  /* Work on the absolute end sample (offset + samples) rather than the batch size. */
+  int end_sample = offset + samples;
+
+  if (end_sample > adaptive_step) {
+    /* Step back to the previous multiple of adaptive_step. */
+    while (end_sample % adaptive_step != 0) {
+      end_sample--;
+    }
+  }
+
+  /* Convert back to a batch size; always request at least one sample. */
+  const int aligned = end_sample - offset;
+  return max(aligned, 1);
+}
+
+bool AdaptiveSampling::need_filter(int sample) const
+{
+  if (sample <= min_samples) {
+    return false; /* Never filter until 'sample' has passed min_samples. */
+  }
+  /* Same as sample % adaptive_step == adaptive_step - 1 when the step is a power of two. */
+  const int step_mask = adaptive_step - 1;
+  return (sample & step_mask) == step_mask;
+}
+
 CCL_NAMESPACE_END
--- a/intern/cycles/device/device_task.h
+++ b/intern/cycles/device/device_task.h
@@ -62,9 +62,22 @@ class DenoiseParams {
  }
 };

+class AdaptiveSampling { /* Adaptive sampling settings and batch alignment helpers of a task. */
+ public:
+  AdaptiveSampling();
+
+  int align_static_samples(int samples) const; /* Align a fixed batch size to adaptive_step. */
+  int align_dynamic_samples(int offset, int samples) const; /* Same, for a batch at 'offset'. */
+  bool need_filter(int sample) const; /* Whether the filter should run after 'sample'. */
+
+  bool use;          /* Checked by the devices before any adaptive sampling work is done. */
+  int adaptive_step; /* Step size used by the align_* helpers and by need_filter(). */
+  int min_samples;   /* need_filter() returns false while sample <= min_samples. */
+};
+
 class DeviceTask : public Task {
 public:
-  typedef enum { RENDER, DENOISE, DENOISE_BUFFER, FILM_CONVERT, SHADER } Type;
+  typedef enum { RENDER, FILM_CONVERT, SHADER, DENOISE_BUFFER } Type;
  Type type;

  int x, y, w, h;
@@ -90,7 +103,7 @@ class DeviceTask : public Task {

  void update_progress(RenderTile *rtile, int pixel_samples = -1);

-  function<bool(Device *device, RenderTile &)> acquire_tile;
+  function<bool(Device *device, RenderTile &, uint)> acquire_tile;
  function<void(long, int)> update_progress_sample;
  function<void(RenderTile &)> update_tile_sample;
  function<void(RenderTile &)> release_tile;
@@ -98,6 +111,7 @@ class DeviceTask : public Task {
  function<void(RenderTile *, Device *)> map_neighbor_tiles;
  function<void(RenderTile *, Device *)> unmap_neighbor_tiles;

+  uint tile_types;
  DenoiseParams denoising;
  bool denoising_from_render;
  vector<int> denoising_frames;
@@ -114,6 +128,7 @@ class DeviceTask : public Task {

  bool need_finish_queue;
  bool integrator_branched;
+  AdaptiveSampling adaptive_sampling;

 protected:
  double last_update_time;
--- a/intern/cycles/device/opencl/device_opencl.h
+++ b/intern/cycles/device/opencl/device_opencl.h
@@ -445,6 +445,7 @@ class OpenCLDevice : public Device {
                    device_ptr rgba_byte,
                    device_ptr rgba_half);
  void shader(DeviceTask &task);
+  void update_adaptive(DeviceTask &task, RenderTile &tile, int sample);

  void denoise(RenderTile &tile, DenoisingTask &denoising);

--- a/intern/cycles/device/opencl/device_opencl_impl.cpp
+++ b/intern/cycles/device/opencl/device_opencl_impl.cpp
@@ -56,7 +56,11 @@ static const string SPLIT_BUNDLE_KERNELS =
    "enqueue_inactive "
    "next_iteration_setup "
    "indirect_subsurface "
-    "buffer_update";
+    "buffer_update "
+    "adaptive_stopping "
+    "adaptive_filter_x "
+    "adaptive_filter_y "
+    "adaptive_adjust_samples";

 const string OpenCLDevice::get_opencl_program_name(const string &kernel_name)
 {
@@ -283,6 +287,10 @@ void OpenCLDevice::OpenCLSplitPrograms::load_kernels(
    ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(next_iteration_setup);
    ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(indirect_subsurface);
    ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(buffer_update);
+    ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_stopping);
+    ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_filter_x);
+    ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_filter_y);
+    ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_adjust_samples);
    programs.push_back(&program_split);

 #  undef ADD_SPLIT_KERNEL_PROGRAM
@@ -1308,7 +1316,7 @@ void OpenCLDevice::thread_run(DeviceTask *task)
 {
  flush_texture_buffers();

-  if (task->type == DeviceTask::RENDER || task->type == DeviceTask::DENOISE) {
+  if (task->type == DeviceTask::RENDER) {
    RenderTile tile;
    DenoisingTask denoising(this, *task);

@@ -1317,7 +1325,7 @@ void OpenCLDevice::thread_run(DeviceTask *task)
    kgbuffer.alloc_to_device(1);

    /* Keep rendering tiles until done. */
-    while (task->acquire_tile(this, tile)) {
+    while (task->acquire_tile(this, tile, task->tile_types)) {
      if (tile.task == RenderTile::PATH_TRACE) {
        assert(tile.task == RenderTile::PATH_TRACE);
        scoped_timer timer(&tile.buffers->render_time);
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -36,6 +36,10 @@ set(SRC_CUDA_KERNELS
 )

 set(SRC_OPENCL_KERNELS
+  kernels/opencl/kernel_adaptive_stopping.cl
+  kernels/opencl/kernel_adaptive_filter_x.cl
+  kernels/opencl/kernel_adaptive_filter_y.cl
+  kernels/opencl/kernel_adaptive_adjust_samples.cl
  kernels/opencl/kernel_bake.cl
  kernels/opencl/kernel_base.cl
  kernels/opencl/kernel_displace.cl
@@ -94,6 +98,7 @@ set(SRC_BVH_HEADERS

 set(SRC_HEADERS
  kernel_accumulate.h
+  kernel_adaptive_sampling.h
  kernel_bake.h
  kernel_camera.h
  kernel_color.h
@@ -324,6 +329,10 @@ set(SRC_UTIL_HEADERS
 )

 set(SRC_SPLIT_HEADERS
+  split/kernel_adaptive_adjust_samples.h
+  split/kernel_adaptive_filter_x.h
+  split/kernel_adaptive_filter_y.h
+  split/kernel_adaptive_stopping.h
  split/kernel_branched.h
  split/kernel_buffer_update.h
  split/kernel_data_init.h
--- a/intern/cycles/kernel/bvh/bvh_embree.h
+++ b/intern/cycles/kernel/bvh/bvh_embree.h
@@ -17,9 +17,12 @@
 #include <embree3/rtcore_ray.h>
 #include <embree3/rtcore_scene.h>

+// clang-format off
 #include "kernel/kernel_compat_cpu.h"
 #include "kernel/split/kernel_split_data_types.h"
 #include "kernel/kernel_globals.h"
+// clang-format on
+
 #include "util/util_vector.h"

 CCL_NAMESPACE_BEGIN
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -14,6 +14,7 @@
 * limitations under the License.
 */

+// clang-format off
 #include "kernel/closure/bsdf_ashikhmin_velvet.h"
 #include "kernel/closure/bsdf_diffuse.h"
 #include "kernel/closure/bsdf_oren_nayar.h"
@@ -32,6 +33,7 @@
 #include "kernel/closure/bsdf_principled_sheen.h"
 #include "kernel/closure/bssrdf.h"
 #include "kernel/closure/volume.h"
+// clang-format on

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/kernel/geom/geom.h
+++ b/intern/cycles/kernel/geom/geom.h
@@ -14,6 +14,7 @@
 * limitations under the License.
 */

+// clang-format off
 #include "kernel/geom/geom_attribute.h"
 #include "kernel/geom/geom_object.h"
 #ifdef __PATCH_EVAL__
@@ -30,3 +31,4 @@
 #include "kernel/geom/geom_curve_intersect.h"
 #include "kernel/geom/geom_volume.h"
 #include "kernel/geom/geom_primitive.h"
+// clang-format on
--- a/intern/cycles/kernel/kernel_adaptive_sampling.h
+++ b/intern/cycles/kernel/kernel_adaptive_sampling.h
@@ -0,0 +1,231 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __KERNEL_ADAPTIVE_SAMPLING_H__
+#define __KERNEL_ADAPTIVE_SAMPLING_H__
+
+CCL_NAMESPACE_BEGIN
+
+/* Per-pixel convergence test for adaptive sampling.
+ *
+ * Reads the float4 at the start of `buffer` and the float4 stored in the adaptive auxiliary
+ * pass, derives an error estimate from their difference and, when that estimate is below the
+ * integrator threshold scaled by `sample`, increments the fourth component of the auxiliary
+ * pass so the pixel is flagged as converged. */
+
+ccl_device void kernel_do_adaptive_stopping(KernelGlobals *kg,
+                                            ccl_global float *buffer,
+                                            int sample)
+{
+  /* TODO Stefan: Is this better in linear, sRGB or something else? */
+  float4 I = *((ccl_global float4 *)buffer);
+  float4 A = *(ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer);
+
+  /* The per pixel error as seen in section 2.1 of
+   * "A hierarchical automatic stopping condition for Monte Carlo global illumination"
+   * A small epsilon is added to the divisor to prevent division by zero. */
+  const float error = (fabsf(I.x - A.x) + fabsf(I.y - A.y) + fabsf(I.z - A.z)) /
+                      (sample * 0.0001f + sqrtf(I.x + I.y + I.z));
+  const bool converged = error < kernel_data.integrator.adaptive_threshold * (float)sample;
+  if (!converged) {
+    return;
+  }
+
+  /* A non-zero fourth component marks the pixel as converged. */
+  buffer[kernel_data.film.pass_adaptive_aux_buffer + 3] += 1.0f;
+}
+
+/* Adjust the values of an adaptively sampled pixel.
+ *
+ * Multiplies the accumulated pass data of one pixel by `sample_multiplier`. `buffer` points at
+ * the first float of that pixel. The float4 at the start of the buffer and the adaptive
+ * auxiliary pass are always scaled; every other pass is scaled only when the matching flag or
+ * offset in `kernel_data.film` reports it as present. For the denoising features the trailing
+ * components handled by the helper macros below are scaled by the multiplier squared. */
+
+ccl_device void kernel_adaptive_post_adjust(KernelGlobals *kg,
+                                            ccl_global float *buffer,
+                                            float sample_multiplier)
+{
+  *(ccl_global float4 *)(buffer) *= sample_multiplier;
+
+  /* Scale the aux pass too, this is necessary for progressive rendering to work properly. */
+  kernel_assert(kernel_data.film.pass_adaptive_aux_buffer);
+  *(ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer) *= sample_multiplier;
+
+#ifdef __PASSES__
+  int flag = kernel_data.film.pass_flag;
+
+  if (flag & PASSMASK(SHADOW))
+    *(ccl_global float3 *)(buffer + kernel_data.film.pass_shadow) *= sample_multiplier;
+
+  if (flag & PASSMASK(MIST))
+    *(ccl_global float *)(buffer + kernel_data.film.pass_mist) *= sample_multiplier;
+
+  if (flag & PASSMASK(NORMAL))
+    *(ccl_global float3 *)(buffer + kernel_data.film.pass_normal) *= sample_multiplier;
+
+  if (flag & PASSMASK(UV))
+    *(ccl_global float3 *)(buffer + kernel_data.film.pass_uv) *= sample_multiplier;
+
+  if (flag & PASSMASK(MOTION)) {
+    *(ccl_global float4 *)(buffer + kernel_data.film.pass_motion) *= sample_multiplier;
+    *(ccl_global float *)(buffer + kernel_data.film.pass_motion_weight) *= sample_multiplier;
+  }
+
+  if (kernel_data.film.use_light_pass) {
+    int light_flag = kernel_data.film.light_pass_flag;
+
+    if (light_flag & PASSMASK(DIFFUSE_INDIRECT))
+      *(ccl_global float3 *)(buffer + kernel_data.film.pass_diffuse_indirect) *= sample_multiplier;
+    if (light_flag & PASSMASK(GLOSSY_INDIRECT))
+      *(ccl_global float3 *)(buffer + kernel_data.film.pass_glossy_indirect) *= sample_multiplier;
+    if (light_flag & PASSMASK(TRANSMISSION_INDIRECT))
+      *(ccl_global float3 *)(buffer +
+                             kernel_data.film.pass_transmission_indirect) *= sample_multiplier;
+    if (light_flag & PASSMASK(VOLUME_INDIRECT))
+      *(ccl_global float3 *)(buffer + kernel_data.film.pass_volume_indirect) *= sample_multiplier;
+    if (light_flag & PASSMASK(DIFFUSE_DIRECT))
+      *(ccl_global float3 *)(buffer + kernel_data.film.pass_diffuse_direct) *= sample_multiplier;
+    if (light_flag & PASSMASK(GLOSSY_DIRECT))
+      *(ccl_global float3 *)(buffer + kernel_data.film.pass_glossy_direct) *= sample_multiplier;
+    if (light_flag & PASSMASK(TRANSMISSION_DIRECT))
+      *(ccl_global float3 *)(buffer +
+                             kernel_data.film.pass_transmission_direct) *= sample_multiplier;
+    if (light_flag & PASSMASK(VOLUME_DIRECT))
+      *(ccl_global float3 *)(buffer + kernel_data.film.pass_volume_direct) *= sample_multiplier;
+
+    if (light_flag & PASSMASK(EMISSION))
+      *(ccl_global float3 *)(buffer + kernel_data.film.pass_emission) *= sample_multiplier;
+    if (light_flag & PASSMASK(BACKGROUND))
+      *(ccl_global float3 *)(buffer + kernel_data.film.pass_background) *= sample_multiplier;
+    if (light_flag & PASSMASK(AO))
+      *(ccl_global float3 *)(buffer + kernel_data.film.pass_ao) *= sample_multiplier;
+
+    if (light_flag & PASSMASK(DIFFUSE_COLOR))
+      *(ccl_global float3 *)(buffer + kernel_data.film.pass_diffuse_color) *= sample_multiplier;
+    if (light_flag & PASSMASK(GLOSSY_COLOR))
+      *(ccl_global float3 *)(buffer + kernel_data.film.pass_glossy_color) *= sample_multiplier;
+    if (light_flag & PASSMASK(TRANSMISSION_COLOR))
+      *(ccl_global float3 *)(buffer +
+                             kernel_data.film.pass_transmission_color) *= sample_multiplier;
+  }
+#endif
+
+#ifdef __DENOISING_FEATURES__
+
+/* Function-local helpers. They are wrapped in do/while(0) so each expands to exactly one
+ * statement, their arguments are parenthesized, and they are undefined again after use so
+ * the names do not leak into code that includes this header. */
+
+/* Scale three floats by `scale` and the three floats after them by `scale` squared. */
+#  define scale_float3_variance(buffer, offset, scale) \
+    do { \
+      *((buffer) + (offset)) *= (scale); \
+      *((buffer) + (offset) + 1) *= (scale); \
+      *((buffer) + (offset) + 2) *= (scale); \
+      *((buffer) + (offset) + 3) *= (scale) * (scale); \
+      *((buffer) + (offset) + 4) *= (scale) * (scale); \
+      *((buffer) + (offset) + 5) *= (scale) * (scale); \
+    } while (0)
+
+/* Scale two floats by `scale` and the float after them by `scale` squared. */
+#  define scale_shadow_variance(buffer, offset, scale) \
+    do { \
+      *((buffer) + (offset)) *= (scale); \
+      *((buffer) + (offset) + 1) *= (scale); \
+      *((buffer) + (offset) + 2) *= (scale) * (scale); \
+    } while (0)
+
+  if (kernel_data.film.pass_denoising_data) {
+    scale_shadow_variance(
+        buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_SHADOW_A, sample_multiplier);
+    scale_shadow_variance(
+        buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_SHADOW_B, sample_multiplier);
+
+    /* The color feature is scaled whether or not a clean pass exists; the clean pass, when
+     * present, gets its three components scaled afterwards. */
+    scale_float3_variance(
+        buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, sample_multiplier);
+    if (kernel_data.film.pass_denoising_clean) {
+      *(buffer + kernel_data.film.pass_denoising_clean) *= sample_multiplier;
+      *(buffer + kernel_data.film.pass_denoising_clean + 1) *= sample_multiplier;
+      *(buffer + kernel_data.film.pass_denoising_clean + 2) *= sample_multiplier;
+    }
+
+    scale_float3_variance(
+        buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_NORMAL, sample_multiplier);
+    scale_float3_variance(
+        buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_ALBEDO, sample_multiplier);
+    *(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH) *= sample_multiplier;
+    *(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH +
+      1) *= sample_multiplier * sample_multiplier;
+  }
+
+#  undef scale_float3_variance
+#  undef scale_shadow_variance
+#endif /* __DENOISING_FEATURES__ */
+
+  if (kernel_data.film.cryptomatte_passes) {
+    /* One slot group per enabled cryptomatte layer, 2 * cryptomatte_depth float2 entries each.
+     * Only the .y component of every entry is scaled. */
+    int num_slots = 0;
+    num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) ? 1 : 0;
+    num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) ? 1 : 0;
+    num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) ? 1 : 0;
+    num_slots = num_slots * 2 * kernel_data.film.cryptomatte_depth;
+    ccl_global float2 *id_buffer = (ccl_global float2 *)(buffer +
+                                                         kernel_data.film.pass_cryptomatte);
+    for (int slot = 0; slot < num_slots; slot++) {
+      id_buffer[slot].y *= sample_multiplier;
+    }
+  }
+}
+
+/* This is a simple box filter in two passes.
+ * When a pixel demands more adaptive samples, let its neighboring pixels draw more samples too. */
+
+ccl_device bool kernel_do_adaptive_filter_x(KernelGlobals *kg, int y, ccl_global WorkTile *tile)
+{
+  /* Horizontal pass over row `y` of the tile. A pixel whose aux.w is zero still wants samples;
+   * its direct left and right neighbors get their aux.w cleared so they keep sampling too.
+   * Returns true when at least one pixel with aux.w == 0 was encountered in the row. */
+  bool row_needs_samples = false;
+  bool prev_unconverged = false;
+
+  for (int x = tile->x; x < tile->x + tile->w; ++x) {
+    const int pixel = tile->offset + x + y * tile->stride;
+    ccl_global float *pixel_buffer = tile->buffer + pixel * kernel_data.film.pass_stride;
+    ccl_global float4 *aux = (ccl_global float4 *)(pixel_buffer +
+                                                   kernel_data.film.pass_adaptive_aux_buffer);
+
+    if (aux->w != 0.0f) {
+      /* Flagged as converged: reopen it when the pixel to its left was unconverged. */
+      if (prev_unconverged) {
+        aux->w = 0.0f;
+      }
+      prev_unconverged = false;
+      continue;
+    }
+
+    row_needs_samples = true;
+    if (!prev_unconverged && x > tile->x) {
+      /* First unconverged pixel of a run: reopen the pixel to its left as well. */
+      const int left = pixel - 1;
+      ccl_global float *left_buffer = tile->buffer + left * kernel_data.film.pass_stride;
+      ccl_global float4 *left_aux = (ccl_global float4 *)(left_buffer +
+                                                          kernel_data.film
+                                                              .pass_adaptive_aux_buffer);
+      left_aux->w = 0.0f;
+    }
+    prev_unconverged = true;
+  }
+
+  return row_needs_samples;
+}
+
+ccl_device bool kernel_do_adaptive_filter_y(KernelGlobals *kg, int x, ccl_global WorkTile *tile)
+{
+  /* Vertical pass over column `x` of the tile. A pixel whose aux.w is zero still wants samples;
+   * the pixels directly above and below it get their aux.w cleared so they keep sampling too.
+   * Returns true when at least one pixel with aux.w == 0 was encountered in the column. */
+  bool column_needs_samples = false;
+  bool prev_unconverged = false;
+
+  for (int y = tile->y; y < tile->y + tile->h; ++y) {
+    const int pixel = tile->offset + x + y * tile->stride;
+    ccl_global float *pixel_buffer = tile->buffer + pixel * kernel_data.film.pass_stride;
+    ccl_global float4 *aux = (ccl_global float4 *)(pixel_buffer +
+                                                   kernel_data.film.pass_adaptive_aux_buffer);
+
+    if (aux->w != 0.0f) {
+      /* Flagged as converged: reopen it when the pixel in the previous row was unconverged. */
+      if (prev_unconverged) {
+        aux->w = 0.0f;
+      }
+      prev_unconverged = false;
+      continue;
+    }
+
+    column_needs_samples = true;
+    if (!prev_unconverged && y > tile->y) {
+      /* First unconverged pixel of a run: reopen the pixel one row earlier as well. */
+      const int previous_row = pixel - tile->stride;
+      ccl_global float *previous_buffer = tile->buffer +
+                                          previous_row * kernel_data.film.pass_stride;
+      ccl_global float4 *previous_aux = (ccl_global float4 *)(previous_buffer +
+                                                              kernel_data.film
+                                                                  .pass_adaptive_aux_buffer);
+      previous_aux->w = 0.0f;
+    }
+    prev_unconverged = true;
+  }
+
+  return column_needs_samples;
+}
+
+CCL_NAMESPACE_END
+
+#endif /* __KERNEL_ADAPTIVE_SAMPLING_H__ */
--- a/intern/cycles/kernel/kernel_jitter.h
+++ b/intern/cycles/kernel/kernel_jitter.h
@@ -195,4 +195,35 @@ ccl_device void cmj_sample_2D(int s, int N, int p, float *fx, float *fy)
 }
 #endif

+/* Return one sample coordinate for `dimension`, taken from the precomputed pattern table
+ * (__sample_pattern_lut) while `sample` is within the stored range, and from cmj_randfloat()
+ * otherwise. */
+ccl_device float pmj_sample_1D(KernelGlobals *kg, int sample, int rng_hash, int dimension)
+{
+  /* Fallback to random. Each pattern stores rows 0 .. NUM_PMJ_SAMPLES - 1, so a sample index
+   * equal to NUM_PMJ_SAMPLES has to take the fallback too; otherwise the index computed below
+   * would already point into the rows of the following pattern. */
+  if (sample >= NUM_PMJ_SAMPLES) {
+    int p = rng_hash + dimension;
+    return cmj_randfloat(sample, p);
+  }
+  /* Two table entries per sample; this function reads the first of the pair.
+   * The entry is XORed with the low 23 bits of a per-dimension hash before being
+   * reinterpreted as a float.
+   * NOTE(review): presumably the table holds floats in [1, 2), so the XOR only scrambles the
+   * mantissa and the subtraction maps the result to [0, 1) - confirm against the generator. */
+  uint tmp_rng = cmj_hash_simple(dimension, rng_hash);
+  int index = ((dimension % NUM_PMJ_PATTERNS) * NUM_PMJ_SAMPLES + sample) * 2;
+  return __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^ (tmp_rng & 0x007fffff)) -
+         1.0f;
+}
+
+/* Write a 2D sample for `dimension` / `dimension + 1` to *fx and *fy, taken from the
+ * precomputed pattern table (__sample_pattern_lut) while `sample` is within the stored range,
+ * and from cmj_randfloat() otherwise. */
+ccl_device void pmj_sample_2D(
+    KernelGlobals *kg, int sample, int rng_hash, int dimension, float *fx, float *fy)
+{
+  /* Fallback to random once the stored rows (0 .. NUM_PMJ_SAMPLES - 1) are exhausted.
+   * The early return is essential: without it the table lookup below would still run with an
+   * out-of-range sample index and overwrite the fallback values just written. */
+  if (sample >= NUM_PMJ_SAMPLES) {
+    int p = rng_hash + dimension;
+    *fx = cmj_randfloat(sample, p);
+    *fy = cmj_randfloat(sample, p + 1);
+    return;
+  }
+  /* Two consecutive table entries per sample: the first feeds *fx, the second *fy. Each is
+   * XORed with the low 23 bits of its own per-dimension hash before being reinterpreted as a
+   * float.
+   * NOTE(review): presumably the table holds floats in [1, 2), so the XOR only scrambles the
+   * mantissa and the subtraction maps the result to [0, 1) - confirm against the generator. */
+  uint tmp_rng = cmj_hash_simple(dimension, rng_hash);
+  int index = ((dimension % NUM_PMJ_PATTERNS) * NUM_PMJ_SAMPLES + sample) * 2;
+  *fx = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^ (tmp_rng & 0x007fffff)) -
+        1.0f;
+  tmp_rng = cmj_hash_simple(dimension + 1, rng_hash);
+  *fy = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index + 1) ^
+                        (tmp_rng & 0x007fffff)) -
+        1.0f;
+}
+
 CCL_NAMESPACE_END
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@ -29,7 +29,9 @@ ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg,
  if (kernel_data.film.pass_denoising_data == 0)
    return;

-  buffer += (sample & 1) ? DENOISING_PASS_SHADOW_B : DENOISING_PASS_SHADOW_A;
+  buffer += sample_is_even(kernel_data.integrator.sampling_pattern, sample) ?
+                DENOISING_PASS_SHADOW_B :
+                DENOISING_PASS_SHADOW_A;

  path_total = ensure_finite(path_total);
  path_total_shaded = ensure_finite(path_total_shaded);
@@ -386,6 +388,41 @@ ccl_device_inline void kernel_write_result(KernelGlobals *kg,
 #ifdef __KERNEL_DEBUG__
  kernel_write_debug_passes(kg, buffer, L);
 #endif
+
+  /* Adaptive Sampling. Fill the additional buffer with the odd samples and calculate our stopping
+     criteria. This is the heuristic from "A hierarchical automatic stopping condition for Monte
+     Carlo global illumination" except that here it is applied per pixel and not in hierarchical
+     tiles. */
+  if (kernel_data.film.pass_adaptive_aux_buffer &&
+      kernel_data.integrator.adaptive_threshold > 0.0f) {
+    if (sample_is_even(kernel_data.integrator.sampling_pattern, sample)) {
+      kernel_write_pass_float4(buffer + kernel_data.film.pass_adaptive_aux_buffer,
+                               make_float4(L_sum.x * 2.0f, L_sum.y * 2.0f, L_sum.z * 2.0f, 0.0f));
+    }
+#ifdef __KERNEL_CPU__
+    if (sample > kernel_data.integrator.adaptive_min_samples &&
+        (sample & (ADAPTIVE_SAMPLE_STEP - 1)) == (ADAPTIVE_SAMPLE_STEP - 1)) {
+      kernel_do_adaptive_stopping(kg, buffer, sample);
+    }
+#endif
+  }
+
+  /* Write the sample count as negative numbers initially to mark the samples as in progress.
+   * Once the tile has finished rendering, the sign gets flipped and all the pixel values
+   * are scaled as if they were taken at a uniform sample count. */
+  if (kernel_data.film.pass_sample_count) {
+    /* Make sure it's a negative number. In progressive refine mode, this bit gets flipped between
+     * passes. */
+#ifdef __ATOMIC_PASS_WRITE__
+    atomic_fetch_and_or_uint32((ccl_global uint *)(buffer + kernel_data.film.pass_sample_count),
+                               0x80000000);
+#else
+    if (buffer[kernel_data.film.pass_sample_count] > 0) {
+      buffer[kernel_data.film.pass_sample_count] *= -1.0f;
+    }
+#endif
+    kernel_write_pass_float(buffer + kernel_data.film.pass_sample_count, -1.0f);
+  }
 }

 CCL_NAMESPACE_END
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -18,6 +18,7 @@
 #  include "kernel/osl/osl_shader.h"
 #endif

+// clang-format off
 #include "kernel/kernel_random.h"
 #include "kernel/kernel_projection.h"
 #include "kernel/kernel_montecarlo.h"
@@ -31,6 +32,7 @@
 #include "kernel/kernel_accumulate.h"
 #include "kernel/kernel_shader.h"
 #include "kernel/kernel_light.h"
+#include "kernel/kernel_adaptive_sampling.h"
 #include "kernel/kernel_passes.h"

 #if defined(__VOLUME__) || defined(__SUBSURFACE__)
@@ -48,6 +50,7 @@
 #include "kernel/kernel_path_surface.h"
 #include "kernel/kernel_path_volume.h"
 #include "kernel/kernel_path_subsurface.h"
+// clang-format on

 CCL_NAMESPACE_BEGIN

@@ -656,6 +659,14 @@ ccl_device void kernel_path_trace(

  buffer += index * pass_stride;

+  if (kernel_data.film.pass_adaptive_aux_buffer) {
+    ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+                                                   kernel_data.film.pass_adaptive_aux_buffer);
+    if (aux->w > 0.0f) {
+      return;
+    }
+  }
+
  /* Initialize random numbers and sample ray. */
  uint rng_hash;
  Ray ray;
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -523,6 +523,14 @@ ccl_device void kernel_branched_path_trace(

  buffer += index * pass_stride;

+  if (kernel_data.film.pass_adaptive_aux_buffer) {
+    ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+                                                   kernel_data.film.pass_adaptive_aux_buffer);
+    if (aux->w > 0.0f) {
+      return;
+    }
+  }
+
  /* initialize random numbers and ray */
  uint rng_hash;
  Ray ray;
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -43,7 +43,7 @@ ccl_device uint sobol_dimension(KernelGlobals *kg, int index, int dimension)
  uint i = index + SOBOL_SKIP;
  for (int j = 0, x; (x = find_first_set(i)); i >>= x) {
    j += x;
-    result ^= kernel_tex_fetch(__sobol_directions, 32 * dimension + j - 1);
+    result ^= kernel_tex_fetch(__sample_pattern_lut, 32 * dimension + j - 1);
  }
  return result;
 }
@@ -56,7 +56,9 @@ ccl_device_forceinline float path_rng_1D(
 #ifdef __DEBUG_CORRELATION__
  return (float)drand48();
 #endif
-
+  if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) {
+    return pmj_sample_1D(kg, sample, rng_hash, dimension);
+  }
 #ifdef __CMJ__
 #  ifdef __SOBOL__
  if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ)
@@ -99,7 +101,10 @@ ccl_device_forceinline void path_rng_2D(KernelGlobals *kg,
  *fy = (float)drand48();
  return;
 #endif
-
+  if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) {
+    pmj_sample_2D(kg, sample, rng_hash, dimension, fx, fy);
+    return;
+  }
 #ifdef __CMJ__
 #  ifdef __SOBOL__
  if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ)
@@ -284,4 +289,28 @@ ccl_device float lcg_step_float_addrspace(ccl_addr_space uint *rng)
  return (float)*rng * (1.0f / (float)0xFFFFFFFF);
 }

+/* Split the sample sequence into two classes and report which one `sample` belongs to.
+ *
+ * For SAMPLING_PATTERN_PMJ the class is the parity of the number of set bits at the odd bit
+ * positions of `sample` (mask 0xaaaaaaaa). For every other pattern the result is simply the
+ * lowest bit of `sample`, i.e. despite the function name it is true for odd sample indices. */
+ccl_device_inline bool sample_is_even(int pattern, int sample)
+{
+  if (pattern == SAMPLING_PATTERN_PMJ) {
+    /* See Section 10.2.1, "Progressive Multi-Jittered Sample Sequences", Christensen et al.
+     * We can use this to divide the sample sequence into two classes for easier variance
+     * estimation. There must be a more elegant way of writing this? */
+#if defined(__GNUC__) && !defined(__KERNEL_GPU__)
+    return __builtin_popcount(sample & 0xaaaaaaaa) & 1;
+#elif defined(__NVCC__)
+    return __popc(sample & 0xaaaaaaaa) & 1;
+#else
+    /* Portable bit-parallel population count. It is done in unsigned arithmetic: the final
+     * multiplication is meant to wrap around, which is undefined behavior for signed int. */
+    uint i = sample & 0xaaaaaaaa;
+    i = i - ((i >> 1) & 0x55555555);
+    i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
+    i = (((i + (i >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
+    return i & 1;
+#endif
+  }
+  else {
+    /* TODO(Stefan): Are there reliable ways of dividing CMJ and Sobol into two classes? */
+    return sample & 0x1;
+  }
+}
+
 CCL_NAMESPACE_END
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -23,10 +23,12 @@
 * Release.
 */

+// clang-format off
 #include "kernel/closure/alloc.h"
 #include "kernel/closure/bsdf_util.h"
 #include "kernel/closure/bsdf.h"
 #include "kernel/closure/emissive.h"
+// clang-format on

 #include "kernel/svm/svm.h"

--- a/intern/cycles/kernel/kernel_textures.h
+++ b/intern/cycles/kernel/kernel_textures.h
@@ -77,7 +77,7 @@ KERNEL_TEX(KernelShader, __shaders)
 KERNEL_TEX(float, __lookup_table)

 /* sobol */
-KERNEL_TEX(uint, __sobol_directions)
+KERNEL_TEX(uint, __sample_pattern_lut)

 /* image textures */
 KERNEL_TEX(TextureInfo, __texture_info)
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -63,6 +63,11 @@ CCL_NAMESPACE_BEGIN

 #define VOLUME_STACK_SIZE 32

+/* Adaptive sampling constants */
+#define ADAPTIVE_SAMPLE_STEP 4
+static_assert((ADAPTIVE_SAMPLE_STEP & (ADAPTIVE_SAMPLE_STEP - 1)) == 0,
+              "ADAPTIVE_SAMPLE_STEP must be power of two for bitwise operations to work");
+
 /* Split kernel constants */
 #define WORK_POOL_SIZE_GPU 64
 #define WORK_POOL_SIZE_CPU 1
@@ -267,6 +272,7 @@ enum PathTraceDimension {
 enum SamplingPattern {
  SAMPLING_PATTERN_SOBOL = 0,
  SAMPLING_PATTERN_CMJ = 1,
+  SAMPLING_PATTERN_PMJ = 2,

  SAMPLING_NUM_PATTERNS,
 };
@@ -373,6 +379,8 @@ typedef enum PassType {
  PASS_CRYPTOMATTE,
  PASS_AOV_COLOR,
  PASS_AOV_VALUE,
+  PASS_ADAPTIVE_AUX_BUFFER,
+  PASS_SAMPLE_COUNT,
  PASS_CATEGORY_MAIN_END = 31,

  PASS_MIST = 32,
@@ -1222,6 +1230,9 @@ typedef struct KernelFilm {
  int cryptomatte_depth;
  int pass_cryptomatte;

+  int pass_adaptive_aux_buffer;
+  int pass_sample_count;
+
  int pass_mist;
  float mist_start;
  float mist_inv_depth;
@@ -1255,6 +1266,8 @@ typedef struct KernelFilm {
  int display_divide_pass_stride;
  int use_display_exposure;
  int use_display_pass_alpha;
+
+  int pad3, pad4, pad5;
 } KernelFilm;
 static_assert_align(KernelFilm, 16);

@@ -1336,6 +1349,8 @@ typedef struct KernelIntegrator {
  /* sampler */
  int sampling_pattern;
  int aa_samples;
+  int adaptive_min_samples;
+  float adaptive_threshold;

  /* volume render */
  int use_volumes;
@@ -1347,7 +1362,7 @@ typedef struct KernelIntegrator {

  int max_closures;

-  int pad1;
+  int pad1, pad2, pad3;
 } KernelIntegrator;
 static_assert_align(KernelIntegrator, 16);

@@ -1661,12 +1676,16 @@ typedef struct WorkTile {
  uint start_sample;
  uint num_samples;

-  uint offset;
+  int offset;
  uint stride;

  ccl_global float *buffer;
 } WorkTile;

+/* Precomputed sample table sizes for PMJ02 sampler. */
+#define NUM_PMJ_SAMPLES (64 * 64)
+#define NUM_PMJ_PATTERNS 48
+
 CCL_NAMESPACE_END

 #endif /*  __KERNEL_TYPES_H__ */
--- a/intern/cycles/kernel/kernel_work_stealing.h
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -23,41 +23,6 @@ CCL_NAMESPACE_BEGIN
 * Utility functions for work stealing
 */

-#ifdef __KERNEL_OPENCL__
-#  pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
-#endif
-
-#ifdef __SPLIT_KERNEL__
-/* Returns true if there is work */
-ccl_device bool get_next_work(KernelGlobals *kg,
-                              ccl_global uint *work_pools,
-                              uint total_work_size,
-                              uint ray_index,
-                              ccl_private uint *global_work_index)
-{
-  /* With a small amount of work there may be more threads than work due to
-   * rounding up of global size, stop such threads immediately. */
-  if (ray_index >= total_work_size) {
-    return false;
-  }
-
-  /* Increase atomic work index counter in pool. */
-  uint pool = ray_index / WORK_POOL_SIZE;
-  uint work_index = atomic_fetch_and_inc_uint32(&work_pools[pool]);
-
-  /* Map per-pool work index to a global work index. */
-  uint global_size = ccl_global_size(0) * ccl_global_size(1);
-  kernel_assert(global_size % WORK_POOL_SIZE == 0);
-  kernel_assert(ray_index < global_size);
-
-  *global_work_index = (work_index / WORK_POOL_SIZE) * global_size + (pool * WORK_POOL_SIZE) +
-                       (work_index % WORK_POOL_SIZE);
-
-  /* Test if all work for this pool is done. */
-  return (*global_work_index < total_work_size);
-}
-#endif
-
 /* Map global work index to tile, pixel X/Y and sample. */
 ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile,
                                      uint global_work_index,
@@ -82,6 +47,71 @@ ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile,
  *sample = tile->start_sample + sample_offset;
 }

+#ifdef __KERNEL_OPENCL__
+#  pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
+#endif
+
+#ifdef __SPLIT_KERNEL__
+/* Claim the next unit of work for the thread identified by `ray_index`.
+ * On success the global index of the claimed work is stored in *global_work_index and true is
+ * returned; false means there is no work for this thread. */
+ccl_device bool get_next_work_item(KernelGlobals *kg,
+                                   ccl_global uint *work_pools,
+                                   uint total_work_size,
+                                   uint ray_index,
+                                   ccl_private uint *global_work_index)
+{
+  /* The global size is rounded up, so with little work there can be more threads than work
+   * items; such surplus threads stop right away. */
+  if (ray_index >= total_work_size) {
+    return false;
+  }
+
+  /* Atomically take the next slot from the pool this thread belongs to. */
+  const uint pool_id = ray_index / WORK_POOL_SIZE;
+  const uint slot = atomic_fetch_and_inc_uint32(&work_pools[pool_id]);
+
+  /* Translate the per-pool slot into a global work index. */
+  const uint num_threads = ccl_global_size(0) * ccl_global_size(1);
+  kernel_assert(num_threads % WORK_POOL_SIZE == 0);
+  kernel_assert(ray_index < num_threads);
+
+  const uint round = slot / WORK_POOL_SIZE;
+  const uint lane = slot % WORK_POOL_SIZE;
+  *global_work_index = round * num_threads + (pool_id * WORK_POOL_SIZE) + lane;
+
+  /* The pool has run dry once the mapped index lies beyond the total amount of work. */
+  return (*global_work_index < total_work_size);
+}
+
+/* Fetch the next work item for this thread. When the adaptive auxiliary pass is enabled, work
+ * items whose pixel has a non-zero aux.w are skipped and the next item is claimed instead.
+ * Returns false once get_next_work_item() reports that no work is left. */
+ccl_device bool get_next_work(KernelGlobals *kg,
+                              ccl_global uint *work_pools,
+                              uint total_work_size,
+                              uint ray_index,
+                              ccl_private uint *global_work_index)
+{
+  /* Without adaptive sampling every claimed item is used as is. */
+  if (!kernel_data.film.pass_adaptive_aux_buffer) {
+    return get_next_work_item(kg, work_pools, total_work_size, ray_index, global_work_index);
+  }
+
+  /* Keep claiming items until one maps to a pixel that still wants samples. */
+  while (get_next_work_item(kg, work_pools, total_work_size, ray_index, global_work_index)) {
+    ccl_global WorkTile *tile = &kernel_split_params.tile;
+    uint x, y, sample;
+    get_work_pixel(tile, *global_work_index, &x, &y, &sample);
+
+    uint buffer_offset = (tile->offset + x + y * tile->stride) * kernel_data.film.pass_stride;
+    ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+    ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+                                                   kernel_data.film.pass_adaptive_aux_buffer);
+    if (aux->w == 0.0f) {
+      return true;
+    }
+  }
+
+  return false;
+}
+#endif
+
 CCL_NAMESPACE_END

 #endif /* __KERNEL_WORK_STEALING_H__ */
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -89,5 +89,9 @@ DECLARE_SPLIT_KERNEL_FUNCTION(enqueue_inactive)
 DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
 DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
 DECLARE_SPLIT_KERNEL_FUNCTION(buffer_update)
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_stopping)
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x)
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y)
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples)

 #undef KERNEL_ARCH
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -20,6 +20,7 @@
 * simply includes this file without worry of copying actual implementation over.
 */

+// clang-format off
 #include "kernel/kernel_compat_cpu.h"

 #ifndef KERNEL_STUB
@@ -58,6 +59,10 @@
 #    include "kernel/split/kernel_next_iteration_setup.h"
 #    include "kernel/split/kernel_indirect_subsurface.h"
 #    include "kernel/split/kernel_buffer_update.h"
+#    include "kernel/split/kernel_adaptive_stopping.h"
+#    include "kernel/split/kernel_adaptive_filter_x.h"
+#    include "kernel/split/kernel_adaptive_filter_y.h"
+#    include "kernel/split/kernel_adaptive_adjust_samples.h"
 #  endif /* __SPLIT_KERNEL__ */
 #else
 #  define STUB_ASSERT(arch, name) \
@@ -67,6 +72,7 @@
 #    include "kernel/split/kernel_data_init.h"
 #  endif /* __SPLIT_KERNEL__ */
 #endif   /* KERNEL_STUB */
+// clang-format on

 CCL_NAMESPACE_BEGIN

@@ -204,6 +210,10 @@ DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint)
 DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
 DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
 DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_stopping)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples)
 #endif   /* __SPLIT_KERNEL__ */

 #undef KERNEL_STUB
--- a/intern/cycles/kernel/kernels/cuda/kernel.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel.cu
@@ -33,6 +33,7 @@
 #include "kernel/kernel_path_branched.h"
 #include "kernel/kernel_bake.h"
 #include "kernel/kernel_work_stealing.h"
+#include "kernel/kernel_adaptive_sampling.h"

 /* kernels */
 extern "C" __global__ void
@@ -81,6 +82,75 @@ kernel_cuda_branched_path_trace(WorkTile *tile, uint total_work_size)
 }
 #endif

+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_adaptive_stopping(WorkTile *tile, int sample, uint total_work_size)
+{
+	/* One thread per tile pixel: run the adaptive stopping test on that pixel's buffer.
+	 * Threads beyond total_work_size, or any thread when the adaptive auxiliary pass is
+	 * disabled, do nothing. */
+	const int work_index = ccl_global_id(0);
+	if(work_index >= total_work_size || !kernel_data.film.pass_adaptive_aux_buffer) {
+		return;
+	}
+
+	KernelGlobals kg;
+	const uint x = tile->x + work_index % tile->w;
+	const uint y = tile->y + work_index / tile->w;
+	const int index = tile->offset + x + y * tile->stride;
+	ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride;
+	kernel_do_adaptive_stopping(&kg, buffer, sample);
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_adaptive_filter_x(WorkTile *tile, int sample, uint)
+{
+	/* One thread per tile row: apply the horizontal adaptive filter to that row. Skipped when
+	 * the adaptive auxiliary pass is disabled or `sample` has not exceeded adaptive_min_samples. */
+	if(!kernel_data.film.pass_adaptive_aux_buffer) {
+		return;
+	}
+	if(sample <= kernel_data.integrator.adaptive_min_samples) {
+		return;
+	}
+	if(ccl_global_id(0) >= tile->h) {
+		return;
+	}
+
+	KernelGlobals kg;
+	const int y = tile->y + ccl_global_id(0);
+	kernel_do_adaptive_filter_x(&kg, y, tile);
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_adaptive_filter_y(WorkTile *tile, int sample, uint)
+{
+	/* One thread per tile column: apply the vertical adaptive filter to that column. Skipped
+	 * when the adaptive auxiliary pass is disabled or `sample` has not exceeded
+	 * adaptive_min_samples. */
+	if(!kernel_data.film.pass_adaptive_aux_buffer) {
+		return;
+	}
+	if(sample <= kernel_data.integrator.adaptive_min_samples) {
+		return;
+	}
+	if(ccl_global_id(0) >= tile->w) {
+		return;
+	}
+
+	KernelGlobals kg;
+	const int x = tile->x + ccl_global_id(0);
+	kernel_do_adaptive_filter_y(&kg, x, tile);
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_adaptive_scale_samples(WorkTile *tile, int start_sample, int sample, uint total_work_size)
+{
+	/* One thread per tile pixel: rescale the pixel's passes after adaptive sampling.
+	 * A negative value in the sample count pass is flipped to positive and the pixel is scaled
+	 * by sample / max(start_sample + 1, count); otherwise the pixel is scaled by
+	 * sample / (sample - 1). Does nothing when the adaptive auxiliary pass is disabled. */
+	if(!kernel_data.film.pass_adaptive_aux_buffer) {
+		return;
+	}
+
+	const int work_index = ccl_global_id(0);
+	if(work_index >= total_work_size) {
+		return;
+	}
+
+	KernelGlobals kg;
+	const uint x = tile->x + work_index % tile->w;
+	const uint y = tile->y + work_index / tile->w;
+	const int index = tile->offset + x + y * tile->stride;
+	ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride;
+	ccl_global float *sample_count = buffer + kernel_data.film.pass_sample_count;
+
+	if(*sample_count < 0.0f) {
+		/* Flip the sign, then scale only when the multiplier actually changes something. */
+		*sample_count = -*sample_count;
+		const float sample_multiplier = sample / max((float)start_sample + 1.0f, *sample_count);
+		if(sample_multiplier != 1.0f) {
+			kernel_adaptive_post_adjust(&kg, buffer, sample_multiplier);
+		}
+		return;
+	}
+
+	/* NOTE(review): sample == 1 makes the divisor zero here - presumably callers never reach
+	 * this branch with sample == 1; confirm. */
+	kernel_adaptive_post_adjust(&kg, buffer, sample / (sample - 1.0f));
+}
+
 extern "C" __global__ void
 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
 kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride)
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
@@ -43,6 +43,10 @@
 #include "kernel/split/kernel_next_iteration_setup.h"
 #include "kernel/split/kernel_indirect_subsurface.h"
 #include "kernel/split/kernel_buffer_update.h"
+#include "kernel/split/kernel_adaptive_stopping.h"
+#include "kernel/split/kernel_adaptive_filter_x.h"
+#include "kernel/split/kernel_adaptive_filter_y.h"
+#include "kernel/split/kernel_adaptive_adjust_samples.h"

 #include "kernel/kernel_film.h"

@@ -121,6 +125,10 @@ DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint)
 DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
 DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
 DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_stopping)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples)

 extern "C" __global__ void
 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
--- a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_adaptive_adjust_samples.h"
+
+#define KERNEL_NAME adaptive_adjust_samples
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
--- a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_adaptive_filter_x.h"
+
+#define KERNEL_NAME adaptive_filter_x
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
--- a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_adaptive_filter_y.h"
+
+#define KERNEL_NAME adaptive_filter_y
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
--- a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_adaptive_stopping.h"
+
+#define KERNEL_NAME adaptive_stopping
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
--- a/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl
@@ -28,3 +28,7 @@
 #include "kernel/kernels/opencl/kernel_next_iteration_setup.cl"
 #include "kernel/kernels/opencl/kernel_indirect_subsurface.cl"
 #include "kernel/kernels/opencl/kernel_buffer_update.cl"
+#include "kernel/kernels/opencl/kernel_adaptive_stopping.cl"
+#include "kernel/kernels/opencl/kernel_adaptive_filter_x.cl"
+#include "kernel/kernels/opencl/kernel_adaptive_filter_y.cl"
+#include "kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl"
--- a/intern/cycles/kernel/osl/background.cpp
+++ b/intern/cycles/kernel/osl/background.cpp
@@ -36,9 +36,11 @@

 #include "kernel/osl/osl_closures.h"

+// clang-format off
 #include "kernel/kernel_compat_cpu.h"
 #include "kernel/closure/alloc.h"
 #include "kernel/closure/emissive.h"
+// clang-format on

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp
+++ b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp
@@ -37,10 +37,12 @@
 #include "kernel/kernel_compat_cpu.h"
 #include "kernel/osl/osl_closures.h"

+// clang-format off
 #include "kernel/kernel_types.h"
 #include "kernel/kernel_montecarlo.h"
 #include "kernel/closure/alloc.h"
 #include "kernel/closure/bsdf_diffuse_ramp.h"
+// clang-format on

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp
+++ b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp
@@ -37,9 +37,11 @@
 #include "kernel/kernel_compat_cpu.h"
 #include "kernel/osl/osl_closures.h"

+// clang-format off
 #include "kernel/kernel_types.h"
 #include "kernel/closure/alloc.h"
 #include "kernel/closure/bsdf_phong_ramp.h"
+// clang-format on

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/kernel/osl/emissive.cpp
+++ b/intern/cycles/kernel/osl/emissive.cpp
@@ -36,10 +36,12 @@

 #include "kernel/osl/osl_closures.h"

+// clang-format off
 #include "kernel/kernel_compat_cpu.h"
 #include "kernel/kernel_types.h"
 #include "kernel/closure/alloc.h"
 #include "kernel/closure/emissive.h"
+// clang-format on

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/kernel/osl/osl_bssrdf.cpp
+++ b/intern/cycles/kernel/osl/osl_bssrdf.cpp
@@ -35,6 +35,7 @@
 #include "kernel/kernel_compat_cpu.h"
 #include "kernel/osl/osl_closures.h"

+// clang-format off
 #include "kernel/kernel_types.h"
 #include "kernel/kernel_montecarlo.h"

@@ -43,6 +44,7 @@
 #include "kernel/closure/bsdf_diffuse.h"
 #include "kernel/closure/bsdf_principled_diffuse.h"
 #include "kernel/closure/bssrdf.h"
+// clang-format on

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/kernel/osl/osl_closures.cpp
+++ b/intern/cycles/kernel/osl/osl_closures.cpp
@@ -39,6 +39,7 @@
 #include "util/util_math.h"
 #include "util/util_param.h"

+// clang-format off
 #include "kernel/kernel_types.h"
 #include "kernel/kernel_compat_cpu.h"
 #include "kernel/split/kernel_split_data_types.h"
@@ -63,6 +64,7 @@
 #include "kernel/closure/bsdf_principled_diffuse.h"
 #include "kernel/closure/bsdf_principled_sheen.h"
 #include "kernel/closure/volume.h"
+// clang-format on

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/kernel/osl/osl_services.cpp
+++ b/intern/cycles/kernel/osl/osl_services.cpp
@@ -39,6 +39,7 @@
 #include "util/util_logging.h"
 #include "util/util_string.h"

+// clang-format off
 #include "kernel/kernel_compat_cpu.h"
 #include "kernel/split/kernel_split_data_types.h"
 #include "kernel/kernel_globals.h"
@@ -56,6 +57,7 @@
 #include "kernel/kernel_projection.h"
 #include "kernel/kernel_accumulate.h"
 #include "kernel/kernel_shader.h"
+// clang-format on

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/kernel/osl/osl_shader.cpp
+++ b/intern/cycles/kernel/osl/osl_shader.cpp
@@ -16,6 +16,7 @@

 #include <OSL/oslexec.h>

+// clang-format off
 #include "kernel/kernel_compat_cpu.h"
 #include "kernel/kernel_montecarlo.h"
 #include "kernel/kernel_types.h"
@@ -28,6 +29,7 @@
 #include "kernel/osl/osl_globals.h"
 #include "kernel/osl/osl_services.h"
 #include "kernel/osl/osl_shader.h"
+// clang-format on

 #include "util/util_foreach.h"

--- a/intern/cycles/kernel/shaders/node_vector_rotate.osl
+++ b/intern/cycles/kernel/shaders/node_vector_rotate.osl
@@ -17,7 +17,8 @@
 #include "stdcycles.h"
 #include "node_math.h"

-shader node_vector_rotate(string type = "axis",
+shader node_vector_rotate(int invert = 0,
+                          string type = "axis",
                          vector VectorIn = vector(0.0, 0.0, 0.0),
                          point Center = point(0.0, 0.0, 0.0),
                          point Rotation = point(0.0, 0.0, 0.0),
@@ -26,20 +27,23 @@ shader node_vector_rotate(string type = "axis",
                          output vector VectorOut = vector(0.0, 0.0, 0.0))
 {
  if (type == "euler_xyz") {
-    VectorOut = transform(euler_to_mat(Rotation), VectorIn - Center) + Center;
+    matrix rmat = (invert) ? transpose(euler_to_mat(Rotation)) : euler_to_mat(Rotation);
+    VectorOut = transform(rmat, VectorIn - Center) + Center;
  }
-  else if (type == "x_axis") {
-    VectorOut = rotate(VectorIn - Center, Angle, point(0.0), vector(1.0, 0.0, 0.0)) + Center;
-  }
-  else if (type == "y_axis") {
-    VectorOut = rotate(VectorIn - Center, Angle, point(0.0), vector(0.0, 1.0, 0.0)) + Center;
-  }
-  else if (type == "z_axis") {
-    VectorOut = rotate(VectorIn - Center, Angle, point(0.0), vector(0.0, 0.0, 1.0)) + Center;
-  }
-  else {  // axis
-    VectorOut = (length(Axis) != 0.0) ?
-                    rotate(VectorIn - Center, Angle, point(0.0), Axis) + Center :
-                    VectorIn;
+  else {
+    float a = (invert) ? -Angle : Angle;
+    if (type == "x_axis") {
+      VectorOut = rotate(VectorIn - Center, a, point(0.0), vector(1.0, 0.0, 0.0)) + Center;
+    }
+    else if (type == "y_axis") {
+      VectorOut = rotate(VectorIn - Center, a, point(0.0), vector(0.0, 1.0, 0.0)) + Center;
+    }
+    else if (type == "z_axis") {
+      VectorOut = rotate(VectorIn - Center, a, point(0.0), vector(0.0, 0.0, 1.0)) + Center;
+    }
+    else {  // axis
+      VectorOut = (length(Axis) != 0.0) ? rotate(VectorIn - Center, a, point(0.0), Axis) + Center :
+                                          VectorIn;
+    }
  }
 }
--- a/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h
+++ b/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* Split kernel entry point: each work item hands one pixel of the tile to
+ * kernel_adaptive_post_adjust(), with a multiplier derived from its sample count pass. */
+ccl_device void kernel_adaptive_adjust_samples(KernelGlobals *kg)
+{
+  const int work_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  if (work_index >= kernel_split_params.tile.w * kernel_split_params.tile.h) {
+    /* This work item lies beyond the last pixel of the tile. */
+    return;
+  }
+
+  /* Locate the pixel's passes in the render buffer. */
+  const int px = kernel_split_params.tile.x + work_index % kernel_split_params.tile.w;
+  const int py = kernel_split_params.tile.y + work_index / kernel_split_params.tile.w;
+  const int buffer_offset = (kernel_split_params.tile.offset + px +
+                             py * kernel_split_params.tile.stride) *
+                            kernel_data.film.pass_stride;
+  ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+  ccl_global float *sample_count = buffer + kernel_data.film.pass_sample_count;
+
+  int sample = kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples;
+  if (*sample_count < 0.0f) {
+    /* Negative count: store it back as a positive value, then scale relative to it. */
+    *sample_count = -*sample_count;
+    float sample_multiplier = sample / max((float)kernel_split_params.tile.start_sample + 1.0f,
+                                           *sample_count);
+    if (sample_multiplier != 1.0f) {
+      kernel_adaptive_post_adjust(kg, buffer, sample_multiplier);
+    }
+  }
+  else {
+    /* NOTE(review): this divides by zero when sample == 1 - confirm that cannot happen. */
+    kernel_adaptive_post_adjust(kg, buffer, sample / (sample - 1.0f));
+  }
+}
+
+CCL_NAMESPACE_END
--- a/intern/cycles/kernel/split/kernel_adaptive_filter_x.h
+++ b/intern/cycles/kernel/split/kernel_adaptive_filter_x.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* Split kernel entry point: each work item calls kernel_do_adaptive_filter_x() for one row
+ * of the tile, once the tile has reached the integrator's minimum adaptive sample count. */
+ccl_device void kernel_adaptive_filter_x(KernelGlobals *kg)
+{
+  const int row = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  if (row >= kernel_split_params.tile.h) {
+    /* More work items than rows in the tile. */
+    return;
+  }
+  if (kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples <
+      kernel_data.integrator.adaptive_min_samples) {
+    /* Minimum sample count not reached yet. */
+    return;
+  }
+
+  int y = kernel_split_params.tile.y + row;
+  kernel_do_adaptive_filter_x(kg, y, &kernel_split_params.tile);
+}
+
+CCL_NAMESPACE_END
--- a/intern/cycles/kernel/split/kernel_adaptive_filter_y.h
+++ b/intern/cycles/kernel/split/kernel_adaptive_filter_y.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* Split kernel entry point: each work item calls kernel_do_adaptive_filter_y() for one column
+ * of the tile, once the tile has reached the integrator's minimum adaptive sample count. */
+ccl_device void kernel_adaptive_filter_y(KernelGlobals *kg)
+{
+  const int column = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  if (column >= kernel_split_params.tile.w) {
+    /* More work items than columns in the tile. */
+    return;
+  }
+  if (kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples <
+      kernel_data.integrator.adaptive_min_samples) {
+    /* Minimum sample count not reached yet. */
+    return;
+  }
+
+  int x = kernel_split_params.tile.x + column;
+  kernel_do_adaptive_filter_y(kg, x, &kernel_split_params.tile);
+}
+
+CCL_NAMESPACE_END
--- a/intern/cycles/kernel/split/kernel_adaptive_stopping.h
+++ b/intern/cycles/kernel/split/kernel_adaptive_stopping.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* Split kernel entry point: each work item runs kernel_do_adaptive_stopping() on one pixel
+ * of the tile, once the tile has reached the integrator's minimum adaptive sample count. */
+ccl_device void kernel_adaptive_stopping(KernelGlobals *kg)
+{
+  const int work_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+  if (work_index >= kernel_split_params.tile.w * kernel_split_params.tile.h) {
+    /* This work item lies beyond the last pixel of the tile. */
+    return;
+  }
+  if (kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples <
+      kernel_data.integrator.adaptive_min_samples) {
+    /* Minimum sample count not reached yet. */
+    return;
+  }
+
+  /* Locate the pixel's passes in the render buffer. */
+  const int px = kernel_split_params.tile.x + work_index % kernel_split_params.tile.w;
+  const int py = kernel_split_params.tile.y + work_index / kernel_split_params.tile.w;
+  const int buffer_offset = (kernel_split_params.tile.offset + px +
+                             py * kernel_split_params.tile.stride) *
+                            kernel_data.film.pass_stride;
+  ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+
+  /* Presumably the zero-based index of the last sample rendered so far - TODO confirm. */
+  kernel_do_adaptive_stopping(kg,
+                              buffer,
+                              kernel_split_params.tile.start_sample +
+                                  kernel_split_params.tile.num_samples - 1);
+}
+
+CCL_NAMESPACE_END
--- a/intern/cycles/kernel/split/kernel_split_common.h
+++ b/intern/cycles/kernel/split/kernel_split_common.h
@@ -17,6 +17,7 @@
 #ifndef __KERNEL_SPLIT_H__
 #define __KERNEL_SPLIT_H__

+// clang-format off
 #include "kernel/kernel_math.h"
 #include "kernel/kernel_types.h"

@@ -52,6 +53,7 @@
 #ifdef __BRANCHED_PATH__
 #  include "kernel/split/kernel_branched.h"
 #endif
+// clang-format on

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/kernel/split/kernel_split_data.h
+++ b/intern/cycles/kernel/split/kernel_split_data.h
@@ -18,6 +18,7 @@
 #define __KERNEL_SPLIT_DATA_H__

 #include "kernel/split/kernel_split_data_types.h"
+
 #include "kernel/kernel_globals.h"

 CCL_NAMESPACE_BEGIN
--- a/intern/cycles/kernel/svm/svm_vector_rotate.h
+++ b/intern/cycles/kernel/svm/svm_vector_rotate.h
@@ -25,45 +25,52 @@ ccl_device void svm_node_vector_rotate(ShaderData *sd,
                                       uint result_stack_offset)
 {
  uint type, vector_stack_offset, rotation_stack_offset, center_stack_offset, axis_stack_offset,
-      angle_stack_offset;
+      angle_stack_offset, invert;

-  svm_unpack_node_uchar3(input_stack_offsets, &type, &vector_stack_offset, &rotation_stack_offset);
+  svm_unpack_node_uchar4(
+      input_stack_offsets, &type, &vector_stack_offset, &rotation_stack_offset, &invert);
  svm_unpack_node_uchar3(
      axis_stack_offsets, &center_stack_offset, &axis_stack_offset, &angle_stack_offset);

-  float3 vector = stack_load_float3(stack, vector_stack_offset);
-  float3 center = stack_load_float3(stack, center_stack_offset);
-  float3 result = make_float3(0.0f, 0.0f, 0.0f);
-
-  if (type == NODE_VECTOR_ROTATE_TYPE_EULER_XYZ) {
-    float3 rotation = stack_load_float3(stack, rotation_stack_offset);  // Default XYZ.
-    Transform rotationTransform = euler_to_transform(rotation);
-    result = transform_direction(&rotationTransform, vector - center) + center;
-  }
-  else {
-    float3 axis;
-    switch (type) {
-      case NODE_VECTOR_ROTATE_TYPE_AXIS_X:
-        axis = make_float3(1.0f, 0.0f, 0.0f);
-        break;
-      case NODE_VECTOR_ROTATE_TYPE_AXIS_Y:
-        axis = make_float3(0.0f, 1.0f, 0.0f);
-        break;
-      case NODE_VECTOR_ROTATE_TYPE_AXIS_Z:
-        axis = make_float3(0.0f, 0.0f, 1.0f);
-        break;
-      default:
-        axis = normalize(stack_load_float3(stack, axis_stack_offset));
-        break;
-    }
-    float angle = stack_load_float(stack, angle_stack_offset);
-    result = (len_squared(axis) != 0.0f) ?
-                 rotate_around_axis(vector - center, axis, angle) + center :
-                 vector;
-  }
-
-  /* Output */
  if (stack_valid(result_stack_offset)) {
+
+    float3 vector = stack_load_float3(stack, vector_stack_offset);
+    float3 center = stack_load_float3(stack, center_stack_offset);
+    float3 result = make_float3(0.0f, 0.0f, 0.0f);
+
+    if (type == NODE_VECTOR_ROTATE_TYPE_EULER_XYZ) {
+      float3 rotation = stack_load_float3(stack, rotation_stack_offset);  // Default XYZ.
+      Transform rotationTransform = euler_to_transform(rotation);
+      if (invert) {
+        result = transform_direction_transposed(&rotationTransform, vector - center) + center;
+      }
+      else {
+        result = transform_direction(&rotationTransform, vector - center) + center;
+      }
+    }
+    else {
+      float3 axis;
+      switch (type) {
+        case NODE_VECTOR_ROTATE_TYPE_AXIS_X:
+          axis = make_float3(1.0f, 0.0f, 0.0f);
+          break;
+        case NODE_VECTOR_ROTATE_TYPE_AXIS_Y:
+          axis = make_float3(0.0f, 1.0f, 0.0f);
+          break;
+        case NODE_VECTOR_ROTATE_TYPE_AXIS_Z:
+          axis = make_float3(0.0f, 0.0f, 1.0f);
+          break;
+        default: /* safe_normalize() leaves a zero axis at zero instead of producing NaN. */
+          axis = safe_normalize(stack_load_float3(stack, axis_stack_offset));
+          break;
+      }
+      float angle = stack_load_float(stack, angle_stack_offset);
+      angle = invert ? -angle : angle;
+      result = (len_squared(axis) != 0.0f) ?
+                   rotate_around_axis(vector - center, axis, angle) + center :
+                   vector;
+    }
+
    stack_store_float3(stack, result_stack_offset, result);
  }
 }
--- a/intern/cycles/render/CMakeLists.txt
+++ b/intern/cycles/render/CMakeLists.txt
@@ -24,6 +24,7 @@ set(SRC
  hair.cpp
  image.cpp
  integrator.cpp
+  jitter.cpp
  light.cpp
  merge.cpp
  mesh.cpp
@@ -62,6 +63,7 @@ set(SRC_HEADERS
  image.h
  integrator.h
  light.h
+  jitter.h
  merge.h
  mesh.h
  nodes.h
--- a/intern/cycles/render/buffers.cpp
+++ b/intern/cycles/render/buffers.cpp
@@ -260,6 +260,22 @@ bool RenderBuffers::get_pass_rect(
    return false;
  }

+  float *sample_count = NULL;
+  if (name == "Combined") {
+    int sample_offset = 0;
+    for (size_t j = 0; j < params.passes.size(); j++) {
+      Pass &pass = params.passes[j];
+      if (pass.type != PASS_SAMPLE_COUNT) {
+        sample_offset += pass.components;
+        continue;
+      }
+      else {
+        sample_count = buffer.data() + sample_offset;
+        break;
+      }
+    }
+  }
+
  int pass_offset = 0;

  for (size_t j = 0; j < params.passes.size(); j++) {
@@ -420,6 +436,20 @@ bool RenderBuffers::get_pass_rect(
      }
      else {
+        /* Pass-wide factors; pixels with a negative sample count override them below. */
+        const float default_scale = scale;
+        const float default_scale_exposure = scale_exposure;
        for (int i = 0; i < size; i++, in += pass_stride, pixels += 4) {
+          if (sample_count && sample_count[i * pass_stride] < 0.0f) {
+            /* Negative sample count: normalize by this pixel's own count instead. */
+            scale = (pass.filter) ? -1.0f / (sample_count[i * pass_stride]) : 1.0f;
+            scale_exposure = (pass.exposure) ? scale * exposure : scale;
+          }
+          else {
+            /* Do not let the factors of a previous pixel leak into this one. */
+            scale = default_scale;
+            scale_exposure = default_scale_exposure;
+          }
+
          float4 f = make_float4(in[0], in[1], in[2], in[3]);

          pixels[0] = f.x * scale_exposure;
--- a/intern/cycles/render/buffers.h
+++ b/intern/cycles/render/buffers.h
@@ -130,7 +130,7 @@ class DisplayBuffer {

 class RenderTile {
 public:
-  typedef enum { PATH_TRACE, DENOISE } Task;
+  typedef enum { PATH_TRACE = (1 << 0), DENOISE = (1 << 1) } Task;

  Task task;
  int x, y, w, h;
--- a/intern/cycles/render/camera.cpp
+++ b/intern/cycles/render/camera.cpp
@@ -29,6 +29,7 @@
 #include "util/util_vector.h"

 /* needed for calculating differentials */
+// clang-format off
 #include "kernel/kernel_compat_cpu.h"
 #include "kernel/split/kernel_split_data.h"
 #include "kernel/kernel_globals.h"
@@ -36,6 +37,7 @@
 #include "kernel/kernel_differential.h"
 #include "kernel/kernel_montecarlo.h"
 #include "kernel/kernel_camera.h"
+// clang-format on

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/render/coverage.cpp
+++ b/intern/cycles/render/coverage.cpp
@@ -15,13 +15,16 @@
 */

 #include "render/coverage.h"
+#include "render/buffers.h"
+
 #include "kernel/kernel_compat_cpu.h"
+#include "kernel/kernel_types.h"
 #include "kernel/split/kernel_split_data.h"
+
 #include "kernel/kernel_globals.h"
 #include "kernel/kernel_id_passes.h"
-#include "kernel/kernel_types.h"
+
 #include "util/util_map.h"
-#include "util/util_vector.h"

 CCL_NAMESPACE_BEGIN

--- a/intern/cycles/render/coverage.h
+++ b/intern/cycles/render/coverage.h
@@ -14,18 +14,19 @@
 * limitations under the License.
 */

-#include "render/buffers.h"
-#include "kernel/kernel_compat_cpu.h"
-#include "kernel/split/kernel_split_data.h"
-#include "kernel/kernel_globals.h"
+#ifndef __COVERAGE_H__
+#define __COVERAGE_H__
+
 #include "util/util_map.h"
 #include "util/util_vector.h"

-#ifndef __COVERAGE_H__
-#  define __COVERAGE_H__
-
 CCL_NAMESPACE_BEGIN

+struct KernelGlobals;
+class RenderTile;
+
+typedef unordered_map<float, float> CoverageMap;
+
 class Coverage {
 public:
  Coverage(KernelGlobals *kg_, RenderTile &tile_) : kg(kg_), tile(tile_)
--- a/intern/cycles/render/film.cpp
+++ b/intern/cycles/render/film.cpp
@@ -183,6 +183,13 @@ void Pass::add(PassType type, vector<Pass> &passes, const char *name)
    case PASS_CRYPTOMATTE:
      pass.components = 4;
      break;
+    case PASS_ADAPTIVE_AUX_BUFFER:
+      pass.components = 4;
+      break;
+    case PASS_SAMPLE_COUNT:
+      pass.components = 1;
+      pass.exposure = false;
+      break;
    case PASS_AOV_COLOR:
      pass.components = 4;
      break;
@@ -311,6 +318,7 @@ NODE_DEFINE(Film)
  SOCKET_BOOLEAN(denoising_clean_pass, "Generate Denoising Clean Pass", false);
  SOCKET_BOOLEAN(denoising_prefiltered_pass, "Generate Denoising Prefiltered Pass", false);
  SOCKET_INT(denoising_flags, "Denoising Flags", 0);
+  SOCKET_BOOLEAN(use_adaptive_sampling, "Use Adaptive Sampling", false);

  return type;
 }
@@ -482,6 +490,12 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
                                      kfilm->pass_stride;
        have_cryptomatte = true;
        break;
+      case PASS_ADAPTIVE_AUX_BUFFER:
+        kfilm->pass_adaptive_aux_buffer = kfilm->pass_stride;
+        break;
+      case PASS_SAMPLE_COUNT:
+        kfilm->pass_sample_count = kfilm->pass_stride;
+        break;
      case PASS_AOV_COLOR:
        if (!have_aov_color) {
          kfilm->pass_aov_color = kfilm->pass_stride;
--- a/intern/cycles/render/film.h
+++ b/intern/cycles/render/film.h
@@ -81,6 +81,8 @@ class Film : public Node {
  CryptomatteType cryptomatte_passes;
  int cryptomatte_depth;

+  bool use_adaptive_sampling;
+
  bool need_update;

  Film();
--- a/intern/cycles/render/integrator.cpp
+++ b/intern/cycles/render/integrator.cpp
@@ -18,12 +18,16 @@
 #include "render/background.h"
 #include "render/integrator.h"
 #include "render/film.h"
+#include "render/jitter.h"
 #include "render/light.h"
 #include "render/scene.h"
 #include "render/shader.h"
 #include "render/sobol.h"

+#include "kernel/kernel_types.h"
+
 #include "util/util_foreach.h"
+#include "util/util_logging.h"
 #include "util/util_hash.h"

 CCL_NAMESPACE_BEGIN
@@ -66,6 +70,9 @@ NODE_DEFINE(Integrator)
  SOCKET_INT(volume_samples, "Volume Samples", 1);
  SOCKET_INT(start_sample, "Start Sample", 0);

+  SOCKET_FLOAT(adaptive_threshold, "Adaptive Threshold", 0.0f);
+  SOCKET_INT(adaptive_min_samples, "Adaptive Min Samples", 0);
+
  SOCKET_BOOLEAN(sample_all_lights_direct, "Sample All Lights Direct", true);
  SOCKET_BOOLEAN(sample_all_lights_indirect, "Sample All Lights Indirect", true);
  SOCKET_FLOAT(light_sampling_threshold, "Light Sampling Threshold", 0.05f);
@@ -78,6 +85,7 @@ NODE_DEFINE(Integrator)
  static NodeEnum sampling_pattern_enum;
  sampling_pattern_enum.insert("sobol", SAMPLING_PATTERN_SOBOL);
  sampling_pattern_enum.insert("cmj", SAMPLING_PATTERN_CMJ);
+  sampling_pattern_enum.insert("pmj", SAMPLING_PATTERN_PMJ);
  SOCKET_ENUM(sampling_pattern, "Sampling Pattern", sampling_pattern_enum, SAMPLING_PATTERN_SOBOL);

  return type;
@@ -174,6 +182,22 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene

  kintegrator->sampling_pattern = sampling_pattern;
  kintegrator->aa_samples = aa_samples;
+  if (aa_samples > 0 && adaptive_min_samples == 0) {
+    kintegrator->adaptive_min_samples = max(4, (int)sqrtf(aa_samples));
+    VLOG(1) << "Cycles adaptive sampling: automatic min samples = "
+            << kintegrator->adaptive_min_samples;
+  }
+  else {
+    kintegrator->adaptive_min_samples = max(4, adaptive_min_samples);
+  }
+  if (aa_samples > 0 && adaptive_threshold == 0.0f) {
+    kintegrator->adaptive_threshold = max(0.001f, 1.0f / (float)aa_samples);
+    VLOG(1) << "Cycles adaptive sampling: automatic threshold = "
+            << kintegrator->adaptive_threshold;
+  }
+  else {
+    kintegrator->adaptive_threshold = adaptive_threshold;
+  }

  if (light_sampling_threshold > 0.0f) {
    kintegrator->light_inv_rr_threshold = 1.0f / light_sampling_threshold;
@@ -203,18 +227,34 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
  int dimensions = PRNG_BASE_NUM + max_samples * PRNG_BOUNCE_NUM;
  dimensions = min(dimensions, SOBOL_MAX_DIMENSIONS);

-  uint *directions = dscene->sobol_directions.alloc(SOBOL_BITS * dimensions);
+  if (sampling_pattern == SAMPLING_PATTERN_SOBOL) {
+    uint *directions = dscene->sample_pattern_lut.alloc(SOBOL_BITS * dimensions);

-  sobol_generate_direction_vectors((uint(*)[SOBOL_BITS])directions, dimensions);
+    sobol_generate_direction_vectors((uint(*)[SOBOL_BITS])directions, dimensions);

-  dscene->sobol_directions.copy_to_device();
+    dscene->sample_pattern_lut.copy_to_device();
+  }
+  else {
+    constexpr int sequence_size = NUM_PMJ_SAMPLES;
+    constexpr int num_sequences = NUM_PMJ_PATTERNS;
+    float2 *directions = (float2 *)dscene->sample_pattern_lut.alloc(sequence_size * num_sequences *
+                                                                    2);
+    TaskPool pool;
+    for (int j = 0; j < num_sequences; ++j) {
+      float2 *sequence = directions + j * sequence_size;
+      pool.push(
+          function_bind(&progressive_multi_jitter_02_generate_2D, sequence, sequence_size, j));
+    }
+    pool.wait_work();
+    dscene->sample_pattern_lut.copy_to_device();
+  }

  need_update = false;
 }

 void Integrator::device_free(Device *, DeviceScene *dscene)
 {
-  dscene->sobol_directions.free();
+  dscene->sample_pattern_lut.free();
 }

 bool Integrator::modified(const Integrator &integrator)
--- a/intern/cycles/render/integrator.h
+++ b/intern/cycles/render/integrator.h
@@ -75,6 +75,9 @@ class Integrator : public Node {
  bool sample_all_lights_indirect;
  float light_sampling_threshold;

+  int adaptive_min_samples;
+  float adaptive_threshold;
+
  enum Method {
    BRANCHED_PATH = 0,
    PATH = 1,
--- a/intern/cycles/render/jitter.cpp
+++ b/intern/cycles/render/jitter.cpp
@@ -0,0 +1,287 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This file is based on "Progressive Multi-Jittered Sample Sequences"
+ * by Per Christensen, Andrew Kensler and Charlie Kilpatrick.
+ * http://graphics.pixar.com/library/ProgressiveMultiJitteredSampling/paper.pdf
+ *
+ * Performance can be improved in the future by implementing the newer
+ * algorithm from Matt Pharr, "Efficient Generation of Points that Satisfy
+ * Two-Dimensional Elementary Intervals": http://jcgt.org/published/0008/01/04/
+ */
+
+#include "render/jitter.h"
+
+#include <math.h>
+#include <vector>
+
+CCL_NAMESPACE_BEGIN
+
+/* Integer hash mixing the index `i` with the pattern/seed value `p`.
+ *
+ * The result is a sequence of xor-shift and multiply steps on a 32-bit
+ * unsigned value; the final multiplier is forced odd by the `1 |` term. */
+static uint cmj_hash(uint i, uint p)
+{
+  uint h = i ^ p;
+
+  h = h ^ (h >> 17);
+  h = h ^ (h >> 10);
+  h = h * 0xb36534e5u;
+
+  h = h ^ (h >> 12);
+  h = h ^ (h >> 21);
+  h = h * 0x93fc4795u;
+
+  h = h ^ 0xdf6e307fu;
+  h = h ^ (h >> 17);
+  h = h * (1u | (p >> 18));
+
+  return h;
+}
+
+/* Map cmj_hash(i, p) to a float by scaling the 32-bit hash.
+ *
+ * The divisor 4294967808 is 2^32 + 512, i.e. slightly larger than the hash
+ * range, so the scaled value stays below 1.0. */
+static float cmj_randfloat(uint i, uint p)
+{
+  const float hash_to_unit = 1.0f / 4294967808.0f;
+  const uint hashed = cmj_hash(i, p);
+  return hashed * hash_to_unit;
+}
+
+/* Generator for progressive multi-jittered (pmj) 2D sample sequences.
+ *
+ * The sequence is grown in rounds: starting from one random point, each round
+ * first doubles the point count (extend_sequence_even) and then doubles it
+ * again (extend_sequence_odd), so the count after every round is a power of
+ * four. New points are drawn by rejection sampling against the 1D strata that
+ * are already occupied. Subclasses change the rejection test by overriding
+ * mark_occupied_strata() and generate_sample_point(). */
+class PMJ_Generator {
+ public:
+  /* Write a pmj sequence of `size` points into `points`, seeded by
+   * `rng_seed_in`. Nothing is written when `size` is not positive. */
+  static void generate_2D(float2 points[], int size, int rng_seed_in)
+  {
+    PMJ_Generator g(rng_seed_in);
+    g.generate(points, size);
+  }
+
+ protected:
+  PMJ_Generator(int rnd_seed_in) : num_samples(1), rnd_index(2), rnd_seed(rnd_seed_in)
+  {
+  }
+
+  /* Shared driver used by the static factories of this class and of its
+   * subclasses; the virtual calls made by the extension steps dispatch on the
+   * dynamic type of `this`. Intended to be called once per generator, since
+   * `num_samples` is not reset. */
+  void generate(float2 points[], int size)
+  {
+    if (size <= 0) {
+      return;
+    }
+
+    /* Every round below grows the sequence to the next power of four. When
+     * `size` is not such a power, generate into a scratch buffer that is large
+     * enough and copy back only the requested prefix, instead of writing past
+     * the end of `points`. */
+    int padded_size = 1;
+    while (padded_size < size) {
+      padded_size *= 4;
+    }
+
+    std::vector<float2> scratch;
+    float2 *out = points;
+    if (padded_size != size) {
+      scratch.resize(padded_size);
+      out = &scratch[0];
+    }
+
+    out[0].x = rnd();
+    out[0].y = rnd();
+    int N = 1;
+    while (N < size) {
+      extend_sequence_even(out, N);
+      extend_sequence_odd(out, 2 * N);
+      N = 4 * N;
+    }
+
+    if (out != points) {
+      for (int s = 0; s < size; ++s) {
+        points[s] = out[s];
+      }
+    }
+  }
+
+  /* Next pseudo-random float from the hash stream of this generator. */
+  float rnd()
+  {
+    return cmj_randfloat(++rnd_index, rnd_seed);
+  }
+
+  /* Clear the 2N one-dimensional strata flags for both axes, then flag the
+   * strata that contain one of the first N points. */
+  virtual void mark_occupied_strata(float2 points[], int N)
+  {
+    int NN = 2 * N;
+    for (int s = 0; s < NN; ++s) {
+      occupied1Dx[s] = occupied1Dy[s] = false;
+    }
+    for (int s = 0; s < N; ++s) {
+      int xstratum = (int)(NN * points[s].x);
+      int ystratum = (int)(NN * points[s].y);
+      occupied1Dx[xstratum] = true;
+      occupied1Dy[ystratum] = true;
+    }
+  }
+
+  /* Append one point to `points` at index `num_samples`.
+   *
+   * (i, j) is the cell of the n x n grid and (xhalf, yhalf) selects the half of
+   * that cell along each axis. x and y are re-drawn independently until each
+   * lands in a free 1D stratum out of 2N, which is then flagged as occupied. */
+  virtual void generate_sample_point(
+      float2 points[], float i, float j, float xhalf, float yhalf, int n, int N)
+  {
+    int NN = 2 * N;
+    float2 pt;
+    int xstratum, ystratum;
+    do {
+      pt.x = (i + 0.5f * (xhalf + rnd())) / n;
+      xstratum = (int)(NN * pt.x);
+    } while (occupied1Dx[xstratum]);
+    do {
+      pt.y = (j + 0.5f * (yhalf + rnd())) / n;
+      ystratum = (int)(NN * pt.y);
+    } while (occupied1Dy[ystratum]);
+    occupied1Dx[xstratum] = true;
+    occupied1Dy[ystratum] = true;
+    points[num_samples] = pt;
+    ++num_samples;
+  }
+
+  /* Grow the sequence from N to 2N points: for every existing point, add a
+   * point in the diagonally opposite sub-quadrant of the same grid cell. */
+  void extend_sequence_even(float2 points[], int N)
+  {
+    int n = (int)sqrtf(N);
+    occupied1Dx.resize(2 * N);
+    occupied1Dy.resize(2 * N);
+    mark_occupied_strata(points, N);
+    for (int s = 0; s < N; ++s) {
+      float2 oldpt = points[s];
+      float i = floorf(n * oldpt.x);
+      float j = floorf(n * oldpt.y);
+      float xhalf = floorf(2.0f * (n * oldpt.x - i));
+      float yhalf = floorf(2.0f * (n * oldpt.y - j));
+      xhalf = 1.0f - xhalf;
+      yhalf = 1.0f - yhalf;
+      generate_sample_point(points, i, j, xhalf, yhalf, n, N);
+    }
+  }
+
+  /* Grow the sequence from N to 2N points, where N is twice the number of
+   * grid cells. The first pass picks, per cell, one of the two sub-quadrants
+   * that are still empty by randomly flipping either the x or the y half; the
+   * second pass fills the remaining sub-quadrant of each cell. */
+  void extend_sequence_odd(float2 points[], int N)
+  {
+    int n = (int)sqrtf(N / 2);
+    occupied1Dx.resize(2 * N);
+    occupied1Dy.resize(2 * N);
+    mark_occupied_strata(points, N);
+    std::vector<float> xhalves(N / 2);
+    std::vector<float> yhalves(N / 2);
+    for (int s = 0; s < N / 2; ++s) {
+      float2 oldpt = points[s];
+      float i = floorf(n * oldpt.x);
+      float j = floorf(n * oldpt.y);
+      float xhalf = floorf(2.0f * (n * oldpt.x - i));
+      float yhalf = floorf(2.0f * (n * oldpt.y - j));
+      if (rnd() > 0.5f) {
+        xhalf = 1.0f - xhalf;
+      }
+      else {
+        yhalf = 1.0f - yhalf;
+      }
+      /* Remember the choice so the second pass can take the opposite one. */
+      xhalves[s] = xhalf;
+      yhalves[s] = yhalf;
+      generate_sample_point(points, i, j, xhalf, yhalf, n, N);
+    }
+    for (int s = 0; s < N / 2; ++s) {
+      float2 oldpt = points[s];
+      float i = floorf(n * oldpt.x);
+      float j = floorf(n * oldpt.y);
+      float xhalf = 1.0f - xhalves[s];
+      float yhalf = 1.0f - yhalves[s];
+      generate_sample_point(points, i, j, xhalf, yhalf, n, N);
+    }
+  }
+
+  std::vector<bool> occupied1Dx, occupied1Dy;
+  int num_samples;
+  int rnd_index, rnd_seed;
+};
+
+/* Variant of PMJ_Generator that rejects a candidate point when any stratum
+ * shape of xdivs x ydivs cells with xdivs * ydivs == 2N (from 2N x 1 down to
+ * 1 x 2N) that contains it is already occupied. */
+class PMJ02_Generator : public PMJ_Generator {
+ public:
+  /* Write a pmj02 sequence of `size` points into `points`.
+   *
+   * This hides PMJ_Generator::generate_2D(), which constructs a plain
+   * PMJ_Generator and therefore never reaches the overrides below. */
+  static void generate_2D(float2 points[], int size, int rng_seed_in)
+  {
+    PMJ02_Generator g(rng_seed_in);
+    g.generate(points, size);
+  }
+
+ protected:
+  PMJ02_Generator(int rnd_seed_in) : PMJ_Generator(rnd_seed_in)
+  {
+  }
+
+  /* Append one point at index `num_samples`; x and y are re-drawn together
+   * until is_occupied() reports no conflict in any stratum shape. */
+  void generate_sample_point(
+      float2 points[], float i, float j, float xhalf, float yhalf, int n, int N) override
+  {
+    int NN = 2 * N;
+    float2 pt;
+    do {
+      pt.x = (i + 0.5f * (xhalf + rnd())) / n;
+      pt.y = (j + 0.5f * (yhalf + rnd())) / n;
+    } while (is_occupied(pt, NN));
+    mark_occupied_strata1(pt, NN);
+    points[num_samples] = pt;
+    ++num_samples;
+  }
+
+  /* Rebuild the occupancy tables (one table of 2N flags per stratum shape)
+   * from the first N points. */
+  void mark_occupied_strata(float2 points[], int N) override
+  {
+    int NN = 2 * N;
+    int num_shapes = (int)log2f(NN) + 1;
+    occupiedStrata.resize(num_shapes);
+    for (int shape = 0; shape < num_shapes; ++shape) {
+      occupiedStrata[shape].resize(NN);
+      for (int n = 0; n < NN; ++n) {
+        occupiedStrata[shape][n] = false;
+      }
+    }
+    for (int s = 0; s < N; ++s) {
+      mark_occupied_strata1(points[s], NN);
+    }
+  }
+
+  /* Flag the stratum containing `pt` in every shape, walking from NN x 1
+   * (xdivs = NN) to 1 x NN by halving xdivs and doubling ydivs. */
+  void mark_occupied_strata1(float2 pt, int NN)
+  {
+    int shape = 0;
+    int xdivs = NN;
+    int ydivs = 1;
+    do {
+      int xstratum = (int)(xdivs * pt.x);
+      int ystratum = (int)(ydivs * pt.y);
+      size_t index = ystratum * xdivs + xstratum;
+      assert(index < (size_t)NN);
+      occupiedStrata[shape][index] = true;
+      shape = shape + 1;
+      xdivs = xdivs / 2;
+      ydivs = ydivs * 2;
+    } while (xdivs > 0);
+  }
+
+  /* Return true when `pt` falls into an already flagged stratum of any
+   * shape; the shapes are visited in the same order as in
+   * mark_occupied_strata1(). */
+  bool is_occupied(float2 pt, int NN)
+  {
+    int shape = 0;
+    int xdivs = NN;
+    int ydivs = 1;
+    do {
+      int xstratum = (int)(xdivs * pt.x);
+      int ystratum = (int)(ydivs * pt.y);
+      size_t index = ystratum * xdivs + xstratum;
+      assert(index < (size_t)NN);
+      if (occupiedStrata[shape][index]) {
+        return true;
+      }
+      shape = shape + 1;
+      xdivs = xdivs / 2;
+      ydivs = ydivs * 2;
+    } while (xdivs > 0);
+    return false;
+  }
+
+ private:
+  std::vector<std::vector<bool>> occupiedStrata;
+};
+
+/* Post-process a generated sequence in place.
+ *
+ * All coordinates are shifted by 1.0. For a non-zero `rng_seed`, the samples
+ * are additionally permuted inside each complete block of 16: the slots listed
+ * in `odd` are shuffled among themselves, then the slots listed in `even`. */
+static void shuffle(float2 points[], int size, int rng_seed)
+{
+  /* Offset samples by 1.0 for faster scrambling in kernel_random.h */
+  for (float2 *pt = points; pt != points + size; ++pt) {
+    pt->x += 1.0f;
+    pt->y += 1.0f;
+  }
+
+  if (rng_seed == 0) {
+    return;
+  }
+
+  constexpr int odd[8] = {0, 1, 4, 5, 10, 11, 14, 15};
+  constexpr int even[8] = {2, 3, 6, 7, 8, 9, 12, 13};
+  const int *const slot_groups[2] = {odd, even};
+
+  const int num_blocks = size / 16;
+  int rng_index = 0;
+  for (int block = 0; block < num_blocks; ++block) {
+    float2 *base = points + block * 16;
+    /* Same draw order as two back-to-back loops: all of `odd`, then `even`. */
+    for (int group = 0; group < 2; ++group) {
+      const int *slots = slot_groups[group];
+      for (int k = 0; k < 8; ++k) {
+        /* Pick a partner in [k, 8) and swap the two slots. */
+        const int pick = (int)(cmj_randfloat(++rng_index, rng_seed) * (8.0f - k) + k);
+        const float2 saved = base[slots[pick]];
+        base[slots[pick]] = base[slots[k]];
+        base[slots[k]] = saved;
+      }
+    }
+  }
+}
+
+/* Generate a sample sequence into `points` with PMJ_Generator::generate_2D(),
+ * seeded by `rng_seed`, then run shuffle() over the first `size` entries of
+ * the same buffer (which offsets every coordinate by 1.0). */
+void progressive_multi_jitter_generate_2D(float2 points[], int size, int rng_seed)
+{
+  PMJ_Generator::generate_2D(points, size, rng_seed);
+  shuffle(points, size, rng_seed);
+}
+
+/* Generate a sample sequence into `points` through
+ * PMJ02_Generator::generate_2D(), seeded by `rng_seed`, then run shuffle()
+ * over the first `size` entries of the same buffer (which offsets every
+ * coordinate by 1.0). */
+void progressive_multi_jitter_02_generate_2D(float2 points[], int size, int rng_seed)
+{
+  PMJ02_Generator::generate_2D(points, size, rng_seed);
+  shuffle(points, size, rng_seed);
+}
+
+CCL_NAMESPACE_END
--- a/intern/cycles/render/jitter.h
+++ b/intern/cycles/render/jitter.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __JITTER_H__
+#define __JITTER_H__
+
+#include "util/util_types.h"
+
+CCL_NAMESPACE_BEGIN
+
+void progressive_multi_jitter_generate_2D(float2 points[], int size, int rng_seed);
+void progressive_multi_jitter_02_generate_2D(float2 points[], int size, int rng_seed);
+
+CCL_NAMESPACE_END
+
+#endif /* __JITTER_H__ */
--- a/intern/cycles/render/nodes.cpp
+++ b/intern/cycles/render/nodes.cpp
@@ -23,18 +23,19 @@
 #include "render/nodes.h"
 #include "render/scene.h"
 #include "render/svm.h"
-#include "kernel/svm/svm_color_util.h"
-#include "kernel/svm/svm_ramp_util.h"
-#include "kernel/svm/svm_math_util.h"
-#include "kernel/svm/svm_mapping_util.h"
 #include "render/osl.h"
 #include "render/constant_fold.h"

-#include "util/util_sky_model.h"
 #include "util/util_foreach.h"
 #include "util/util_logging.h"
+#include "util/util_sky_model.h"
 #include "util/util_transform.h"

+#include "kernel/svm/svm_color_util.h"
+#include "kernel/svm/svm_mapping_util.h"
+#include "kernel/svm/svm_math_util.h"
+#include "kernel/svm/svm_ramp_util.h"
+
 CCL_NAMESPACE_BEGIN

 /* Texture Mapping */
@@ -6157,6 +6158,8 @@ NODE_DEFINE(VectorRotateNode)
  type_enum.insert("euler_xyz", NODE_VECTOR_ROTATE_TYPE_EULER_XYZ);
  SOCKET_ENUM(type, "Type", type_enum, NODE_VECTOR_ROTATE_TYPE_AXIS);

+  SOCKET_BOOLEAN(invert, "Invert", false);
+
  SOCKET_IN_VECTOR(vector, "Vector", make_float3(0.0f, 0.0f, 0.0f));
  SOCKET_IN_POINT(rotation, "Rotation", make_float3(0.0f, 0.0f, 0.0f));
  SOCKET_IN_POINT(center, "Center", make_float3(0.0f, 0.0f, 0.0f));
@@ -6180,19 +6183,20 @@ void VectorRotateNode::compile(SVMCompiler &compiler)
  ShaderInput *angle_in = input("Angle");
  ShaderOutput *vector_out = output("Vector");

-  compiler.add_node(NODE_VECTOR_ROTATE,
-                    compiler.encode_uchar4(type,
-                                           compiler.stack_assign(vector_in),
-                                           compiler.stack_assign(rotation_in)),
-                    compiler.encode_uchar4(compiler.stack_assign(center_in),
-                                           compiler.stack_assign(axis_in),
-                                           compiler.stack_assign(angle_in)),
-                    compiler.stack_assign(vector_out));
+  compiler.add_node(
+      NODE_VECTOR_ROTATE,
+      compiler.encode_uchar4(
+          type, compiler.stack_assign(vector_in), compiler.stack_assign(rotation_in), invert),
+      compiler.encode_uchar4(compiler.stack_assign(center_in),
+                             compiler.stack_assign(axis_in),
+                             compiler.stack_assign(angle_in)),
+      compiler.stack_assign(vector_out));
 }

 void VectorRotateNode::compile(OSLCompiler &compiler)
 {
  compiler.parameter(this, "type");
+  compiler.parameter(this, "invert");
  compiler.add(this, "node_vector_rotate");
 }

--- a/Show More
+++ b/Show More