Fix T94707: inverted output of separate geometry node incorrect

This was an oversight in rB3e92b4ed2408eacd126c0.
Geometry Nodes: Experimental Scale Elements node (WIP).
2022-01-07 08:02:20 +01:00 · 2022-01-06 18:19:06 +01:00 · 2022-01-06 17:01:17 +01:00 · 2022-01-06 16:13:56 +01:00 · 2022-01-06 15:04:53 +01:00 · 2022-01-06 14:32:40 +01:00
1360 changed files with 33812 additions and 25320 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -536,14 +536,12 @@ option(WITH_OPENGL              "When off limits visibility of the opengl header
 option(WITH_GLEW_ES             "Switches to experimental copy of GLEW that has support for OpenGL ES. (temporary option for development purposes)" OFF)
 option(WITH_GL_EGL              "Use the EGL OpenGL system library instead of the platform specific OpenGL system library (CGL, glX, or WGL)"       OFF)
 option(WITH_GL_PROFILE_ES20     "Support using OpenGL ES 2.0. (through either EGL or the AGL/WGL/XGL 'es20' profile)"                               OFF)
-option(WITH_GPU_SHADER_BUILDER  "Shader builder is a developer option enabling linting on GLSL during compilation"                                  OFF)

 mark_as_advanced(
  WITH_OPENGL
  WITH_GLEW_ES
  WITH_GL_EGL
  WITH_GL_PROFILE_ES20
-  WITH_GPU_SHADER_BUILDER
 )

 if(WIN32)
@@ -561,18 +559,12 @@ if(WIN32)
  set(CPACK_INSTALL_PREFIX ${CMAKE_GENERIC_PROGRAM_FILES}/${})
 endif()

-# Compiler tool-chain.
-if(UNIX AND NOT APPLE)
-  if(CMAKE_COMPILER_IS_GNUCC)
-    option(WITH_LINKER_GOLD "Use ld.gold linker which is usually faster than ld.bfd" ON)
-    mark_as_advanced(WITH_LINKER_GOLD)
-    option(WITH_LINKER_LLD "Use ld.lld linker which is usually faster than ld.gold" OFF)
-    mark_as_advanced(WITH_LINKER_LLD)
-  endif()
-  if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
-    option(WITH_LINKER_MOLD "Use ld.mold linker which is usually faster than ld.gold & ld.lld." OFF)
-    mark_as_advanced(WITH_LINKER_MOLD)
-  endif()
+# Compiler toolchain
+if(CMAKE_COMPILER_IS_GNUCC)
+  option(WITH_LINKER_GOLD "Use ld.gold linker which is usually faster than ld.bfd" ON)
+  mark_as_advanced(WITH_LINKER_GOLD)
+  option(WITH_LINKER_LLD "Use ld.lld linker which is usually faster than ld.gold" OFF)
+  mark_as_advanced(WITH_LINKER_LLD)
 endif()

 option(WITH_COMPILER_ASAN "Build and link against address sanitizer (only for Debug & RelWithDebInfo targets)." OFF)
--- a/build_files/build_environment/cmake/gmp.cmake
+++ b/build_files/build_environment/cmake/gmp.cmake
@@ -38,6 +38,13 @@ elseif(UNIX AND NOT APPLE)
  )
 endif()

+if(BLENDER_PLATFORM_ARM)
+  set(GMP_OPTIONS
+    ${GMP_OPTIONS}
+    --disable-assembly
+  )
+endif()
+
 ExternalProject_Add(external_gmp
  URL file://${PACKAGE_DIR}/${GMP_FILE}
  DOWNLOAD_DIR ${DOWNLOAD_DIR}
--- a/build_files/build_environment/cmake/versions.cmake
+++ b/build_files/build_environment/cmake/versions.cmake
@@ -474,9 +474,9 @@ set(ISPC_HASH 2e3abedbc0ea9aaec17d6562c632454d)
 set(ISPC_HASH_TYPE MD5)
 set(ISPC_FILE ispc-${ISPC_VERSION}.tar.gz)

-set(GMP_VERSION 6.2.1)
+set(GMP_VERSION 6.2.0)
 set(GMP_URI https://gmplib.org/download/gmp/gmp-${GMP_VERSION}.tar.xz)
-set(GMP_HASH 0b82665c4a92fd2ade7440c13fcaa42b)
+set(GMP_HASH a325e3f09e6d91e62101e59f9bda3ec1)
 set(GMP_HASH_TYPE MD5)
 set(GMP_FILE gmp-${GMP_VERSION}.tar.xz)

--- a/build_files/build_environment/install_deps.sh
+++ b/build_files/build_environment/install_deps.sh
@@ -492,7 +492,7 @@ OIIO_SKIP=false
 LLVM_VERSION="12.0.0"
 LLVM_VERSION_SHORT="12.0"
 LLVM_VERSION_MIN="11.0"
-LLVM_VERSION_MEX="14.0"
+LLVM_VERSION_MEX="13.0"
 LLVM_VERSION_FOUND=""
 LLVM_FORCE_BUILD=false
 LLVM_FORCE_REBUILD=false
@@ -3620,8 +3620,8 @@ compile_FFmpeg() {
    fi

    ./configure --cc="gcc -Wl,--as-needed" \
-        --extra-ldflags="-pthread" \
-        --prefix=$_inst --enable-shared \
+        --extra-ldflags="-pthread -static-libgcc" \
+        --prefix=$_inst --enable-static \
        --disable-ffplay --disable-doc \
        --enable-gray \
        --enable-avfilter --disable-vdpau \
@@ -5721,6 +5721,76 @@ install_OTHER() {
 # ----------------------------------------------------------------------------
 # Printing User Info

+print_info_ffmpeglink_DEB() {  # For each file path matching lib<name>.so listed by dpkg for $_packages, print <name>, joined with $_ffmpeg_list_sep.
+  dpkg -L $_packages | grep -e ".*\/lib[^\/]\+\.so" | gawk '{ printf(nlines ? "'"$_ffmpeg_list_sep"'%s" : "%s", gensub(/.*lib([^\/]+)\.so/, "\\1", "g", $0)); nlines++ }'
+}
+
+print_info_ffmpeglink_RPM() {  # For each file path matching lib<name>.so listed by rpm for $_packages, print <name>, joined with $_ffmpeg_list_sep.
+  rpm -ql $_packages | grep -e ".*\/lib[^\/]\+\.so" | gawk '{ printf(nlines ? "'"$_ffmpeg_list_sep"'%s" : "%s", gensub(/.*lib([^\/]+)\.so/, "\\1", "g", $0)); nlines++ }'
+}
+
+print_info_ffmpeglink_ARCH() {  # Same as the DEB/RPM variants but via pacman; the grep is anchored with '$', so only paths ending in .so are used.
+  pacman -Ql $_packages | grep -e ".*\/lib[^\/]\+\.so$" | gawk '{ printf(nlines ? "'"$_ffmpeg_list_sep"'%s" : "%s", gensub(/.*lib([^\/]+)\.so/, "\\1", "g", $0)); nlines++ }'
+}
+
+print_info_ffmpeglink() {
+  # Prints the additional libraries to link ffmpeg against; the output is captured by the caller, so this func must only print a ';'-separated list of libs.
+  if [ -z "$DISTRO" ]; then
+    ERROR "Failed to detect distribution type"
+    exit 1
+  fi
+
+  # Build the package list: append the *_DEV variable of every codec whose *_USE flag is true.
+  _packages=""
+
+  if [ "$THEORA_USE" = true ]; then
+    _packages="$_packages $THEORA_DEV"
+  fi
+
+  if [ "$VORBIS_USE" = true ]; then
+    _packages="$_packages $VORBIS_DEV"
+  fi
+
+  if [ "$OGG_USE" = true ]; then
+    _packages="$_packages $OGG_DEV"
+  fi
+
+  if [ "$XVID_USE" = true ]; then
+    _packages="$_packages $XVID_DEV"
+  fi
+
+  if [ "$VPX_USE" = true ]; then
+    _packages="$_packages $VPX_DEV"
+  fi
+
+  if [ "$OPUS_USE" = true ]; then
+    _packages="$_packages $OPUS_DEV"
+  fi
+
+  if [ "$MP3LAME_USE" = true ]; then
+    _packages="$_packages $MP3LAME_DEV"
+  fi
+
+  if [ "$X264_USE" = true ]; then
+    _packages="$_packages $X264_DEV"
+  fi
+
+  if [ "$OPENJPEG_USE" = true ]; then
+    _packages="$_packages $OPENJPEG_DEV"
+  fi
+
+  if [ "$DISTRO" = "DEB" ]; then
+    print_info_ffmpeglink_DEB
+  elif [ "$DISTRO" = "RPM" ]; then
+    print_info_ffmpeglink_RPM
+  elif [ "$DISTRO" = "ARCH" ]; then
+    print_info_ffmpeglink_ARCH
+  # XXX TODO: library lookup is not implemented for any other distribution; a placeholder is printed instead.
+  else
+    PRINT "<Could not determine additional link libraries needed for ffmpeg, replace this by valid list of libs...>"
+  fi
+}
+
 print_info() {
  PRINT ""
  PRINT ""
@@ -5932,10 +6002,12 @@ print_info() {

  if [ "$FFMPEG_SKIP" = false ]; then
    _1="-D WITH_CODEC_FFMPEG=ON"
+    _2="-D FFMPEG_LIBRARIES='avformat;avcodec;avutil;avdevice;swscale;swresample;lzma;rt;`print_info_ffmpeglink`'"
    PRINT "  $_1"
-    _buildargs="$_buildargs $_1"
+    PRINT "  $_2"
+    _buildargs="$_buildargs $_1 $_2"
    if [ -d $INST/ffmpeg ]; then
-      _1="-D FFMPEG_ROOT_DIR=$INST/ffmpeg"
+      _1="-D FFMPEG=$INST/ffmpeg"
      PRINT "  $_1"
      _buildargs="$_buildargs $_1"
    fi
--- a/build_files/cmake/Modules/FindFFmpeg.cmake
+++ b/build_files/cmake/Modules/FindFFmpeg.cmake
@@ -33,8 +33,6 @@ if(NOT FFMPEG_FIND_COMPONENTS)
    avfilter
    avformat
    avutil
-    swscale
-    swresample
  )
 endif()

@@ -52,9 +50,9 @@ foreach(_component ${FFMPEG_FIND_COMPONENTS})
  string(TOUPPER ${_component} _upper_COMPONENT)
  find_library(FFMPEG_${_upper_COMPONENT}_LIBRARY
    NAMES
-      ${_component}
+      ${_upper_COMPONENT}
    HINTS
-      ${_ffmpeg_SEARCH_DIRS}
+      ${LIBDIR}/ffmpeg
    PATH_SUFFIXES
      lib64 lib
  )
--- a/build_files/cmake/Modules/FindOptiX.cmake
+++ b/build_files/cmake/Modules/FindOptiX.cmake
@@ -21,7 +21,7 @@ ENDIF()

 SET(_optix_SEARCH_DIRS
  ${OPTIX_ROOT_DIR}
-  "$ENV{PROGRAMDATA}/NVIDIA Corporation/OptiX SDK 7.3.0"
+  "$ENV{PROGRAMDATA}/NVIDIA Corporation/OptiX SDK 7.0.0"
 )

 FIND_PATH(OPTIX_INCLUDE_DIR
--- a/build_files/cmake/macros.cmake
+++ b/build_files/cmake/macros.cmake
@@ -488,6 +488,7 @@ function(blender_add_test_executable

  include_directories(${includes})
  include_directories(${includes_sys})
+  setup_libdirs()

  BLENDER_SRC_GTEST_EX(
    NAME ${name}
@@ -524,6 +525,83 @@ function(setup_heavy_lib_pool)
  endif()
 endfunction()

+function(SETUP_LIBDIRS)
+  # Adds the *_LIBPATH directory of every enabled dependency to the linker search path via link_directories().
+  # NOTE: For all new libraries, use absolute library paths.
+  # This should eventually be phased out.
+  # APPLE platform uses full paths for linking libraries, and avoids link_directories (MSVC is skipped by the same check).
+  if(NOT MSVC AND NOT APPLE)
+    link_directories(${JPEG_LIBPATH} ${PNG_LIBPATH} ${ZLIB_LIBPATH} ${FREETYPE_LIBPATH})
+
+    if(WITH_PYTHON)  #  AND NOT WITH_PYTHON_MODULE  # WIN32 needs
+      link_directories(${PYTHON_LIBPATH})
+    endif()
+    if(WITH_SDL AND NOT WITH_SDL_DYNLOAD)
+      link_directories(${SDL_LIBPATH})
+    endif()
+    if(WITH_CODEC_FFMPEG)
+      link_directories(${FFMPEG_LIBPATH})
+    endif()
+    if(WITH_IMAGE_OPENEXR)
+      link_directories(${OPENEXR_LIBPATH})
+    endif()
+    if(WITH_IMAGE_TIFF)
+      link_directories(${TIFF_LIBPATH})
+    endif()
+    if(WITH_BOOST)
+      link_directories(${BOOST_LIBPATH})
+    endif()
+    if(WITH_OPENIMAGEIO)
+      link_directories(${OPENIMAGEIO_LIBPATH})
+    endif()
+    if(WITH_OPENIMAGEDENOISE)
+      link_directories(${OPENIMAGEDENOISE_LIBPATH})
+    endif()
+    if(WITH_OPENCOLORIO)
+      link_directories(${OPENCOLORIO_LIBPATH})
+    endif()
+    if(WITH_OPENVDB)
+      link_directories(${OPENVDB_LIBPATH})
+    endif()
+    if(WITH_OPENAL)
+      link_directories(${OPENAL_LIBPATH})
+    endif()
+    if(WITH_JACK AND NOT WITH_JACK_DYNLOAD)
+      link_directories(${JACK_LIBPATH})
+    endif()
+    if(WITH_PULSEAUDIO AND NOT WITH_PULSEAUDIO_DYNLOAD)
+      link_directories(${LIBPULSE_LIBPATH})
+    endif()
+    if(WITH_CODEC_SNDFILE)
+      link_directories(${LIBSNDFILE_LIBPATH})
+    endif()
+    if(WITH_FFTW3)
+      link_directories(${FFTW3_LIBPATH})
+    endif()
+    if(WITH_OPENCOLLADA)
+      link_directories(${OPENCOLLADA_LIBPATH})
+      # # Never set
+      # link_directories(${PCRE_LIBPATH})
+      # link_directories(${EXPAT_LIBPATH})
+    endif()
+    if(WITH_LLVM)
+      link_directories(${LLVM_LIBPATH})
+    endif()
+
+    if(WITH_ALEMBIC)
+      link_directories(${ALEMBIC_LIBPATH})
+    endif()
+
+    if(WITH_GMP)
+      link_directories(${GMP_LIBPATH})
+    endif()
+
+    if(WIN32 AND NOT UNIX)
+      link_directories(${PTHREADS_LIBPATH})
+    endif()
+  endif()
+endfunction()
+
 # Platform specific linker flags for targets.
 function(setup_platform_linker_flags
  target)
@@ -1214,6 +1292,29 @@ macro(openmp_delayload
    endif()
 endmacro()

+macro(blender_precompile_headers target cpp header)  # NOTE: a macro, so pchbase/pchfinal/sources are set in the caller's scope.
+  if(MSVC)
+    # Precompiled headers are only set up for MSVC: derive the .pch output path from the base name of ${cpp}.
+    get_filename_component(pchbase ${cpp} NAME_WE)
+    set(pchfinal "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/${pchbase}.pch")
+
+    # Declare the .pch as an additional output of ${cpp}, the source that creates it.
+    set_property(SOURCE ${cpp} APPEND PROPERTY OBJECT_OUTPUTS "${pchfinal}")
+
+    # Get all sources attached to the target so far (before ${cpp} and ${header} are added below).
+    get_target_property(sources ${target} SOURCES)
+
+    # Make all of those sources depend on the .pch to enforce the build order.
+    foreach(src ${sources})
+      set_property(SOURCE ${src} APPEND PROPERTY OBJECT_DEPENDS "${pchfinal}")
+    endforeach()
+    # Target-wide flags use (/Yu) and force-include (/FI) the header; ${cpp} additionally gets /Yc to create the .pch.
+    target_sources(${target} PRIVATE ${cpp} ${header})
+    set_target_properties(${target} PROPERTIES COMPILE_FLAGS "/Yu${header} /Fp${pchfinal} /FI${header}")
+    set_source_files_properties(${cpp} PROPERTIES COMPILE_FLAGS "/Yc${header} /Fp${pchfinal}")
+  endif()
+endmacro()
+
 macro(set_and_warn_dependency
  _dependency _setting _val)
  # when $_dependency is disabled, forces $_setting = $_val
--- a/build_files/cmake/platform/platform_apple.cmake
+++ b/build_files/cmake/platform/platform_apple.cmake
@@ -173,7 +173,6 @@ if(WITH_IMAGE_OPENEXR)
 endif()

 if(WITH_CODEC_FFMPEG)
-  set(FFMPEG_ROOT_DIR ${LIBDIR}/ffmpeg)
  set(FFMPEG_FIND_COMPONENTS
    avcodec avdevice avformat avutil
    mp3lame ogg opus swresample swscale
@@ -480,11 +479,8 @@ string(APPEND PLATFORM_LINKFLAGS " -stdlib=libc++")
 # Suppress ranlib "has no symbols" warnings (workaround for T48250)
 set(CMAKE_C_ARCHIVE_CREATE   "<CMAKE_AR> Scr <TARGET> <LINK_FLAGS> <OBJECTS>")
 set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> Scr <TARGET> <LINK_FLAGS> <OBJECTS>")
-# llvm-ranlib doesn't support this flag. Xcode's libtool does.
-if(NOT ${CMAKE_RANLIB} MATCHES ".*llvm-ranlib$")
-  set(CMAKE_C_ARCHIVE_FINISH   "<CMAKE_RANLIB> -no_warning_for_no_symbols -c <TARGET>")
-  set(CMAKE_CXX_ARCHIVE_FINISH "<CMAKE_RANLIB> -no_warning_for_no_symbols -c <TARGET>")
-endif()
+set(CMAKE_C_ARCHIVE_FINISH   "<CMAKE_RANLIB> -no_warning_for_no_symbols -c <TARGET>")
+set(CMAKE_CXX_ARCHIVE_FINISH "<CMAKE_RANLIB> -no_warning_for_no_symbols -c <TARGET>")

 if(WITH_COMPILER_CCACHE)
  if(NOT CMAKE_GENERATOR STREQUAL "Xcode")
@@ -511,6 +507,3 @@ list(APPEND CMAKE_BUILD_RPATH "${OpenMP_LIBRARY_DIR}")

 set(CMAKE_SKIP_INSTALL_RPATH FALSE)
 list(APPEND CMAKE_INSTALL_RPATH "@loader_path/../Resources/${BLENDER_VERSION}/lib")
-
-# Same as `CFBundleIdentifier` in Info.plist.
-set(CMAKE_XCODE_ATTRIBUTE_PRODUCT_BUNDLE_IDENTIFIER "org.blenderfoundation.blender")
--- a/build_files/cmake/platform/platform_apple_xcode.cmake
+++ b/build_files/cmake/platform/platform_apple_xcode.cmake
@@ -96,7 +96,7 @@ else()
    # Detect SDK version to use.
    if(NOT DEFINED OSX_SYSTEM)
      execute_process(
-          COMMAND xcrun --sdk macosx --show-sdk-version
+          COMMAND xcrun --show-sdk-version
          OUTPUT_VARIABLE OSX_SYSTEM
          OUTPUT_STRIP_TRAILING_WHITESPACE)
    endif()
--- a/build_files/cmake/platform/platform_unix.cmake
+++ b/build_files/cmake/platform/platform_unix.cmake
@@ -18,7 +18,7 @@
 # All rights reserved.
 # ***** END GPL LICENSE BLOCK *****

-# Libraries configuration for any *nix system including Linux and Unix (excluding APPLE).
+# Libraries configuration for any *nix system including Linux and Unix.

 # Detect precompiled library directory
 if(NOT DEFINED LIBDIR)
@@ -178,30 +178,26 @@ endif()

 if(WITH_CODEC_FFMPEG)
  if(EXISTS ${LIBDIR})
-    set(FFMPEG_ROOT_DIR ${LIBDIR}/ffmpeg)
-    # Override FFMPEG components to also include static library dependencies
-    # included with precompiled libraries, and to ensure correct link order.
-    set(FFMPEG_FIND_COMPONENTS
-      avformat avcodec avdevice avutil swresample swscale
-      sndfile
-      FLAC
-      mp3lame
-      opus
-      theora theoradec theoraenc
-      vorbis vorbisenc vorbisfile ogg
-      vpx
-      x264
-      xvidcore)
-  elseif(FFMPEG)
-    # Old cache variable used for root dir, convert to new standard.
-    set(FFMPEG_ROOT_DIR ${FFMPEG})
+    # For precompiled lib directory, all ffmpeg dependencies are in the same folder
+    file(GLOB ffmpeg_libs ${LIBDIR}/ffmpeg/lib/*.a ${LIBDIR}/sndfile/lib/*.a)
+    set(FFMPEG ${LIBDIR}/ffmpeg CACHE PATH "FFMPEG Directory")
+    set(FFMPEG_LIBRARIES ${ffmpeg_libs} ${ffmpeg_libs} CACHE STRING "FFMPEG Libraries")
+  else()
+    set(FFMPEG /usr CACHE PATH "FFMPEG Directory")
+    set(FFMPEG_LIBRARIES avformat avcodec avutil avdevice swscale CACHE STRING "FFMPEG Libraries")
  endif()
-  find_package(FFmpeg)

-  if(NOT FFMPEG_FOUND)
-    set(WITH_CODEC_FFMPEG OFF)
-    message(STATUS "FFmpeg not found, disabling it")
+  mark_as_advanced(FFMPEG)
+
+  # lame, but until we have proper find module for ffmpeg
+  set(FFMPEG_INCLUDE_DIRS ${FFMPEG}/include)
+  if(EXISTS "${FFMPEG}/include/ffmpeg/")
+    list(APPEND FFMPEG_INCLUDE_DIRS "${FFMPEG}/include/ffmpeg")
  endif()
+  # end lameness
+
+  mark_as_advanced(FFMPEG_LIBRARIES)
+  set(FFMPEG_LIBPATH ${FFMPEG}/lib)
 endif()

 if(WITH_FFTW3)
@@ -648,9 +644,6 @@ endif()
 # ----------------------------------------------------------------------------
 # Compilers

-# Only set the linker once.
-set(_IS_LINKER_DEFAULT ON)
-
 # GNU Compiler
 if(CMAKE_COMPILER_IS_GNUCC)
  # ffp-contract=off:
@@ -669,89 +662,26 @@ if(CMAKE_COMPILER_IS_GNUCC)
  string(PREPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO "${GCC_EXTRA_FLAGS_RELEASE} ")
  unset(GCC_EXTRA_FLAGS_RELEASE)

-  # NOTE(@campbellbarton): Eventually mold will be able to use `-fuse-ld=mold`,
-  # however at the moment this only works for GCC 12.1+ (unreleased at time of writing).
-  # So a workaround is used here "-B" which points to another path to find system commands
-  # such as `ld`.
-  if(WITH_LINKER_MOLD AND _IS_LINKER_DEFAULT)
-    find_program(MOLD_BIN "mold")
-    mark_as_advanced(MOLD_BIN)
-    if(NOT MOLD_BIN)
-      message(STATUS "The \"mold\" binary could not be found, using system linker.")
-      set(WITH_LINKER_MOLD OFF)
-    else()
-      # By default mold installs the binary to:
-      # - `{PREFIX}/bin/mold` as well as a symbolic-link in...
-      # - `{PREFIX}/lib/mold/ld`.
-      # (where `PREFIX` is typically `/usr/`).
-      #
-      # This block of code finds `{PREFIX}/lib/mold` from the `mold` binary.
-      # Other methods of searching for the path could also be made to work,
-      # we could even make our own directory and symbolic-link, however it's more
-      # convenient to use the one provided by mold.
-      #
-      # Use the binary path to "mold", to find the common prefix which contains "lib/mold".
-      # The parent directory: e.g. `/usr/bin/mold` -> `/usr/bin/`.
-      get_filename_component(MOLD_PREFIX "${MOLD_BIN}" DIRECTORY)
-      # The common prefix path: e.g. `/usr/bin/` -> `/usr/` to use as a hint.
-      get_filename_component(MOLD_PREFIX "${MOLD_PREFIX}" DIRECTORY)
-      # Find `{PREFIX}/lib/mold/ld`, store the directory component (without the `ld`).
-      # Then pass `-B {PREFIX}/lib/mold` to GCC so the `ld` located there overrides the default.
-      find_path(
-        MOLD_BIN_DIR "ld"
-        HINTS "${MOLD_PREFIX}"
-        # The default path is `libexec`, Arch Linux for e.g.
-        # replaces this with `lib` so check both.
-        PATH_SUFFIXES "libexec/mold" "lib/mold" "lib64/mold"
-        NO_DEFAULT_PATH
-        NO_CACHE
-      )
-      if(NOT MOLD_BIN_DIR)
-        message(STATUS
-          "The mold linker could not find the directory containing the linker command "
-          "(typically "
-          "\"${MOLD_PREFIX}/libexec/mold/ld\") or "
-          "\"${MOLD_PREFIX}/lib/mold/ld\") using system linker.")
-        set(WITH_LINKER_MOLD OFF)
-      endif()
-      unset(MOLD_PREFIX)
-    endif()
-
-    if(WITH_LINKER_MOLD)
-      # GCC will search for `ld` in this directory first.
-      string(APPEND CMAKE_EXE_LINKER_FLAGS    " -B \"${MOLD_BIN_DIR}\"")
-      string(APPEND CMAKE_SHARED_LINKER_FLAGS " -B \"${MOLD_BIN_DIR}\"")
-      string(APPEND CMAKE_MODULE_LINKER_FLAGS " -B \"${MOLD_BIN_DIR}\"")
-      set(_IS_LINKER_DEFAULT OFF)
-    endif()
-    unset(MOLD_BIN)
-    unset(MOLD_BIN_DIR)
-  endif()
-
-  if(WITH_LINKER_GOLD AND _IS_LINKER_DEFAULT)
+  if(WITH_LINKER_GOLD)
    execute_process(
      COMMAND ${CMAKE_C_COMPILER} -fuse-ld=gold -Wl,--version
      ERROR_QUIET OUTPUT_VARIABLE LD_VERSION)
    if("${LD_VERSION}" MATCHES "GNU gold")
-      string(APPEND CMAKE_EXE_LINKER_FLAGS    " -fuse-ld=gold")
-      string(APPEND CMAKE_SHARED_LINKER_FLAGS " -fuse-ld=gold")
-      string(APPEND CMAKE_MODULE_LINKER_FLAGS " -fuse-ld=gold")
-      set(_IS_LINKER_DEFAULT OFF)
+      string(APPEND CMAKE_C_FLAGS " -fuse-ld=gold")
+      string(APPEND CMAKE_CXX_FLAGS " -fuse-ld=gold")
    else()
      message(STATUS "GNU gold linker isn't available, using the default system linker.")
    endif()
    unset(LD_VERSION)
  endif()

-  if(WITH_LINKER_LLD AND _IS_LINKER_DEFAULT)
+  if(WITH_LINKER_LLD)
    execute_process(
      COMMAND ${CMAKE_C_COMPILER} -fuse-ld=lld -Wl,--version
      ERROR_QUIET OUTPUT_VARIABLE LD_VERSION)
    if("${LD_VERSION}" MATCHES "LLD")
-      string(APPEND CMAKE_EXE_LINKER_FLAGS    " -fuse-ld=lld")
-      string(APPEND CMAKE_SHARED_LINKER_FLAGS " -fuse-ld=lld")
-      string(APPEND CMAKE_MODULE_LINKER_FLAGS " -fuse-ld=lld")
-      set(_IS_LINKER_DEFAULT OFF)
+      string(APPEND CMAKE_C_FLAGS " -fuse-ld=lld")
+      string(APPEND CMAKE_CXX_FLAGS " -fuse-ld=lld")
    else()
      message(STATUS "LLD linker isn't available, using the default system linker.")
    endif()
@@ -761,28 +691,6 @@ if(CMAKE_COMPILER_IS_GNUCC)
 # CLang is the same as GCC for now.
 elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
  set(PLATFORM_CFLAGS "-pipe -fPIC -funsigned-char -fno-strict-aliasing")
-
-  if(WITH_LINKER_MOLD AND _IS_LINKER_DEFAULT)
-    find_program(MOLD_BIN "mold")
-    mark_as_advanced(MOLD_BIN)
-    if(NOT MOLD_BIN)
-      message(STATUS "The \"mold\" binary could not be found, using system linker.")
-      set(WITH_LINKER_MOLD OFF)
-    else()
-      if(CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0)
-        string(APPEND CMAKE_EXE_LINKER_FLAGS    " --ld-path=\"${MOLD_BIN}\"")
-        string(APPEND CMAKE_SHARED_LINKER_FLAGS " --ld-path=\"${MOLD_BIN}\"")
-        string(APPEND CMAKE_MODULE_LINKER_FLAGS " --ld-path=\"${MOLD_BIN}\"")
-      else()
-        string(APPEND CMAKE_EXE_LINKER_FLAGS    " -fuse-ld=\"${MOLD_BIN}\"")
-        string(APPEND CMAKE_SHARED_LINKER_FLAGS " -fuse-ld=\"${MOLD_BIN}\"")
-        string(APPEND CMAKE_MODULE_LINKER_FLAGS " -fuse-ld=\"${MOLD_BIN}\"")
-      endif()
-      set(_IS_LINKER_DEFAULT OFF)
-    endif()
-    unset(MOLD_BIN)
-  endif()
-
 # Intel C++ Compiler
 elseif(CMAKE_C_COMPILER_ID MATCHES "Intel")
  # think these next two are broken
@@ -806,8 +714,6 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "Intel")
  string(APPEND PLATFORM_LINKFLAGS " -static-intel")
 endif()

-unset(_IS_LINKER_DEFAULT)
-
 # Avoid conflicts with Mesa llvmpipe, Luxrender, and other plug-ins that may
 # use the same libraries as Blender with a different version or build options.
 set(PLATFORM_LINKFLAGS
--- a/doc/doxygen/doxygen.intern.h
+++ b/doc/doxygen/doxygen.intern.h
@@ -51,6 +51,9 @@
 /** \defgroup intern_mikktspace MikktSpace
 *  \ingroup intern */

+/** \defgroup intern_numaapi NUMA (Non Uniform Memory Architecture)
+ *  \ingroup intern */
+
 /** \defgroup intern_rigidbody Rigid-Body C-API
 *  \ingroup intern */

--- a/doc/python_api/examples/bpy.types.Bone.convert_local_to_pose.py
+++ b/doc/python_api/examples/bpy.types.Bone.convert_local_to_pose.py
@@ -8,42 +8,27 @@ def set_pose_matrices(obj, matrix_map):
    "Assign pose space matrices of all bones at once, ignoring constraints."

    def rec(pbone, parent_matrix):
-        if pbone.name in matrix_map:
-            matrix = matrix_map[pbone.name]
+        matrix = matrix_map[pbone.name]

-            ## Instead of:
-            # pbone.matrix = matrix
-            # bpy.context.view_layer.update()
+        ## Instead of:
+        # pbone.matrix = matrix
+        # bpy.context.view_layer.update()

-            # Compute and assign local matrix, using the new parent matrix
-            if pbone.parent:
-                pbone.matrix_basis = pbone.bone.convert_local_to_pose(
-                    matrix,
-                    pbone.bone.matrix_local,
-                    parent_matrix=parent_matrix,
-                    parent_matrix_local=pbone.parent.bone.matrix_local,
-                    invert=True
-                )
-            else:
-                pbone.matrix_basis = pbone.bone.convert_local_to_pose(
-                    matrix,
-                    pbone.bone.matrix_local,
-                    invert=True
-                )
+        # Compute and assign local matrix, using the new parent matrix
+        if pbone.parent:
+            pbone.matrix_basis = pbone.bone.convert_local_to_pose(
+                matrix,
+                pbone.bone.matrix_local,
+                parent_matrix=parent_matrix,
+                parent_matrix_local=pbone.parent.bone.matrix_local,
+                invert=True
+            )
        else:
-            # Compute the updated pose matrix from local and new parent matrix
-            if pbone.parent:
-                matrix = pbone.bone.convert_local_to_pose(
-                    pbone.matrix_basis,
-                    pbone.bone.matrix_local,
-                    parent_matrix=parent_matrix,
-                    parent_matrix_local=pbone.parent.bone.matrix_local,
-                )
-            else:
-                matrix = pbone.bone.convert_local_to_pose(
-                    pbone.matrix_basis,
-                    pbone.bone.matrix_local,
-                )
+            pbone.matrix_basis = pbone.bone.convert_local_to_pose(
+                matrix,
+                pbone.bone.matrix_local,
+                invert=True
+            )

        # Recursively process children, passing the new matrix through
        for child in pbone.children:
--- a/extern/audaspace/CMakeLists.txt
+++ b/extern/audaspace/CMakeLists.txt
@@ -1092,12 +1092,12 @@ if(WITH_PYTHON)
 		configure_file(${PYTHON_SOURCE_DIRECTORY}/setup.py.in ${CMAKE_CURRENT_BINARY_DIR}/setup.py ESCAPE_QUOTES @ONLY)

 		if(APPLE)
-			add_custom_command(OUTPUT build COMMAND MACOSX_DEPLOYMENT_TARGET=${CMAKE_OSX_DEPLOYMENT_TARGET} ${PYTHON_EXECUTABLE} setup.py build DEPENDS ${PYTHON_SRC} ${PYTHON_HDR} setup.py)
+			add_custom_command(OUTPUT build COMMAND MACOSX_DEPLOYMENT_TARGET=${CMAKE_OSX_DEPLOYMENT_TARGET} ${PYTHON_EXECUTABLE} setup.py build DEPENDS ${PYTHON_SRC} ${PYTHON_HDR})
 		elseif(WIN32)
 			set(ENV{VS100COMNTOOLS} $ENV{VS120COMNTOOLS})
-			add_custom_command(OUTPUT build COMMAND ${PYTHON_EXECUTABLE} setup.py build DEPENDS ${PYTHON_SRC} ${PYTHON_HDR} setup.py)
+			add_custom_command(OUTPUT build COMMAND ${PYTHON_EXECUTABLE} setup.py build DEPENDS ${PYTHON_SRC} ${PYTHON_HDR})
 		else()
-			add_custom_command(OUTPUT build COMMAND ${PYTHON_EXECUTABLE} setup.py build DEPENDS ${PYTHON_SRC} ${PYTHON_HDR} setup.py)
+			add_custom_command(OUTPUT build COMMAND ${PYTHON_EXECUTABLE} setup.py build DEPENDS ${PYTHON_SRC} ${PYTHON_HDR})
 		endif()
 		add_custom_target(pythonmodule ALL DEPENDS build SOURCES ${PYTHON_SOURCE_DIRECTORY}/setup.py.in ${PYTHON_SRC} ${PYTHON_HDR})
 		add_dependencies(pythonmodule audaspace)
--- a/extern/audaspace/bindings/python/setup.py.in
+++ b/extern/audaspace/bindings/python/setup.py.in
@@ -8,20 +8,20 @@ import numpy
 from distutils.core import setup, Extension

 if len(sys.argv) > 2 and sys.argv[1] == '--build-docs':
-    import subprocess
-    from distutils.core import Distribution
-    from distutils.command.build import build
+	import subprocess
+	from distutils.core import Distribution
+	from distutils.command.build import build

-    dist = Distribution()
-    cmd = build(dist)
-    cmd.finalize_options()
-    #print(cmd.build_platlib)
+	dist = Distribution()
+	cmd = build(dist)
+	cmd.finalize_options()
+	#print(cmd.build_platlib)

-    os.environ['PYTHONPATH'] = os.path.join(os.getcwd(), cmd.build_platlib)
-    os.environ['LD_LIBRARY_PATH'] = os.getcwd()
+	os.environ['PYTHONPATH'] = os.path.join(os.getcwd(), cmd.build_platlib)
+	os.environ['LD_LIBRARY_PATH'] = os.getcwd()

-    ret = subprocess.call(sys.argv[2:])
-    sys.exit(ret)
+	ret = subprocess.call(sys.argv[2:])
+	sys.exit(ret)


 # the following line is not working due to https://bugs.python.org/issue9023
@@ -43,8 +43,7 @@ audaspace = Extension(
                      library_dirs = ['.', 'Release', 'Debug'],
                      language = 'c++',
                      extra_compile_args = extra_args,
-                      define_macros = [('WITH_CONVOLUTION', None)] if '@WITH_FFTW@' == 'ON' else [],
-                      sources = [os.path.join(source_directory, file) for file in ['PyAPI.cpp', 'PyDevice.cpp', 'PyHandle.cpp', 'PySound.cpp', 'PySequenceEntry.cpp', 'PySequence.cpp', 'PyPlaybackManager.cpp', 'PyDynamicMusic.cpp', 'PyThreadPool.cpp', 'PySource.cpp'] + (['PyImpulseResponse.cpp', 'PyHRTF.cpp'] if '@WITH_FFTW@' == 'ON' else [])]
+					  sources = [os.path.join(source_directory, file) for file in ['PyAPI.cpp', 'PyDevice.cpp', 'PyHandle.cpp', 'PySound.cpp', 'PySequenceEntry.cpp', 'PySequence.cpp', 'PyPlaybackManager.cpp', 'PyDynamicMusic.cpp', 'PyThreadPool.cpp', 'PySource.cpp'] + (['PyImpulseResponse.cpp', 'PyHRTF.cpp'] if '@WITH_FFTW@' == 'ON' else [])]
 )

 setup(
@@ -57,6 +56,6 @@ setup(
      license = 'Apache License 2.0',
      long_description = codecs.open(os.path.join(source_directory, '../../README.md'), 'r', 'utf-8').read(),
      ext_modules = [audaspace],
-      headers = [os.path.join(source_directory, file) for file in ['PyAPI.h', 'PyDevice.h', 'PyHandle.h', 'PySound.h', 'PySequenceEntry.h', 'PySequence.h', 'PyPlaybackManager.h', 'PyDynamicMusic.h', 'PyThreadPool.h', 'PySource.h'] + (['PyImpulseResponse.h', 'PyHRTF.h'] if '@WITH_FFTW@' == 'ON' else [])] + ['Audaspace.h']
+	  headers = [os.path.join(source_directory, file) for file in ['PyAPI.h', 'PyDevice.h', 'PyHandle.h', 'PySound.h', 'PySequenceEntry.h', 'PySequence.h', 'PyPlaybackManager.h', 'PyDynamicMusic.h', 'PyThreadPool.h', 'PySource.h'] + (['PyImpulseResponse.h', 'PyHRTF.h'] if '@WITH_FFTW@' == 'ON' else [])] + ['Audaspace.h']
 )

--- a/extern/audaspace/plugins/wasapi/WASAPIDevice.cpp
+++ b/extern/audaspace/plugins/wasapi/WASAPIDevice.cpp
@@ -95,13 +95,6 @@ void WASAPIDevice::runMixingThread()
 				sleep_duration = std::chrono::milliseconds(buffer_size * 1000 / int(m_specs.rate) / 2);
 			}

-			if(m_default_device_changed)
-			{
-				m_default_device_changed = false;
-				result = AUDCLNT_E_DEVICE_INVALIDATED;
-				goto stop_thread;
-			}
-
 			if(FAILED(result = m_audio_client->GetCurrentPadding(&padding)))
 				goto stop_thread;

@@ -303,78 +296,13 @@ bool WASAPIDevice::setupDevice(DeviceSpecs &specs)
 	return true;
 }

-ULONG WASAPIDevice::AddRef()
-{
-	return InterlockedIncrement(&m_reference_count);
-}
-
-ULONG WASAPIDevice::Release()
-{
-	ULONG reference_count = InterlockedDecrement(&m_reference_count);
-
-	if(0 == reference_count)
-		delete this;
-
-	return reference_count;
-}
-
-HRESULT WASAPIDevice::QueryInterface(REFIID riid, void **ppvObject)
-{
-	if(riid == __uuidof(IMMNotificationClient))
-	{
-		*ppvObject = reinterpret_cast<IMMNotificationClient*>(this);
-		AddRef();
-	}
-	else if(riid == IID_IUnknown)
-	{
-		*ppvObject = reinterpret_cast<IUnknown*>(this);
-		AddRef();
-	}
-	else
-	{
-		*ppvObject = nullptr;
-		return E_NOINTERFACE;
-	}
-
-	return S_OK;
-}
-
-HRESULT WASAPIDevice::OnDeviceStateChanged(LPCWSTR pwstrDeviceId, DWORD dwNewState)
-{
-	return S_OK;
-}
-
-HRESULT WASAPIDevice::OnDeviceAdded(LPCWSTR pwstrDeviceId)
-{
-	return S_OK;
-}
-
-HRESULT WASAPIDevice::OnDeviceRemoved(LPCWSTR pwstrDeviceId)
-{
-	return S_OK;
-}
-
-HRESULT WASAPIDevice::OnDefaultDeviceChanged(EDataFlow flow, ERole role, LPCWSTR pwstrDeviceId)
-{
-	if(flow != EDataFlow::eCapture)
-		m_default_device_changed = true;
-
-	return S_OK;
-}
-
-HRESULT WASAPIDevice::OnPropertyValueChanged(LPCWSTR pwstrDeviceId, const PROPERTYKEY key)
-{
-	return S_OK;
-}
-
 WASAPIDevice::WASAPIDevice(DeviceSpecs specs, int buffersize) :
 	m_buffersize(buffersize),
 	m_imm_device_enumerator(nullptr),
 	m_imm_device(nullptr),
 	m_audio_client(nullptr),
-	m_wave_format_extensible({}),
-	m_default_device_changed(false),
-	m_reference_count(1)
+
+	m_wave_format_extensible({})
 {
 	// initialize COM if it hasn't happened yet
 	CoInitializeEx(nullptr, COINIT_MULTITHREADED);
@@ -399,8 +327,6 @@ WASAPIDevice::WASAPIDevice(DeviceSpecs specs, int buffersize) :

 	create();

-	m_imm_device_enumerator->RegisterEndpointNotificationCallback(this);
-
 	return;

 	error:
@@ -414,8 +340,6 @@ WASAPIDevice::~WASAPIDevice()
 {
 	stopMixingThread();

-	m_imm_device_enumerator->UnregisterEndpointNotificationCallback(this);
-
 	SafeRelease(&m_audio_client);
 	SafeRelease(&m_imm_device);
 	SafeRelease(&m_imm_device_enumerator);
--- a/extern/audaspace/plugins/wasapi/WASAPIDevice.h
+++ b/extern/audaspace/plugins/wasapi/WASAPIDevice.h
@@ -40,7 +40,7 @@ AUD_NAMESPACE_BEGIN
 /**
 * This device plays back through WASAPI, the Windows audio API.
 */
-class AUD_PLUGIN_API WASAPIDevice : IMMNotificationClient, public ThreadedDevice
+class AUD_PLUGIN_API WASAPIDevice : public ThreadedDevice
 {
 private:
 	int m_buffersize;
@@ -48,8 +48,6 @@ private:
 	IMMDevice* m_imm_device;
 	IAudioClient* m_audio_client;
 	WAVEFORMATEXTENSIBLE m_wave_format_extensible;
-	bool m_default_device_changed;
-	LONG m_reference_count;

 	AUD_LOCAL HRESULT setupRenderClient(IAudioRenderClient*& render_client, UINT32& buffer_size);

@@ -60,17 +58,6 @@ private:

 	AUD_LOCAL bool setupDevice(DeviceSpecs& specs);

-	// IUnknown implementation
-	ULONG STDMETHODCALLTYPE AddRef();
-	ULONG STDMETHODCALLTYPE Release();
-	HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void **ppvObject);
-	// IMMNotificationClient implementation
-	HRESULT STDMETHODCALLTYPE OnDeviceStateChanged(LPCWSTR pwstrDeviceId, DWORD dwNewState);
-	HRESULT STDMETHODCALLTYPE OnDeviceAdded(LPCWSTR pwstrDeviceId);
-	HRESULT STDMETHODCALLTYPE OnDeviceRemoved(LPCWSTR pwstrDeviceId);
-	HRESULT STDMETHODCALLTYPE OnDefaultDeviceChanged(EDataFlow flow, ERole role, LPCWSTR pwstrDeviceId);
-	HRESULT STDMETHODCALLTYPE OnPropertyValueChanged(LPCWSTR pwstrDeviceId, const PROPERTYKEY key);
-
 	// delete copy constructor and operator=
 	WASAPIDevice(const WASAPIDevice&) = delete;
 	WASAPIDevice& operator=(const WASAPIDevice&) = delete;
--- a/extern/hipew/src/hipew.c
+++ b/extern/hipew/src/hipew.c
@@ -257,7 +257,7 @@ static int hipewHipInit(void) {
 #endif
  static int initialized = 0;
  static int result = 0;
-  int error;
+  int error, driver_version;

  if (initialized) {
    return result;
@@ -565,6 +565,8 @@ int hipewCompilerVersion(void) {
  const char *path = hipewCompilerPath();
  const char *marker = "Hip compilation tools, release ";
  FILE *pipe;
+  int major, minor;
+  char *versionstr;
  char buf[128];
  char output[65536] = "\0";
  char command[65536] = "\0";
--- a/intern/CMakeLists.txt
+++ b/intern/CMakeLists.txt
@@ -25,6 +25,7 @@ add_subdirectory(ghost)
 add_subdirectory(guardedalloc)
 add_subdirectory(libmv)
 add_subdirectory(memutil)
+add_subdirectory(numaapi)
 add_subdirectory(opencolorio)
 add_subdirectory(opensubdiv)
 add_subdirectory(mikktspace)
--- a/intern/cycles/app/CMakeLists.txt
+++ b/intern/cycles/app/CMakeLists.txt
@@ -51,6 +51,8 @@ list(APPEND LIBRARIES ${CYCLES_GL_LIBRARIES})

 # Common configuration.

+cycles_link_directories()
+
 add_definitions(${GL_DEFINITIONS})

 include_directories(${INC})
--- a/intern/cycles/blender/addon/engine.py
+++ b/intern/cycles/blender/addon/engine.py
@@ -60,8 +60,9 @@ def init():

    path = os.path.dirname(__file__)
    user_path = os.path.dirname(os.path.abspath(bpy.utils.user_resource('CONFIG', path='')))
+    temp_path = bpy.app.tempdir

-    _cycles.init(path, user_path, bpy.app.background)
+    _cycles.init(path, user_path, temp_path, bpy.app.background)
    _parse_command_line()


--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -802,7 +802,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
        name="Tile Size",
        default=2048,
        description="",
-        min=8, max=8192,
+        min=8, max=16384,
    )

    # Various fine-tuning debug flags
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -1803,45 +1803,18 @@ class CYCLES_RENDER_PT_bake_output(CyclesButtonsPanel, Panel):
        rd = scene.render

        if rd.use_bake_multires:
+            layout.prop(rd, "bake_margin")
            layout.prop(rd, "use_bake_clear", text="Clear Image")
+
            if rd.bake_type == 'DISPLACEMENT':
                layout.prop(rd, "use_bake_lores_mesh")
        else:
            layout.prop(cbk, "target")
+
            if cbk.target == 'IMAGE_TEXTURES':
+                layout.prop(cbk, "margin")
                layout.prop(cbk, "use_clear", text="Clear Image")

-class CYCLES_RENDER_PT_bake_output_margin(CyclesButtonsPanel, Panel):
-    bl_label = "Margin"
-    bl_context = "render"
-    bl_parent_id = "CYCLES_RENDER_PT_bake_output"
-    COMPAT_ENGINES = {'CYCLES'}
-
-    @classmethod
-    def poll(cls, context):
-        scene = context.scene
-        cbk = scene.render.bake
-        return cbk.target == 'IMAGE_TEXTURES'
-
-    def draw(self, context):
-        layout = self.layout
-        layout.use_property_split = True
-        layout.use_property_decorate = False  # No animation.
-
-        scene = context.scene
-        cscene = scene.cycles
-        cbk = scene.render.bake
-        rd = scene.render
-
-        if rd.use_bake_multires:
-            layout.prop(rd, "bake_margin_type", text="Type")
-            layout.prop(rd, "bake_margin", text="Size")
-        else:
-            if cbk.target == 'IMAGE_TEXTURES':
-                layout.prop(cbk, "margin_type", text="Type")
-                layout.prop(cbk, "margin", text="Size")
-
-

 class CYCLES_RENDER_PT_debug(CyclesDebugButtonsPanel, Panel):
    bl_label = "Debug"
@@ -2210,7 +2183,6 @@ classes = (
    CYCLES_RENDER_PT_bake_influence,
    CYCLES_RENDER_PT_bake_selected_to_active,
    CYCLES_RENDER_PT_bake_output,
-    CYCLES_RENDER_PT_bake_output_margin,
    CYCLES_RENDER_PT_debug,
    node_panel(CYCLES_MATERIAL_PT_settings),
    node_panel(CYCLES_MATERIAL_PT_settings_surface),
--- a/intern/cycles/blender/display_driver.cpp
+++ b/intern/cycles/blender/display_driver.cpp
@@ -272,300 +272,12 @@ uint BlenderDisplaySpaceShader::get_shader_program()
  return shader_program_;
 }

-/* --------------------------------------------------------------------
- * DrawTile.
- */
-
-/* Higher level representation of a texture from the graphics library. */
-class GLTexture {
- public:
-  /* Global counter for all allocated OpenGL textures used by instances of this class. */
-  static inline std::atomic<int> num_used = 0;
-
-  GLTexture() = default;
-
-  ~GLTexture()
-  {
-    assert(gl_id == 0);
-  }
-
-  GLTexture(const GLTexture &other) = delete;
-  GLTexture &operator=(GLTexture &other) = delete;
-
-  GLTexture(GLTexture &&other) noexcept
-      : gl_id(other.gl_id), width(other.width), height(other.height)
-  {
-    other.reset();
-  }
-
-  GLTexture &operator=(GLTexture &&other)
-  {
-    if (this == &other) {
-      return *this;
-    }
-
-    gl_id = other.gl_id;
-    width = other.width;
-    height = other.height;
-
-    other.reset();
-
-    return *this;
-  }
-
-  bool gl_resources_ensure()
-  {
-    if (gl_id) {
-      return true;
-    }
-
-    /* Create texture. */
-    glGenTextures(1, &gl_id);
-    if (!gl_id) {
-      LOG(ERROR) << "Error creating texture.";
-      return false;
-    }
-
-    /* Configure the texture. */
-    glActiveTexture(GL_TEXTURE0);
-    glBindTexture(GL_TEXTURE_2D, gl_id);
-
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-
-    /* Clamp to edge so that precision issues when zoomed out (which forces linear interpolation)
-     * does not cause unwanted repetition. */
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-
-    glBindTexture(GL_TEXTURE_2D, 0);
-
-    ++num_used;
-
-    return true;
-  }
-
-  void gl_resources_destroy()
-  {
-    if (!gl_id) {
-      return;
-    }
-
-    glDeleteTextures(1, &gl_id);
-
-    reset();
-
-    --num_used;
-  }
-
-  /* OpenGL resource IDs of the texture.
-   *
-   * NOTE: Allocated on the render engine's context. */
-  uint gl_id = 0;
-
-  /* Dimensions of the texture in pixels. */
-  int width = 0;
-  int height = 0;
-
- protected:
-  void reset()
-  {
-    gl_id = 0;
-    width = 0;
-    height = 0;
-  }
-};
-
-/* Higher level representation of a Pixel Buffer Object (PBO) from the graphics library. */
-class GLPixelBufferObject {
- public:
-  /* Global counter for all allocated OpenGL PBOs used by instances of this class. */
-  static inline std::atomic<int> num_used = 0;
-
-  GLPixelBufferObject() = default;
-
-  ~GLPixelBufferObject()
-  {
-    assert(gl_id == 0);
-  }
-
-  GLPixelBufferObject(const GLPixelBufferObject &other) = delete;
-  GLPixelBufferObject &operator=(GLPixelBufferObject &other) = delete;
-
-  GLPixelBufferObject(GLPixelBufferObject &&other) noexcept
-      : gl_id(other.gl_id), width(other.width), height(other.height)
-  {
-    other.reset();
-  }
-
-  GLPixelBufferObject &operator=(GLPixelBufferObject &&other)
-  {
-    if (this == &other) {
-      return *this;
-    }
-
-    gl_id = other.gl_id;
-    width = other.width;
-    height = other.height;
-
-    other.reset();
-
-    return *this;
-  }
-
-  bool gl_resources_ensure()
-  {
-    if (gl_id) {
-      return true;
-    }
-
-    glGenBuffers(1, &gl_id);
-    if (!gl_id) {
-      LOG(ERROR) << "Error creating texture pixel buffer object.";
-      return false;
-    }
-
-    ++num_used;
-
-    return true;
-  }
-
-  void gl_resources_destroy()
-  {
-    if (!gl_id) {
-      return;
-    }
-
-    glDeleteBuffers(1, &gl_id);
-
-    reset();
-
-    --num_used;
-  }
-
-  /* OpenGL resource IDs of the PBO.
-   *
-   * NOTE: Allocated on the render engine's context. */
-  uint gl_id = 0;
-
-  /* Dimensions of the PBO. */
-  int width = 0;
-  int height = 0;
-
- protected:
-  void reset()
-  {
-    gl_id = 0;
-    width = 0;
-    height = 0;
-  }
-};
-
-class DrawTile {
- public:
-  DrawTile() = default;
-  ~DrawTile() = default;
-
-  DrawTile(const DrawTile &other) = delete;
-  DrawTile &operator=(const DrawTile &other) = delete;
-
-  DrawTile(DrawTile &&other) noexcept = default;
-
-  DrawTile &operator=(DrawTile &&other) = default;
-
-  bool gl_resources_ensure()
-  {
-    if (!texture.gl_resources_ensure()) {
-      gl_resources_destroy();
-      return false;
-    }
-
-    if (!gl_vertex_buffer) {
-      glGenBuffers(1, &gl_vertex_buffer);
-      if (!gl_vertex_buffer) {
-        LOG(ERROR) << "Error allocating tile VBO.";
-        gl_resources_destroy();
-        return false;
-      }
-    }
-
-    return true;
-  }
-
-  void gl_resources_destroy()
-  {
-    texture.gl_resources_destroy();
-
-    if (gl_vertex_buffer) {
-      glDeleteBuffers(1, &gl_vertex_buffer);
-      gl_vertex_buffer = 0;
-    }
-  }
-
-  inline bool ready_to_draw() const
-  {
-    return texture.gl_id != 0;
-  }
-
-  /* Texture which contains pixels of the tile. */
-  GLTexture texture;
-
-  /* Display parameters the texture of this tile has been updated for. */
-  BlenderDisplayDriver::Params params;
-
-  /* OpenGL resources needed for drawing. */
-  uint gl_vertex_buffer = 0;
-};
-
-class DrawTileAndPBO {
- public:
-  bool gl_resources_ensure()
-  {
-    if (!tile.gl_resources_ensure() || !buffer_object.gl_resources_ensure()) {
-      gl_resources_destroy();
-      return false;
-    }
-
-    return true;
-  }
-
-  void gl_resources_destroy()
-  {
-    tile.gl_resources_destroy();
-    buffer_object.gl_resources_destroy();
-  }
-
-  DrawTile tile;
-  GLPixelBufferObject buffer_object;
-};
-
 /* --------------------------------------------------------------------
 * BlenderDisplayDriver.
 */

-struct BlenderDisplayDriver::Tiles {
-  /* Resources of a tile which is being currently rendered. */
-  DrawTileAndPBO current_tile;
-
-  /* All tiles which rendering is finished and which content will not be changed. */
-  struct {
-    vector<DrawTile> tiles;
-
-    void gl_resources_destroy_and_clear()
-    {
-      for (DrawTile &tile : tiles) {
-        tile.gl_resources_destroy();
-      }
-
-      tiles.clear();
-    }
-  } finished_tiles;
-};
-
 BlenderDisplayDriver::BlenderDisplayDriver(BL::RenderEngine &b_engine, BL::Scene &b_scene)
-    : b_engine_(b_engine),
-      display_shader_(BlenderDisplayShader::create(b_engine, b_scene)),
-      tiles_(make_unique<Tiles>())
+    : b_engine_(b_engine), display_shader_(BlenderDisplayShader::create(b_engine, b_scene))
 {
  /* Create context while on the main thread. */
  gl_context_create();
@@ -580,21 +292,6 @@ BlenderDisplayDriver::~BlenderDisplayDriver()
 * Update procedure.
 */

-void BlenderDisplayDriver::next_tile_begin()
-{
-  if (!tiles_->current_tile.tile.ready_to_draw()) {
-    LOG(ERROR)
-        << "Unexpectedly moving to the next tile without any data provided for current tile.";
-    return;
-  }
-
-  /* Moving to the next tile without giving render data for the current tile is not an expected
-   * situation. */
-  DCHECK(!need_clear_);
-
-  tiles_->finished_tiles.tiles.emplace_back(std::move(tiles_->current_tile.tile));
-}
-
 bool BlenderDisplayDriver::update_begin(const Params &params,
                                        int texture_width,
                                        int texture_height)
@@ -615,33 +312,24 @@ bool BlenderDisplayDriver::update_begin(const Params &params,
    glWaitSync((GLsync)gl_render_sync_, 0, GL_TIMEOUT_IGNORED);
  }

-  DrawTile &current_tile = tiles_->current_tile.tile;
-  GLPixelBufferObject &current_tile_buffer_object = tiles_->current_tile.buffer_object;
-
-  /* Clear storage of all finished tiles when display clear is requested.
-   * Do it when new tile data is provided to handle the display clear flag in a single place.
-   * It also makes the logic reliable from the whether drawing did happen or not point of view. */
-  if (need_clear_) {
-    tiles_->finished_tiles.gl_resources_destroy_and_clear();
-    need_clear_ = false;
-  }
-
-  if (!tiles_->current_tile.gl_resources_ensure()) {
-    tiles_->current_tile.gl_resources_destroy();
+  if (!gl_texture_resources_ensure()) {
    gl_context_disable();
    return false;
  }

  /* Update texture dimensions if needed. */
-  if (current_tile.texture.width != texture_width ||
-      current_tile.texture.height != texture_height) {
+  if (texture_.width != texture_width || texture_.height != texture_height) {
    glActiveTexture(GL_TEXTURE0);
-    glBindTexture(GL_TEXTURE_2D, current_tile.texture.gl_id);
+    glBindTexture(GL_TEXTURE_2D, texture_.gl_id);
    glTexImage2D(
        GL_TEXTURE_2D, 0, GL_RGBA16F, texture_width, texture_height, 0, GL_RGBA, GL_HALF_FLOAT, 0);
-    current_tile.texture.width = texture_width;
-    current_tile.texture.height = texture_height;
+    texture_.width = texture_width;
+    texture_.height = texture_height;
    glBindTexture(GL_TEXTURE_2D, 0);
+
+    /* Texture did change, and no pixel storage was provided. Tag for an explicit zeroing out to
+     * avoid undefined content. */
+    texture_.need_clear = true;
  }

  /* Update PBO dimensions if needed.
@@ -653,58 +341,29 @@ bool BlenderDisplayDriver::update_begin(const Params &params,
   * sending too much data to GPU when resolution divider is not 1. */
  /* TODO(sergey): Investigate whether keeping the PBO exact size of the texture makes non-interop
   * mode faster. */
-  const int buffer_width = params.size.x;
-  const int buffer_height = params.size.y;
-  if (current_tile_buffer_object.width != buffer_width ||
-      current_tile_buffer_object.height != buffer_height) {
+  const int buffer_width = params.full_size.x;
+  const int buffer_height = params.full_size.y;
+  if (texture_.buffer_width != buffer_width || texture_.buffer_height != buffer_height) {
    const size_t size_in_bytes = sizeof(half4) * buffer_width * buffer_height;
-    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, current_tile_buffer_object.gl_id);
+    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id);
    glBufferData(GL_PIXEL_UNPACK_BUFFER, size_in_bytes, 0, GL_DYNAMIC_DRAW);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

-    current_tile_buffer_object.width = buffer_width;
-    current_tile_buffer_object.height = buffer_height;
+    texture_.buffer_width = buffer_width;
+    texture_.buffer_height = buffer_height;
  }

-  /* Store an updated parameters of the current tile.
-   * In theory it is only needed once per update of the tile, but doing it on every update is
-   * the easiest and is not expensive. */
-  tiles_->current_tile.tile.params = params;
+  /* New content will be provided to the texture in one way or another, so mark this in a
+   * centralized place. */
+  texture_.need_update = true;
+
+  texture_.params = params;

  return true;
 }

-static void update_tile_texture_pixels(const DrawTileAndPBO &tile)
-{
-  const GLTexture &texture = tile.tile.texture;
-
-  DCHECK_NE(tile.buffer_object.gl_id, 0);
-
-  glActiveTexture(GL_TEXTURE0);
-  glBindTexture(GL_TEXTURE_2D, texture.gl_id);
-  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, tile.buffer_object.gl_id);
-
-  glTexSubImage2D(
-      GL_TEXTURE_2D, 0, 0, 0, texture.width, texture.height, GL_RGBA, GL_HALF_FLOAT, 0);
-
-  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
-  glBindTexture(GL_TEXTURE_2D, 0);
-}
-
 void BlenderDisplayDriver::update_end()
 {
-  /* Unpack the PBO into the texture as soon as the new content is provided.
-   *
-   * This allows to ensure that the unpacking happens while resources like graphics interop (which
-   * lifetime is outside of control of the display driver) are still valid, as well as allows to
-   * move the tile from being current to finished immediately after this call.
-   *
-   * One concern with this approach is that if the update happens more often than drawing then
-   * doing the unpack here occupies GPU transfer for no good reason. However, the render scheduler
-   * takes care of ensuring updates don't happen that often. In regular applications redraw will
-   * happen much more often than this update. */
-  update_tile_texture_pixels(tiles_->current_tile);
-
  gl_upload_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
  glFlush();

@@ -717,11 +376,7 @@ void BlenderDisplayDriver::update_end()

 half4 *BlenderDisplayDriver::map_texture_buffer()
 {
-  const uint pbo_gl_id = tiles_->current_tile.buffer_object.gl_id;
-
-  DCHECK_NE(pbo_gl_id, 0);
-
-  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_gl_id);
+  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id);

  half4 *mapped_rgba_pixels = reinterpret_cast<half4 *>(
      glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY));
@@ -729,6 +384,15 @@ half4 *BlenderDisplayDriver::map_texture_buffer()
    LOG(ERROR) << "Error mapping BlenderDisplayDriver pixel buffer object.";
  }

+  /* Only zero out a successfully mapped buffer; on failure the clear request stays pending. */
+  if (mapped_rgba_pixels && texture_.need_clear) {
+    const int64_t num_pixels = int64_t(texture_.width) * texture_.height;
+    memset(reinterpret_cast<void *>(mapped_rgba_pixels),
+           0,
+           num_pixels * sizeof(half4));
+    texture_.need_clear = false;
+  }
+
  return mapped_rgba_pixels;
 }

@@ -747,9 +411,12 @@ BlenderDisplayDriver::GraphicsInterop BlenderDisplayDriver::graphics_interop_get
 {
  GraphicsInterop interop_dst;

-  interop_dst.buffer_width = tiles_->current_tile.buffer_object.width;
-  interop_dst.buffer_height = tiles_->current_tile.buffer_object.height;
-  interop_dst.opengl_pbo_id = tiles_->current_tile.buffer_object.gl_id;
+  interop_dst.buffer_width = texture_.buffer_width;
+  interop_dst.buffer_height = texture_.buffer_height;
+  interop_dst.opengl_pbo_id = texture_.gl_pbo_id;
+
+  interop_dst.need_clear = texture_.need_clear;
+  texture_.need_clear = false;

  return interop_dst;
 }
@@ -770,7 +437,7 @@ void BlenderDisplayDriver::graphics_interop_deactivate()

 void BlenderDisplayDriver::clear()
 {
-  need_clear_ = true;
+  texture_.need_clear = true;
 }

 void BlenderDisplayDriver::set_zoom(float zoom_x, float zoom_y)
@@ -778,155 +445,26 @@ void BlenderDisplayDriver::set_zoom(float zoom_x, float zoom_y)
  zoom_ = make_float2(zoom_x, zoom_y);
 }

-/* Update vertex buffer with new coordinates of vertex positions and texture coordinates.
- * This buffer is used to render texture in the viewport.
- *
- * NOTE: The buffer needs to be bound. */
-static void vertex_buffer_update(const DisplayDriver::Params &params)
-{
-  const int x = params.full_offset.x;
-  const int y = params.full_offset.y;
-
-  const int width = params.size.x;
-  const int height = params.size.y;
-
-  /* Invalidate old contents - avoids stalling if the buffer is still waiting in queue to be
-   * rendered. */
-  glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);
-
-  float *vpointer = reinterpret_cast<float *>(glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY));
-  if (!vpointer) {
-    return;
-  }
-
-  vpointer[0] = 0.0f;
-  vpointer[1] = 0.0f;
-  vpointer[2] = x;
-  vpointer[3] = y;
-
-  vpointer[4] = 1.0f;
-  vpointer[5] = 0.0f;
-  vpointer[6] = x + width;
-  vpointer[7] = y;
-
-  vpointer[8] = 1.0f;
-  vpointer[9] = 1.0f;
-  vpointer[10] = x + width;
-  vpointer[11] = y + height;
-
-  vpointer[12] = 0.0f;
-  vpointer[13] = 1.0f;
-  vpointer[14] = x;
-  vpointer[15] = y + height;
-
-  glUnmapBuffer(GL_ARRAY_BUFFER);
-}
-
-static void draw_tile(const float2 &zoom,
-                      const int texcoord_attribute,
-                      const int position_attribute,
-                      const DrawTile &draw_tile)
-{
-  if (!draw_tile.ready_to_draw()) {
-    return;
-  }
-
-  const GLTexture &texture = draw_tile.texture;
-
-  DCHECK_NE(texture.gl_id, 0);
-  DCHECK_NE(draw_tile.gl_vertex_buffer, 0);
-
-  glBindBuffer(GL_ARRAY_BUFFER, draw_tile.gl_vertex_buffer);
-
-  /* Draw at the parameters for which the texture has been updated for. This allows to always draw
-   * texture during bordered-rendered camera view without flickering. The validness of the display
-   * parameters for a texture is guaranteed by the initial "clear" state which makes drawing to
-   * have an early output.
-   *
-   * Such approach can cause some extra "jelly" effect during panning, but it is not more jelly
-   * than overlay of selected objects. Also, it's possible to redraw texture at an intersection of
-   * the texture draw parameters and the latest updated draw parameters (although, complexity of
-   * doing it might not worth it. */
-  vertex_buffer_update(draw_tile.params);
-
-  glBindTexture(GL_TEXTURE_2D, texture.gl_id);
-
-  /* Trick to keep sharp rendering without jagged edges on all GPUs.
-   *
-   * The idea here is to enforce driver to use linear interpolation when the image is not zoomed
-   * in.
-   * For the render result with a resolution divider in effect we always use nearest interpolation.
-   *
-   * Use explicit MIN assignment to make sure the driver does not have an undefined behavior at
-   * the zoom level 1. The MAG filter is always NEAREST. */
-  const float zoomed_width = draw_tile.params.size.x * zoom.x;
-  const float zoomed_height = draw_tile.params.size.y * zoom.y;
-  if (texture.width != draw_tile.params.size.x || texture.height != draw_tile.params.size.y) {
-    /* Resolution divider is different from 1, force nearest interpolation. */
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-  }
-  else if (zoomed_width - draw_tile.params.size.x > 0.5f ||
-           zoomed_height - draw_tile.params.size.y > 0.5f) {
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-  }
-  else {
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-  }
-
-  glVertexAttribPointer(
-      texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
-  glVertexAttribPointer(position_attribute,
-                        2,
-                        GL_FLOAT,
-                        GL_FALSE,
-                        4 * sizeof(float),
-                        (const GLvoid *)(sizeof(float) * 2));
-
-  glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
-}
-
-void BlenderDisplayDriver::flush()
-{
-  /* This is called from the render thread that also calls update_begin/end, right before ending
-   * the render loop. We wait for any queued PBO and render commands to be done, before destroying
-   * the render thread and activating the context in the main thread to destroy resources.
-   *
-   * If we don't do this, the NVIDIA driver hangs for a few seconds for when ending 3D viewport
-   * rendering, for unknown reasons. This was found with NVIDIA driver version 470.73 and a Quadro
-   * RTX 6000 on Linux. */
-  if (!gl_context_enable()) {
-    return;
-  }
-
-  if (gl_upload_sync_) {
-    glWaitSync((GLsync)gl_upload_sync_, 0, GL_TIMEOUT_IGNORED);
-  }
-
-  if (gl_render_sync_) {
-    glWaitSync((GLsync)gl_render_sync_, 0, GL_TIMEOUT_IGNORED);
-  }
-
-  gl_context_disable();
-}
-
 void BlenderDisplayDriver::draw(const Params &params)
 {
  /* See do_update_begin() for why no locking is required here. */
  const bool transparent = true;  // TODO(sergey): Derive this from Film.

+  if (!gl_draw_resources_ensure()) {
+    return;
+  }
+
  if (use_gl_context_) {
    gl_context_mutex_.lock();
  }

-  if (need_clear_) {
+  if (texture_.need_clear) {
    /* Texture is requested to be cleared and was not yet cleared.
     *
     * Do early return which should be equivalent of drawing all-zero texture.
     * Watch out for the lock though so that the clear happening during update is properly
     * synchronized here. */
-    if (use_gl_context_) {
-      gl_context_mutex_.unlock();
-    }
+    if (use_gl_context_) { gl_context_mutex_.unlock(); } /* Mirror the conditional lock. */
    return;
  }

@@ -939,37 +477,66 @@ void BlenderDisplayDriver::draw(const Params &params)
    glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
  }

-  glActiveTexture(GL_TEXTURE0);
+  display_shader_->bind(params.full_size.x, params.full_size.y);

-  /* NOTE: The VAO is to be allocated on the drawing context as it is not shared across contexts.
-   * Simplest is to allocate it on every redraw so that it is possible to destroy it from a
-   * correct context. */
+  glActiveTexture(GL_TEXTURE0);
+  glBindTexture(GL_TEXTURE_2D, texture_.gl_id);
+
+  /* Trick to keep sharp rendering without jagged edges on all GPUs.
+   *
+   * The idea here is to enforce driver to use linear interpolation when the image is not zoomed
+   * in.
+   * For the render result with a resolution divider in effect we always use nearest interpolation.
+   *
+   * Use explicit MIN assignment to make sure the driver does not have an undefined behavior at
+   * the zoom level 1. The MAG filter is always NEAREST. */
+  const float zoomed_width = params.size.x * zoom_.x;
+  const float zoomed_height = params.size.y * zoom_.y;
+  if (texture_.width != params.size.x || texture_.height != params.size.y) {
+    /* Resolution divider is different from 1, force nearest interpolation. */
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+  }
+  else if (zoomed_width - params.size.x > 0.5f || zoomed_height - params.size.y > 0.5f) {
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+  }
+  else {
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+  }
+
+  glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_);
+
+  texture_update_if_needed();
+  vertex_buffer_update(params);
+
+  /* TODO(sergey): Does it make sense/possible to cache/reuse the VAO? */
  GLuint vertex_array_object;
  glGenVertexArrays(1, &vertex_array_object);
  glBindVertexArray(vertex_array_object);

-  display_shader_->bind(params.full_size.x, params.full_size.y);
-
  const int texcoord_attribute = display_shader_->get_tex_coord_attrib_location();
  const int position_attribute = display_shader_->get_position_attrib_location();

  glEnableVertexAttribArray(texcoord_attribute);
  glEnableVertexAttribArray(position_attribute);

-  draw_tile(zoom_, texcoord_attribute, position_attribute, tiles_->current_tile.tile);
+  glVertexAttribPointer(
+      texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
+  glVertexAttribPointer(position_attribute,
+                        2,
+                        GL_FLOAT,
+                        GL_FALSE,
+                        4 * sizeof(float),
+                        (const GLvoid *)(sizeof(float) * 2));

-  for (const DrawTile &tile : tiles_->finished_tiles.tiles) {
-    draw_tile(zoom_, texcoord_attribute, position_attribute, tile);
-  }
+  glDrawArrays(GL_TRIANGLE_FAN, 0, 4);

-  display_shader_->unbind();
-
-  glBindTexture(GL_TEXTURE_2D, 0);
-  glBindVertexArray(0);
  glBindBuffer(GL_ARRAY_BUFFER, 0);
+  glBindTexture(GL_TEXTURE_2D, 0);

  glDeleteVertexArrays(1, &vertex_array_object);

+  display_shader_->unbind();
+
  if (transparent) {
    glDisable(GL_BLEND);
  }
@@ -977,11 +544,6 @@ void BlenderDisplayDriver::draw(const Params &params)
  gl_render_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
  glFlush();

-  if (VLOG_IS_ON(5)) {
-    VLOG(5) << "Number of textures: " << GLTexture::num_used;
-    VLOG(5) << "Number of PBOs: " << GLPixelBufferObject::num_used;
-  }
-
  if (use_gl_context_) {
    gl_context_mutex_.unlock();
  }
@@ -1056,16 +618,154 @@ void BlenderDisplayDriver::gl_context_dispose()
  }
 }

+bool BlenderDisplayDriver::gl_draw_resources_ensure()
+{
+  if (!texture_.gl_id) {
+    /* If there is no texture allocated, there is nothing to draw. Inform the draw call that it
+     * cannot continue. Note that this is not an unrecoverable error, so once the texture is known
+     * we will come back here and create all the GPU resources needed for draw. */
+    return false;
+  }
+
+  if (gl_draw_resource_creation_attempted_) {
+    return gl_draw_resources_created_;
+  }
+  gl_draw_resource_creation_attempted_ = true;
+
+  if (!vertex_buffer_) {
+    glGenBuffers(1, &vertex_buffer_);
+    if (!vertex_buffer_) {
+      LOG(ERROR) << "Error creating vertex buffer.";
+      return false;
+    }
+  }
+
+  gl_draw_resources_created_ = true;
+
+  return true;
+}
+
 void BlenderDisplayDriver::gl_resources_destroy()
 {
  gl_context_enable();

-  tiles_->current_tile.gl_resources_destroy();
-  tiles_->finished_tiles.gl_resources_destroy_and_clear();
+  if (vertex_buffer_ != 0) {
+    glDeleteBuffers(1, &vertex_buffer_);
+  }
+
+  if (texture_.gl_pbo_id) {
+    glDeleteBuffers(1, &texture_.gl_pbo_id);
+    texture_.gl_pbo_id = 0;
+  }
+
+  if (texture_.gl_id) {
+    glDeleteTextures(1, &texture_.gl_id);
+    texture_.gl_id = 0;
+  }

  gl_context_disable();

  gl_context_dispose();
 }

+bool BlenderDisplayDriver::gl_texture_resources_ensure()
+{
+  if (texture_.creation_attempted) {
+    return texture_.is_created;
+  }
+  texture_.creation_attempted = true;
+
+  DCHECK(!texture_.gl_id);
+  DCHECK(!texture_.gl_pbo_id);
+
+  /* Create texture. */
+  glGenTextures(1, &texture_.gl_id);
+  if (!texture_.gl_id) {
+    LOG(ERROR) << "Error creating texture.";
+    return false;
+  }
+
+  /* Configure the texture. */
+  glActiveTexture(GL_TEXTURE0);
+  glBindTexture(GL_TEXTURE_2D, texture_.gl_id);
+  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+  glBindTexture(GL_TEXTURE_2D, 0);
+
+  /* Create PBO for the texture. */
+  glGenBuffers(1, &texture_.gl_pbo_id);
+  if (!texture_.gl_pbo_id) {
+    LOG(ERROR) << "Error creating texture pixel buffer object.";
+    return false;
+  }
+
+  /* Creation finished with a success. */
+  texture_.is_created = true;
+
+  return true;
+}
+
+void BlenderDisplayDriver::texture_update_if_needed()
+{
+  if (!texture_.need_update) {
+    return;
+  }
+
+  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id);
+  glTexSubImage2D(
+      GL_TEXTURE_2D, 0, 0, 0, texture_.width, texture_.height, GL_RGBA, GL_HALF_FLOAT, 0);
+  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+
+  texture_.need_update = false;
+}
+
+void BlenderDisplayDriver::vertex_buffer_update(const Params & /*params*/)
+{
+  /* Draw at the parameters for which the texture has been updated for. This allows to always draw
+   * texture during bordered-rendered camera view without flickering. The validness of the display
+   * parameters for a texture is guaranteed by the initial "clear" state which makes drawing to
+   * have an early output.
+   *
+   * Such approach can cause some extra "jelly" effect during panning, but it is not more jelly
+   * than overlay of selected objects. Also, it's possible to redraw texture at an intersection of
+   * the texture draw parameters and the latest updated draw parameters (although, complexity of
+   * doing it might not be worth it). */
+  const int x = texture_.params.full_offset.x;
+  const int y = texture_.params.full_offset.y;
+
+  const int width = texture_.params.size.x;
+  const int height = texture_.params.size.y;
+
+  /* Invalidate old contents - avoids stalling if the buffer is still waiting in queue to be
+   * rendered. */
+  glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);
+
+  float *vpointer = reinterpret_cast<float *>(glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY));
+  if (!vpointer) {
+    return;
+  }
+
+  vpointer[0] = 0.0f;
+  vpointer[1] = 0.0f;
+  vpointer[2] = x;
+  vpointer[3] = y;
+
+  vpointer[4] = 1.0f;
+  vpointer[5] = 0.0f;
+  vpointer[6] = x + width;
+  vpointer[7] = y;
+
+  vpointer[8] = 1.0f;
+  vpointer[9] = 1.0f;
+  vpointer[10] = x + width;
+  vpointer[11] = y + height;
+
+  vpointer[12] = 0.0f;
+  vpointer[13] = 1.0f;
+  vpointer[14] = x;
+  vpointer[15] = y + height;
+
+  glUnmapBuffer(GL_ARRAY_BUFFER);
+}
+
 CCL_NAMESPACE_END
--- a/intern/cycles/blender/display_driver.h
+++ b/intern/cycles/blender/display_driver.h
@@ -26,7 +26,6 @@

 #include "util/thread.h"
 #include "util/unique_ptr.h"
-#include "util/vector.h"

 CCL_NAMESPACE_BEGIN

@@ -113,8 +112,6 @@ class BlenderDisplayDriver : public DisplayDriver {
  void set_zoom(float zoom_x, float zoom_y);

 protected:
-  virtual void next_tile_begin() override;
-
  virtual bool update_begin(const Params &params, int texture_width, int texture_height) override;
  virtual void update_end() override;

@@ -125,17 +122,33 @@ class BlenderDisplayDriver : public DisplayDriver {

  virtual void draw(const Params &params) override;

-  virtual void flush() override;
-
  /* Helper function which allocates new GPU context. */
  void gl_context_create();
  bool gl_context_enable();
  void gl_context_disable();
  void gl_context_dispose();

+  /* Make sure texture is allocated and its initial configuration is performed. */
+  bool gl_texture_resources_ensure();
+
+  /* Ensure all runtime GPU resources needed for drawing are allocated.
+   * Returns true if all resources needed for drawing are available. */
+  bool gl_draw_resources_ensure();
+
  /* Destroy all GPU resources which are being used by this object. */
  void gl_resources_destroy();

+  /* Update GPU texture dimensions and content if needed (new pixel data was provided).
+   *
+   * NOTE: The texture needs to be bound. */
+  void texture_update_if_needed();
+
+  /* Update vertex buffer with new coordinates of vertex positions and texture coordinates.
+   * This buffer is used to render texture in the viewport.
+   *
+   * NOTE: The buffer needs to be bound. */
+  void vertex_buffer_update(const Params &params);
+
  BL::RenderEngine b_engine_;

  /* OpenGL context which is used the render engine doesn't have its own. */
@@ -146,14 +159,50 @@ class BlenderDisplayDriver : public DisplayDriver {
  /* Mutex used to guard the `gl_context_`. */
  thread_mutex gl_context_mutex_;

-  /* Content of the display is to be filled with zeroes. */
-  std::atomic<bool> need_clear_ = true;
+  /* Texture which contains pixels of the render result. */
+  struct {
+    /* Indicate whether texture creation was attempted and whether it succeeded.
+     * Used to avoid repeated attempts of texture creation on GPU issues or GPU context
+     * misconfiguration. */
+    bool creation_attempted = false;
+    bool is_created = false;
+
+    /* OpenGL resource IDs of the texture itself and of the Pixel Buffer Object (PBO) used to
+     * write pixels to it.
+     *
+     * NOTE: Allocated on the engine's context. */
+    uint gl_id = 0;
+    uint gl_pbo_id = 0;
+
+    /* Is true when new data was written to the PBO, meaning the texture might need to be resized
+     * and the new data is to be uploaded to the GPU. */
+    bool need_update = false;
+
+    /* Content of the texture is to be filled with zeroes. */
+    std::atomic<bool> need_clear = true;
+
+    /* Dimensions of the texture in pixels. */
+    int width = 0;
+    int height = 0;
+
+    /* Dimensions of the underlying PBO. */
+    int buffer_width = 0;
+    int buffer_height = 0;
+
+    /* Display parameters the texture has been updated for. */
+    Params params;
+  } texture_;

  unique_ptr<BlenderDisplayShader> display_shader_;

-  /* Opaque storage for an internal state and data for tiles. */
-  struct Tiles;
-  unique_ptr<Tiles> tiles_;
+  /* Track whether creation of the GPU draw resources was attempted, to avoid re-attempting it on
+   * every redraw after a failure. */
+  bool gl_draw_resource_creation_attempted_ = false;
+  bool gl_draw_resources_created_ = false;
+
+  /* Vertex buffer which holds vertices of a triangle fan which is textured with the texture
+   * holding the render result. */
+  uint vertex_buffer_ = 0;

  void *gl_render_sync_ = nullptr;
  void *gl_upload_sync_ = nullptr;
--- a/intern/cycles/blender/object.cpp
+++ b/intern/cycles/blender/object.cpp
@@ -529,17 +529,6 @@ void BlenderSync::sync_procedural(BL::Object &b_ob,
  string absolute_path = blender_absolute_path(b_data, b_ob, b_mesh_cache.cache_file().filepath());
  procedural->set_filepath(ustring(absolute_path));

-  array<ustring> layers;
-  for (BL::CacheFileLayer &layer : cache_file.layers) {
-    if (layer.hide_layer()) {
-      continue;
-    }
-
-    absolute_path = blender_absolute_path(b_data, b_ob, layer.filepath());
-    layers.push_back_slow(ustring(absolute_path));
-  }
-  procedural->set_layers(layers);
-
  procedural->set_scale(cache_file.scale());

  procedural->set_use_prefetch(cache_file.use_prefetch());
--- a/intern/cycles/blender/python.cpp
+++ b/intern/cycles/blender/python.cpp
@@ -138,18 +138,20 @@ static const char *PyC_UnicodeAsByte(PyObject *py_str, PyObject **coerce)

 static PyObject *init_func(PyObject * /*self*/, PyObject *args)
 {
-  PyObject *path, *user_path;
+  PyObject *path, *user_path, *temp_path;
  int headless;

-  if (!PyArg_ParseTuple(args, "OOi", &path, &user_path, &headless)) {
+  if (!PyArg_ParseTuple(args, "OOOi", &path, &user_path, &temp_path, &headless)) {
    return nullptr;
  }

-  PyObject *path_coerce = nullptr, *user_path_coerce = nullptr;
+  PyObject *path_coerce = nullptr, *user_path_coerce = nullptr, *temp_path_coerce = nullptr;
  path_init(PyC_UnicodeAsByte(path, &path_coerce),
-            PyC_UnicodeAsByte(user_path, &user_path_coerce));
+            PyC_UnicodeAsByte(user_path, &user_path_coerce),
+            PyC_UnicodeAsByte(temp_path, &temp_path_coerce));
  Py_XDECREF(path_coerce);
  Py_XDECREF(user_path_coerce);
+  Py_XDECREF(temp_path_coerce);

  BlenderSession::headless = headless;

--- a/intern/cycles/blender/session.cpp
+++ b/intern/cycles/blender/session.cpp
@@ -502,15 +502,10 @@ void BlenderSession::render_frame_finish()
    path_remove(filename);
  }

-  /* Clear output driver. */
+  /* Clear driver. */
  session->set_output_driver(nullptr);
  session->full_buffer_written_cb = function_null;

-  /* The display driver holds OpenGL resources which belong to an OpenGL context held by the render
-   * engine on Blender side. Force destruction of those resources. */
-  display_driver_ = nullptr;
-  session->set_display_driver(nullptr);
-
  /* All the files are handled.
   * Clear the list so that this session can be re-used by Persistent Data. */
  full_buffer_files_.clear();
--- a/intern/cycles/blender/sync.cpp
+++ b/intern/cycles/blender/sync.cpp
@@ -832,14 +832,6 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
  SessionParams params;
  PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");

-  if (background && !b_engine.is_preview()) {
-    /* Viewport and preview renders do not require temp directory and do request session
-     * parameters more often than the background render.
-     * Optimize RNA-C++ usage and memory allocation a bit by saving string access which we know is
-     * not needed for viewport render. */
-    params.temp_dir = b_engine.temporary_directory();
-  }
-
  /* feature set */
  params.experimental = (get_enum(cscene, "feature_set") != 0);

--- a/intern/cycles/bvh/build.cpp
+++ b/intern/cycles/bvh/build.cpp
@@ -935,7 +935,7 @@ BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start,

 BVHNode *BVHBuild::create_leaf_node(const BVHRange &range, const vector<BVHReference> &references)
 {
-  /* This is a bit over-allocating here (considering leaf size into account),
+  /* This is a bit overallocating here (considering leaf size into account),
   * but chunk-based re-allocation in vector makes it difficult to use small
   * size of stack storage here. Some tweaks are possible tho.
   *
--- a/intern/cycles/cmake/macros.cmake
+++ b/intern/cycles/cmake/macros.cmake
@@ -84,6 +84,39 @@ macro(cycles_add_library target library_deps)
  cycles_set_solution_folder(${target})
 endmacro()

+# Cycles library dependencies common to all executables
+# cycles_link_directories(): add the library directories of those dependencies (non-Apple only).
+function(cycles_link_directories)
+  if(APPLE)
+    # The APPLE platform links libraries by their full paths, so link_directories is not used.
+    return()
+  endif()
+
+  if(WITH_OPENCOLORIO)
+    link_directories(${OPENCOLORIO_LIBPATH})
+  endif()
+  if(WITH_OPENVDB)
+    link_directories(${OPENVDB_LIBPATH} ${BLOSC_LIBPATH})
+  endif()
+  if(WITH_OPENSUBDIV)
+    link_directories(${OPENSUBDIV_LIBPATH})
+  endif()
+  if(WITH_OPENIMAGEDENOISE)
+    link_directories(${OPENIMAGEDENOISE_LIBPATH})
+  endif()
+
+  link_directories(
+    ${OPENIMAGEIO_LIBPATH}
+    ${BOOST_LIBPATH}
+    ${PNG_LIBPATH}
+    ${JPEG_LIBPATH}
+    ${ZLIB_LIBPATH}
+    ${TIFF_LIBPATH}
+    ${OPENEXR_LIBPATH}
+    ${OPENJPEG_LIBPATH}
+  )
+endfunction()
+
 macro(cycles_target_link_libraries target)
  if(WITH_CYCLES_LOGGING)
    target_link_libraries(${target} ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES})
@@ -135,6 +168,12 @@ macro(cycles_target_link_libraries target)
    target_link_libraries(${target} extern_hipew)
  endif()

+  if(CYCLES_STANDALONE_REPOSITORY)
+    target_link_libraries(${target} extern_numaapi)
+  else()
+    target_link_libraries(${target} bf_intern_numaapi)
+  endif()
+
  if(UNIX AND NOT APPLE)
    if(CYCLES_STANDALONE_REPOSITORY)
      target_link_libraries(${target} extern_libc_compat)
--- a/intern/cycles/device/cpu/device_impl.cpp
+++ b/intern/cycles/device/cpu/device_impl.cpp
@@ -72,7 +72,7 @@ CPUDevice::CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_
          << " CPU kernels.";

  if (info.cpu_threads == 0) {
-    info.cpu_threads = TaskScheduler::max_concurrency();
+    info.cpu_threads = TaskScheduler::num_threads();
  }

 #ifdef WITH_OSL
--- a/intern/cycles/device/cuda/graphics_interop.cpp
+++ b/intern/cycles/device/cuda/graphics_interop.cpp
@@ -45,10 +45,8 @@ void CUDADeviceGraphicsInterop::set_display_interop(

  need_clear_ = display_interop.need_clear;

-  if (!display_interop.need_recreate) {
-    if (opengl_pbo_id_ == display_interop.opengl_pbo_id && buffer_area_ == new_buffer_area) {
-      return;
-    }
+  if (opengl_pbo_id_ == display_interop.opengl_pbo_id && buffer_area_ == new_buffer_area) {
+    return;
  }

  CUDAContextScope scope(device_);
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -37,7 +37,6 @@
 #include "util/math.h"
 #include "util/string.h"
 #include "util/system.h"
-#include "util/task.h"
 #include "util/time.h"
 #include "util/types.h"
 #include "util/vector.h"
@@ -334,7 +333,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
    /* Ensure CPU device does not slow down GPU. */
    if (device.type == DEVICE_CPU && subdevices.size() > 1) {
      if (background) {
-        int orig_cpu_threads = (threads) ? threads : TaskScheduler::max_concurrency();
+        int orig_cpu_threads = (threads) ? threads : system_cpu_thread_count();
        int cpu_threads = max(orig_cpu_threads - (subdevices.size() - 1), 0);

        VLOG(1) << "CPU render threads reduced from " << orig_cpu_threads << " to " << cpu_threads
--- a/intern/cycles/integrator/path_trace.cpp
+++ b/intern/cycles/integrator/path_trace.cpp
@@ -115,9 +115,7 @@ bool PathTrace::ready_to_reset()
  return false;
 }

-void PathTrace::reset(const BufferParams &full_params,
-                      const BufferParams &big_tile_params,
-                      const bool reset_rendering)
+void PathTrace::reset(const BufferParams &full_params, const BufferParams &big_tile_params)
 {
  if (big_tile_params_.modified(big_tile_params)) {
    big_tile_params_ = big_tile_params;
@@ -130,7 +128,7 @@ void PathTrace::reset(const BufferParams &full_params,
   * It is requires to inform about reset whenever it happens, so that the redraw state tracking is
   * properly updated. */
  if (display_) {
-    display_->reset(big_tile_params, reset_rendering);
+    display_->reset(full_params);
  }

  render_state_.has_denoised_result = false;
@@ -596,15 +594,6 @@ void PathTrace::draw()
  did_draw_after_reset_ |= display_->draw();
 }

-void PathTrace::flush_display()
-{
-  if (!display_) {
-    return;
-  }
-
-  display_->flush();
-}
-
 void PathTrace::update_display(const RenderWork &render_work)
 {
  if (!render_work.display.update) {
@@ -633,8 +622,9 @@ void PathTrace::update_display(const RenderWork &render_work)
  if (display_) {
    VLOG(3) << "Perform copy to GPUDisplay work.";

-    const int texture_width = render_state_.effective_big_tile_params.window_width;
-    const int texture_height = render_state_.effective_big_tile_params.window_height;
+    const int resolution_divider = render_work.resolution_divider;
+    const int texture_width = max(1, full_params_.width / resolution_divider);
+    const int texture_height = max(1, full_params_.height / resolution_divider);
    if (!display_->update_begin(texture_width, texture_height)) {
      LOG(ERROR) << "Error beginning GPUDisplay update.";
      return;
--- a/intern/cycles/integrator/path_trace.h
+++ b/intern/cycles/integrator/path_trace.h
@@ -72,9 +72,7 @@ class PathTrace {
   * render result. */
  bool ready_to_reset();

-  void reset(const BufferParams &full_params,
-             const BufferParams &big_tile_params,
-             bool reset_rendering);
+  void reset(const BufferParams &full_params, const BufferParams &big_tile_params);

  void device_free();

@@ -114,9 +112,6 @@ class PathTrace {
  /* Perform drawing of the current state of the DisplayDriver. */
  void draw();

-  /* Flush outstanding display commands before ending the render loop. */
-  void flush_display();
-
  /* Cancel rendering process as soon as possible, without waiting for full tile to be sampled.
   * Used in cases like reset of render session.
   *
--- a/intern/cycles/integrator/path_trace_display.cpp
+++ b/intern/cycles/integrator/path_trace_display.cpp
@@ -26,20 +26,15 @@ PathTraceDisplay::PathTraceDisplay(unique_ptr<DisplayDriver> driver) : driver_(m
 {
 }

-void PathTraceDisplay::reset(const BufferParams &buffer_params, const bool reset_rendering)
+void PathTraceDisplay::reset(const BufferParams &buffer_params)
 {
  thread_scoped_lock lock(mutex_);

-  params_.full_offset = make_int2(buffer_params.full_x + buffer_params.window_x,
-                                  buffer_params.full_y + buffer_params.window_y);
+  params_.full_offset = make_int2(buffer_params.full_x, buffer_params.full_y);
  params_.full_size = make_int2(buffer_params.full_width, buffer_params.full_height);
-  params_.size = make_int2(buffer_params.window_width, buffer_params.window_height);
+  params_.size = make_int2(buffer_params.width, buffer_params.height);

  texture_state_.is_outdated = true;
-
-  if (!reset_rendering) {
-    driver_->next_tile_begin();
-  }
 }

 void PathTraceDisplay::mark_texture_updated()
@@ -253,9 +248,4 @@ bool PathTraceDisplay::draw()
  return !is_outdated;
 }

-void PathTraceDisplay::flush()
-{
-  driver_->flush();
-}
-
 CCL_NAMESPACE_END
--- a/intern/cycles/integrator/path_trace_display.h
+++ b/intern/cycles/integrator/path_trace_display.h
@@ -38,17 +38,14 @@ class BufferParams;

 class PathTraceDisplay {
 public:
-  explicit PathTraceDisplay(unique_ptr<DisplayDriver> driver);
+  PathTraceDisplay(unique_ptr<DisplayDriver> driver);
  virtual ~PathTraceDisplay() = default;

  /* Reset the display for the new state of render session. Is called whenever session is reset,
   * which happens on changes like viewport navigation or viewport dimension change.
   *
-   * This call will configure parameters for a changed buffer and reset the texture state.
-   *
-   * When the `reset_rendering` a complete display reset happens. When it is false reset happens
-   * for a new state of the buffer parameters which is assumed to correspond to the next tile. */
-  void reset(const BufferParams &buffer_params, bool reset_rendering);
+   * This call will configure parameters for a changed buffer and reset the texture state. */
+  void reset(const BufferParams &buffer_params);

  /* --------------------------------------------------------------------
   * Update procedure.
@@ -154,9 +151,6 @@ class PathTraceDisplay {
   * Returns true if this call did draw an updated state of the texture. */
  bool draw();

-  /* Flush outstanding display commands before ending the render loop. */
-  void flush();
-
 private:
  /* Display driver implemented by the host application. */
  unique_ptr<DisplayDriver> driver_;
--- a/intern/cycles/integrator/path_trace_work.cpp
+++ b/intern/cycles/integrator/path_trace_work.cpp
@@ -194,10 +194,10 @@ PassAccessor::Destination PathTraceWork::get_display_destination_template(
  PassAccessor::Destination destination(film_->get_display_pass());

  const int2 display_texture_size = display->get_texture_size();
-  const int texture_x = effective_buffer_params_.full_x - effective_big_tile_params_.full_x +
-                        effective_buffer_params_.window_x - effective_big_tile_params_.window_x;
-  const int texture_y = effective_buffer_params_.full_y - effective_big_tile_params_.full_y +
-                        effective_buffer_params_.window_y - effective_big_tile_params_.window_y;
+  const int texture_x = effective_buffer_params_.full_x - effective_full_params_.full_x +
+                        effective_buffer_params_.window_x;
+  const int texture_y = effective_buffer_params_.full_y - effective_full_params_.full_y +
+                        effective_buffer_params_.window_y;

  destination.offset = texture_y * display_texture_size.x + texture_x;
  destination.stride = display_texture_size.x;
--- a/intern/cycles/integrator/path_trace_work_gpu.cpp
+++ b/intern/cycles/integrator/path_trace_work_gpu.cpp
@@ -875,10 +875,8 @@ void PathTraceWorkGPU::copy_to_display_naive(PathTraceDisplay *display,
  const int final_width = buffers_->params.window_width;
  const int final_height = buffers_->params.window_height;

-  const int texture_x = full_x - effective_big_tile_params_.full_x +
-                        effective_buffer_params_.window_x - effective_big_tile_params_.window_x;
-  const int texture_y = full_y - effective_big_tile_params_.full_y +
-                        effective_buffer_params_.window_y - effective_big_tile_params_.window_y;
+  const int texture_x = full_x - effective_full_params_.full_x + effective_buffer_params_.window_x;
+  const int texture_y = full_y - effective_full_params_.full_y + effective_buffer_params_.window_y;

  /* Re-allocate display memory if needed, and make sure the device pointer is allocated.
   *
--- a/intern/cycles/integrator/render_scheduler.cpp
+++ b/intern/cycles/integrator/render_scheduler.cpp
@@ -406,6 +406,9 @@ bool RenderScheduler::set_postprocess_render_work(RenderWork *render_work)
    any_scheduled = true;
  }

+  /* Force update. */
+  any_scheduled = true;
+
  if (any_scheduled) {
    render_work->display.update = true;
  }
--- a/intern/cycles/integrator/render_scheduler.h
+++ b/intern/cycles/integrator/render_scheduler.h
@@ -283,7 +283,7 @@ class RenderScheduler {
  /* Check whether timing report about the given work need to reset accumulated average time. */
  bool work_report_reset_average(const RenderWork &render_work);

-  /* Check whether render time limit has been reached (or exceeded), and if so store related
+  /* Check whether render time limit has been reached (or exceeded), and if so store related
   * information in the state so that rendering is considered finished, and is possible to report
   * average render time information. */
  void check_time_limit_reached();
--- a/intern/cycles/kernel/bvh/local.h
+++ b/intern/cycles/kernel/bvh/local.h
@@ -148,23 +148,12 @@ ccl_device_inline
            /* intersect ray against primitive */
            for (; prim_addr < prim_addr2; prim_addr++) {
              kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-
-              /* Only intersect with matching object, for instanced objects we
-               * already know we are only intersecting the right object. */
-              if (object == OBJECT_NONE) {
-                if (kernel_tex_fetch(__prim_object, prim_addr) != local_object) {
-                  continue;
-                }
-              }
-
-              const int prim = kernel_tex_fetch(__prim_index, prim_addr);
-
              if (triangle_intersect_local(kg,
                                           local_isect,
                                           P,
                                           dir,
+                                           object,
                                           local_object,
-                                           prim,
                                           prim_addr,
                                           isect_t,
                                           lcg_state,
@@ -179,24 +168,13 @@ ccl_device_inline
            /* intersect ray against primitive */
            for (; prim_addr < prim_addr2; prim_addr++) {
              kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-
-              /* Only intersect with matching object, for instanced objects we
-               * already know we are only intersecting the right object. */
-              if (object == OBJECT_NONE) {
-                if (kernel_tex_fetch(__prim_object, prim_addr) != local_object) {
-                  continue;
-                }
-              }
-
-              const int prim = kernel_tex_fetch(__prim_index, prim_addr);
-
              if (motion_triangle_intersect_local(kg,
                                                  local_isect,
                                                  P,
                                                  dir,
                                                  ray->time,
+                                                  object,
                                                  local_object,
-                                                  prim,
                                                  prim_addr,
                                                  isect_t,
                                                  lcg_state,
--- a/intern/cycles/kernel/bvh/shadow_all.h
+++ b/intern/cycles/kernel/bvh/shadow_all.h
@@ -146,7 +146,7 @@ ccl_device_inline
          --stack_ptr;

          /* primitive intersection */
-          for (; prim_addr < prim_addr2; prim_addr++) {
+          while (prim_addr < prim_addr2) {
            kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) ==
                          (type & PRIMITIVE_ALL));
            bool hit;
@@ -156,29 +156,16 @@ ccl_device_inline
             * might give a few % performance improvement */
            Intersection isect ccl_optional_struct_init;

-            const int prim_object = (object == OBJECT_NONE) ?
-                                        kernel_tex_fetch(__prim_object, prim_addr) :
-                                        object;
-            const int prim = kernel_tex_fetch(__prim_index, prim_addr);
-
            switch (type & PRIMITIVE_ALL) {
              case PRIMITIVE_TRIANGLE: {
                hit = triangle_intersect(
-                    kg, &isect, P, dir, t_max_current, visibility, prim_object, prim, prim_addr);
+                    kg, &isect, P, dir, t_max_current, visibility, object, prim_addr);
                break;
              }
 #if BVH_FEATURE(BVH_MOTION)
              case PRIMITIVE_MOTION_TRIANGLE: {
-                hit = motion_triangle_intersect(kg,
-                                                &isect,
-                                                P,
-                                                dir,
-                                                t_max_current,
-                                                ray->time,
-                                                visibility,
-                                                prim_object,
-                                                prim,
-                                                prim_addr);
+                hit = motion_triangle_intersect(
+                    kg, &isect, P, dir, t_max_current, ray->time, visibility, object, prim_addr);
                break;
              }
 #endif
@@ -195,9 +182,20 @@ ccl_device_inline
                  }
                }

+                const int curve_object = (object == OBJECT_NONE) ?
+                                             kernel_tex_fetch(__prim_object, prim_addr) :
+                                             object;
                const int curve_type = kernel_tex_fetch(__prim_type, prim_addr);
-                hit = curve_intersect(
-                    kg, &isect, P, dir, t_max_current, prim_object, prim, ray->time, curve_type);
+                const int curve_prim = kernel_tex_fetch(__prim_index, prim_addr);
+                hit = curve_intersect(kg,
+                                      &isect,
+                                      P,
+                                      dir,
+                                      t_max_current,
+                                      curve_object,
+                                      curve_prim,
+                                      ray->time,
+                                      curve_type);

                break;
              }
@@ -213,9 +211,20 @@ ccl_device_inline
                  }
                }

+                const int point_object = (object == OBJECT_NONE) ?
+                                             kernel_tex_fetch(__prim_object, prim_addr) :
+                                             object;
+                const int point_prim = kernel_tex_fetch(__prim_index, prim_addr);
                const int point_type = kernel_tex_fetch(__prim_type, prim_addr);
-                hit = point_intersect(
-                    kg, &isect, P, dir, t_max_current, prim_object, prim, ray->time, point_type);
+                hit = point_intersect(kg,
+                                      &isect,
+                                      P,
+                                      dir,
+                                      t_max_current,
+                                      point_object,
+                                      point_prim,
+                                      ray->time,
+                                      point_type);
                break;
              }
 #endif /* BVH_FEATURE(BVH_POINTCLOUD) */
@@ -292,6 +301,8 @@ ccl_device_inline
                integrator_state_write_shadow_isect(state, &isect, record_index);
              }
            }
+
+            prim_addr++;
          }
        }
        else {
--- a/intern/cycles/kernel/bvh/traversal.h
+++ b/intern/cycles/kernel/bvh/traversal.h
@@ -137,14 +137,8 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
            case PRIMITIVE_TRIANGLE: {
              for (; prim_addr < prim_addr2; prim_addr++) {
                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-
-                const int prim_object = (object == OBJECT_NONE) ?
-                                            kernel_tex_fetch(__prim_object, prim_addr) :
-                                            object;
-                const int prim = kernel_tex_fetch(__prim_index, prim_addr);
-
                if (triangle_intersect(
-                        kg, isect, P, dir, isect->t, visibility, prim_object, prim, prim_addr)) {
+                        kg, isect, P, dir, isect->t, visibility, object, prim_addr)) {
                  /* shadow ray early termination */
                  if (visibility & PATH_RAY_SHADOW_OPAQUE)
                    return true;
@@ -156,22 +150,8 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
            case PRIMITIVE_MOTION_TRIANGLE: {
              for (; prim_addr < prim_addr2; prim_addr++) {
                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-
-                const int prim_object = (object == OBJECT_NONE) ?
-                                            kernel_tex_fetch(__prim_object, prim_addr) :
-                                            object;
-                const int prim = kernel_tex_fetch(__prim_index, prim_addr);
-
-                if (motion_triangle_intersect(kg,
-                                              isect,
-                                              P,
-                                              dir,
-                                              isect->t,
-                                              ray->time,
-                                              visibility,
-                                              prim_object,
-                                              prim,
-                                              prim_addr)) {
+                if (motion_triangle_intersect(
+                        kg, isect, P, dir, isect->t, ray->time, visibility, object, prim_addr)) {
                  /* shadow ray early termination */
                  if (visibility & PATH_RAY_SHADOW_OPAQUE)
                    return true;
@@ -193,14 +173,13 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
                  }
                }

-                const int prim_object = (object == OBJECT_NONE) ?
-                                            kernel_tex_fetch(__prim_object, prim_addr) :
-                                            object;
-                const int prim = kernel_tex_fetch(__prim_index, prim_addr);
-
+                const int curve_object = (object == OBJECT_NONE) ?
+                                             kernel_tex_fetch(__prim_object, prim_addr) :
+                                             object;
+                const int curve_prim = kernel_tex_fetch(__prim_index, prim_addr);
                const int curve_type = kernel_tex_fetch(__prim_type, prim_addr);
                const bool hit = curve_intersect(
-                    kg, isect, P, dir, isect->t, prim_object, prim, ray->time, curve_type);
+                    kg, isect, P, dir, isect->t, curve_object, curve_prim, ray->time, curve_type);
                if (hit) {
                  /* shadow ray early termination */
                  if (visibility & PATH_RAY_SHADOW_OPAQUE)
@@ -221,14 +200,13 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
                  }
                }

-                const int prim_object = (object == OBJECT_NONE) ?
-                                            kernel_tex_fetch(__prim_object, prim_addr) :
-                                            object;
-                const int prim = kernel_tex_fetch(__prim_index, prim_addr);
-
+                const int point_object = (object == OBJECT_NONE) ?
+                                             kernel_tex_fetch(__prim_object, prim_addr) :
+                                             object;
+                const int point_prim = kernel_tex_fetch(__prim_index, prim_addr);
                const int point_type = kernel_tex_fetch(__prim_type, prim_addr);
                const bool hit = point_intersect(
-                    kg, isect, P, dir, isect->t, prim_object, prim, ray->time, point_type);
+                    kg, isect, P, dir, isect->t, point_object, point_prim, ray->time, point_type);
                if (hit) {
                  /* shadow ray early termination */
                  if (visibility & PATH_RAY_SHADOW_OPAQUE)
--- a/intern/cycles/kernel/bvh/volume.h
+++ b/intern/cycles/kernel/bvh/volume.h
@@ -140,17 +140,14 @@ ccl_device_inline
              for (; prim_addr < prim_addr2; prim_addr++) {
                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
                /* only primitives from volume object */
-                const int prim_object = (object == OBJECT_NONE) ?
-                                            kernel_tex_fetch(__prim_object, prim_addr) :
-                                            object;
-                const int prim = kernel_tex_fetch(__prim_index, prim_addr);
-
-                int object_flag = kernel_tex_fetch(__object_flag, prim_object);
+                uint tri_object = (object == OBJECT_NONE) ?
+                                      kernel_tex_fetch(__prim_object, prim_addr) :
+                                      object;
+                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
                  continue;
                }
-                triangle_intersect(
-                    kg, isect, P, dir, isect->t, visibility, prim_object, prim, prim_addr);
+                triangle_intersect(kg, isect, P, dir, isect->t, visibility, object, prim_addr);
              }
              break;
            }
@@ -160,24 +157,15 @@ ccl_device_inline
              for (; prim_addr < prim_addr2; prim_addr++) {
                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
                /* only primitives from volume object */
-                const int prim_object = (object == OBJECT_NONE) ?
-                                            kernel_tex_fetch(__prim_object, prim_addr) :
-                                            object;
-                const int prim = kernel_tex_fetch(__prim_index, prim_addr);
-                int object_flag = kernel_tex_fetch(__object_flag, prim_object);
+                uint tri_object = (object == OBJECT_NONE) ?
+                                      kernel_tex_fetch(__prim_object, prim_addr) :
+                                      object;
+                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
                  continue;
                }
-                motion_triangle_intersect(kg,
-                                          isect,
-                                          P,
-                                          dir,
-                                          isect->t,
-                                          ray->time,
-                                          visibility,
-                                          prim_object,
-                                          prim,
-                                          prim_addr);
+                motion_triangle_intersect(
+                    kg, isect, P, dir, isect->t, ray->time, visibility, object, prim_addr);
              }
              break;
            }
--- a/intern/cycles/kernel/bvh/volume_all.h
+++ b/intern/cycles/kernel/bvh/volume_all.h
@@ -143,16 +143,15 @@ ccl_device_inline
              for (; prim_addr < prim_addr2; prim_addr++) {
                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
                /* only primitives from volume object */
-                const int prim_object = (object == OBJECT_NONE) ?
-                                            kernel_tex_fetch(__prim_object, prim_addr) :
-                                            object;
-                const int prim = kernel_tex_fetch(__prim_index, prim_addr);
-                int object_flag = kernel_tex_fetch(__object_flag, prim_object);
+                uint tri_object = (object == OBJECT_NONE) ?
+                                      kernel_tex_fetch(__prim_object, prim_addr) :
+                                      object;
+                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
                  continue;
                }
                hit = triangle_intersect(
-                    kg, isect_array, P, dir, isect_t, visibility, prim_object, prim, prim_addr);
+                    kg, isect_array, P, dir, isect_t, visibility, object, prim_addr);
                if (hit) {
                  /* Move on to next entry in intersections array. */
                  isect_array++;
@@ -184,24 +183,15 @@ ccl_device_inline
              for (; prim_addr < prim_addr2; prim_addr++) {
                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
                /* only primitives from volume object */
-                const int prim_object = (object == OBJECT_NONE) ?
-                                            kernel_tex_fetch(__prim_object, prim_addr) :
-                                            object;
-                const int prim = kernel_tex_fetch(__prim_index, prim_addr);
-                int object_flag = kernel_tex_fetch(__object_flag, prim_object);
+                uint tri_object = (object == OBJECT_NONE) ?
+                                      kernel_tex_fetch(__prim_object, prim_addr) :
+                                      object;
+                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
                  continue;
                }
-                hit = motion_triangle_intersect(kg,
-                                                isect_array,
-                                                P,
-                                                dir,
-                                                isect_t,
-                                                ray->time,
-                                                visibility,
-                                                prim_object,
-                                                prim,
-                                                prim_addr);
+                hit = motion_triangle_intersect(
+                    kg, isect_array, P, dir, isect_t, ray->time, visibility, object, prim_addr);
                if (hit) {
                  /* Move on to next entry in intersections array. */
                  isect_array++;
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -243,10 +243,6 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
  }
 }

-#ifdef __KERNEL_METAL__
-constant int __dummy_constant [[function_constant(0)]];
-#endif
-
 ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
    ccl_gpu_kernel_signature(integrator_shade_surface_raytrace,
                             ccl_global const int *path_index_array,
@@ -257,16 +253,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)

  if (global_index < work_size) {
    const int state = (path_index_array) ? path_index_array[global_index] : global_index;
-
-#ifdef __KERNEL_METAL__
-    KernelGlobals kg = NULL;
-    /* Workaround Ambient Occlusion and Bevel nodes not working with Metal.
-     * Dummy offset should not affect result, but somehow fixes bug! */
-    kg += __dummy_constant;
-    ccl_gpu_kernel_call(integrator_shade_surface_raytrace(kg, state, render_buffer));
-#else
    ccl_gpu_kernel_call(integrator_shade_surface_raytrace(NULL, state, render_buffer));
-#endif
  }
 }

@@ -834,8 +821,8 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
  if (guiding_pass_flow != PASS_UNUSED) {
    kernel_assert(render_pass_motion != PASS_UNUSED);

-    ccl_global const float *motion_in = buffer + render_pass_motion;
-    ccl_global float *flow_out = guiding_pixel + guiding_pass_flow;
+    const float *motion_in = buffer + render_pass_motion;
+    float *flow_out = guiding_pixel + guiding_pass_flow;

    flow_out[0] = -motion_in[0] * pixel_scale;
    flow_out[1] = -motion_in[1] * pixel_scale;
--- a/intern/cycles/kernel/device/metal/compat.h
+++ b/intern/cycles/kernel/device/metal/compat.h
@@ -98,12 +98,8 @@ using namespace metal::raytracing;
 #define FN14(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12; p13; p14;
 #define FN15(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12; p13; p14; p15;
 #define FN16(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12; p13; p14; p15; p16;
-#define FN17(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, p17) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12; p13; p14; p15; p16; p17;
-#define FN18(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, p17, p18) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12; p13; p14; p15; p16; p17; p18;
-#define FN19(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12; p13; p14; p15; p16; p17; p18; p19;
-#define FN20(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12; p13; p14; p15; p16; p17; p18; p19; p20;
-#define GET_LAST_ARG(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, ...) p20
-#define PARAMS_MAKER(...) GET_LAST_ARG(__VA_ARGS__, FN20, FN19, FN18, FN17, FN16, FN15, FN14, FN13, FN12, FN11, FN10, FN9, FN8, FN7, FN6, FN5, FN4, FN3, FN2, FN1, FN0)
+#define GET_LAST_ARG(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, ...) p16
+#define PARAMS_MAKER(...) GET_LAST_ARG(__VA_ARGS__, FN16, FN15, FN14, FN13, FN12, FN11, FN10, FN9, FN8, FN7, FN6, FN5, FN4, FN3, FN2, FN1, FN0)

 /* Generate a struct containing the entry-point parameters and a "run"
 * method which can access them implicitly via this-> */
--- a/intern/cycles/kernel/film/passes.h
+++ b/intern/cycles/kernel/film/passes.h
@@ -92,14 +92,6 @@ ccl_device_forceinline void kernel_write_denoising_features_surface(
    else if (sc->type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID) {
      closure_albedo *= bsdf_principled_hair_albedo(sc);
    }
-    else if (sc->type == CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID) {
-      /* BSSRDF already accounts for weight, retro-reflection would double up. */
-      ccl_private const PrincipledDiffuseBsdf *bsdf = (ccl_private const PrincipledDiffuseBsdf *)
-          sc;
-      if (bsdf->components == PRINCIPLED_DIFFUSE_RETRO_REFLECTION) {
-        continue;
-      }
-    }

    if (bsdf_get_specular_roughness_squared(sc) > sqr(0.075f)) {
      diffuse_albedo += closure_albedo;
--- a/intern/cycles/kernel/geom/motion_triangle.h
+++ b/intern/cycles/kernel/geom/motion_triangle.h
@@ -116,52 +116,6 @@ ccl_device_inline void motion_triangle_vertices(
  verts[2] = (1.0f - t) * verts[2] + t * next_verts[2];
 }

-ccl_device_inline void motion_triangle_vertices_and_normals(
-    KernelGlobals kg, int object, int prim, float time, float3 verts[3], float3 normals[3])
-{
-  /* get motion info */
-  int numsteps, numverts;
-  object_motion_info(kg, object, &numsteps, &numverts, NULL);
-
-  /* Figure out which steps we need to fetch and their interpolation factor. */
-  int maxstep = numsteps * 2;
-  int step = min((int)(time * maxstep), maxstep - 1);
-  float t = time * maxstep - step;
-
-  /* Find attribute. */
-  int offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_POSITION);
-  kernel_assert(offset != ATTR_STD_NOT_FOUND);
-
-  /* Fetch vertex coordinates. */
-  float3 next_verts[3];
-  uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
-
-  motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
-  motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step + 1, next_verts);
-
-  /* Interpolate between steps. */
-  verts[0] = (1.0f - t) * verts[0] + t * next_verts[0];
-  verts[1] = (1.0f - t) * verts[1] + t * next_verts[1];
-  verts[2] = (1.0f - t) * verts[2] + t * next_verts[2];
-
-  /* Compute smooth normal. */
-
-  /* Find attribute. */
-  offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_NORMAL);
-  kernel_assert(offset != ATTR_STD_NOT_FOUND);
-
-  /* Fetch vertex coordinates. */
-  float3 next_normals[3];
-  motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals);
-  motion_triangle_normals_for_step(
-      kg, tri_vindex, offset, numverts, numsteps, step + 1, next_normals);
-
-  /* Interpolate between steps. */
-  normals[0] = (1.0f - t) * normals[0] + t * next_normals[0];
-  normals[1] = (1.0f - t) * normals[1] + t * next_normals[1];
-  normals[2] = (1.0f - t) * normals[2] + t * next_normals[2];
-}
-
 ccl_device_inline float3 motion_triangle_smooth_normal(
    KernelGlobals kg, float3 Ng, int object, int prim, float u, float v, float time)
 {
--- a/intern/cycles/kernel/geom/motion_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/motion_triangle_intersect.h
@@ -153,12 +153,14 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals kg,
                                                 float time,
                                                 uint visibility,
                                                 int object,
-                                                 int prim,
                                                 int prim_addr)
 {
+  /* Primitive index for vertex location lookup. */
+  int prim = kernel_tex_fetch(__prim_index, prim_addr);
+  int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, prim_addr) : object;
  /* Get vertex locations for intersection. */
  float3 verts[3];
-  motion_triangle_vertices(kg, object, prim, time, verts);
+  motion_triangle_vertices(kg, fobject, prim, time, verts);
  /* Ray-triangle intersection, unoptimized. */
  float t, u, v;
  if (ray_triangle_intersect(P, dir, tmax, verts[0], verts[1], verts[2], &u, &v, &t)) {
@@ -173,7 +175,8 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals kg,
      isect->u = u;
      isect->v = v;
      isect->prim = prim;
-      isect->object = object;
+      isect->object = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, prim_addr) :
+                                                object;
      isect->type = PRIMITIVE_MOTION_TRIANGLE;
      return true;
    }
@@ -193,15 +196,25 @@ ccl_device_inline bool motion_triangle_intersect_local(KernelGlobals kg,
                                                       float3 dir,
                                                       float time,
                                                       int object,
-                                                       int prim,
+                                                       int local_object,
                                                       int prim_addr,
                                                       float tmax,
                                                       ccl_private uint *lcg_state,
                                                       int max_hits)
 {
+  /* Only intersect with matching object, for instanced objects we
+   * already know we are only intersecting the right object. */
+  if (object == OBJECT_NONE) {
+    if (kernel_tex_fetch(__prim_object, prim_addr) != local_object) {
+      return false;
+    }
+  }
+
+  /* Primitive index for vertex location lookup. */
+  int prim = kernel_tex_fetch(__prim_index, prim_addr);
  /* Get vertex locations for intersection. */
  float3 verts[3];
-  motion_triangle_vertices(kg, object, prim, time, verts);
+  motion_triangle_vertices(kg, local_object, prim, time, verts);
  /* Ray-triangle intersection, unoptimized. */
  float t, u, v;
  if (!ray_triangle_intersect(P, dir, tmax, verts[0], verts[1], verts[2], &u, &v, &t)) {
@@ -253,7 +266,7 @@ ccl_device_inline bool motion_triangle_intersect_local(KernelGlobals kg,
  isect->u = u;
  isect->v = v;
  isect->prim = prim;
-  isect->object = object;
+  isect->object = local_object;
  isect->type = PRIMITIVE_MOTION_TRIANGLE;

  /* Record geometric normal. */
--- a/intern/cycles/kernel/geom/triangle_intersect.h
+++ b/intern/cycles/kernel/geom/triangle_intersect.h
@@ -33,9 +33,9 @@ ccl_device_inline bool triangle_intersect(KernelGlobals kg,
                                          float tmax,
                                          uint visibility,
                                          int object,
-                                          int prim,
                                          int prim_addr)
 {
+  const int prim = kernel_tex_fetch(__prim_index, prim_addr);
  const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w;
  const float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0),
               tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1),
@@ -49,7 +49,8 @@ ccl_device_inline bool triangle_intersect(KernelGlobals kg,
    if (kernel_tex_fetch(__prim_visibility, prim_addr) & visibility)
 #endif
    {
-      isect->object = object;
+      isect->object = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, prim_addr) :
+                                                object;
      isect->prim = prim;
      isect->type = PRIMITIVE_TRIANGLE;
      isect->u = u;
@@ -73,12 +74,21 @@ ccl_device_inline bool triangle_intersect_local(KernelGlobals kg,
                                                float3 P,
                                                float3 dir,
                                                int object,
-                                                int prim,
+                                                int local_object,
                                                int prim_addr,
                                                float tmax,
                                                ccl_private uint *lcg_state,
                                                int max_hits)
 {
+  /* Only intersect with matching object, for instanced objects we
+   * already know we are only intersecting the right object. */
+  if (object == OBJECT_NONE) {
+    if (kernel_tex_fetch(__prim_object, prim_addr) != local_object) {
+      return false;
+    }
+  }
+
+  const int prim = kernel_tex_fetch(__prim_index, prim_addr);
  const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w;
  const float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0),
               tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1),
@@ -129,7 +139,7 @@ ccl_device_inline bool triangle_intersect_local(KernelGlobals kg,
  /* Record intersection. */
  ccl_private Intersection *isect = &local_isect->hits[hit];
  isect->prim = prim;
-  isect->object = object;
+  isect->object = local_object;
  isect->type = PRIMITIVE_TRIANGLE;
  isect->u = u;
  isect->v = v;
--- a/intern/cycles/kernel/light/sample.h
+++ b/intern/cycles/kernel/light/sample.h
@@ -141,23 +141,14 @@ ccl_device_inline float3 shadow_ray_smooth_surface_offset(
    KernelGlobals kg, ccl_private const ShaderData *ccl_restrict sd, float3 Ng)
 {
  float3 V[3], N[3];
-
-  if (sd->type == PRIMITIVE_MOTION_TRIANGLE) {
-    motion_triangle_vertices_and_normals(kg, sd->object, sd->prim, sd->time, V, N);
-  }
-  else {
-    kernel_assert(sd->type == PRIMITIVE_TRIANGLE);
-    triangle_vertices_and_normals(kg, sd->prim, V, N);
-  }
+  triangle_vertices_and_normals(kg, sd->prim, V, N);

  const float u = sd->u, v = sd->v;
  const float w = 1 - u - v;
  float3 P = V[0] * u + V[1] * v + V[2] * w; /* Local space */
  float3 n = N[0] * u + N[1] * v + N[2] * w; /* We get away without normalization */

-  if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
-    object_normal_transform(kg, sd, &n); /* Normal x scale, world space */
-  }
+  object_normal_transform(kg, sd, &n); /* Normal x scale, world space */

  /* Parabolic approximation */
  float a = dot(N[2] - N[0], V[0] - V[2]);
--- a/intern/cycles/kernel/osl/shaders/node_normal_map.osl
+++ b/intern/cycles/kernel/osl/shaders/node_normal_map.osl
@@ -85,4 +85,6 @@ shader node_normal_map(normal NormalIn = N,

  if (Strength != 1.0)
    Normal = normalize(NormalIn + (Normal - NormalIn) * max(Strength, 0.0));
+
+  Normal = ensure_valid_reflection(Ng, I, Normal);
 }
--- a/intern/cycles/kernel/svm/magic.h
+++ b/intern/cycles/kernel/svm/magic.h
@@ -25,7 +25,7 @@ ccl_device_noinline_cpu float3 svm_magic(float3 p, float scale, int n, float dis
  /*
   * Prevent NaNs due to input p
   * Sin and Cosine are periodic about [0 2*PI) so the following
-   * will yield a more accurate result. As it stops the input values
+   * will yield a more accurate result. As it stops the input values
   * going out of range for floats which caused a NaN. The
   * calculation of (px + py + pz)*5 can cause an Inf when one or more
   * values are very large the cos or sin of this results in a NaN
--- a/intern/cycles/kernel/svm/types.h
+++ b/intern/cycles/kernel/svm/types.h
@@ -124,7 +124,7 @@ typedef enum ShaderNodeType {
  NODE_AOV_VALUE,
  NODE_FLOAT_CURVE,
  /* NOTE: for best OpenCL performance, item definition in the enum must
-   * match the switch case order in `svm.h`. */
+   * match the switch case order in svm.h. */
 } ShaderNodeType;

 typedef enum NodeAttributeOutputType {
--- a/intern/cycles/scene/alembic.cpp
+++ b/intern/cycles/scene/alembic.cpp
@@ -742,7 +742,6 @@ NODE_DEFINE(AlembicProcedural)
  NodeType *type = NodeType::add("alembic", create);

  SOCKET_STRING(filepath, "Filename", ustring());
-  SOCKET_STRING_ARRAY(layers, "Layers", array<ustring>());
  SOCKET_FLOAT(frame, "Frame", 1.0f);
  SOCKET_FLOAT(start_frame, "Start Frame", 1.0f);
  SOCKET_FLOAT(end_frame, "End Frame", 1.0f);
@@ -840,26 +839,14 @@ void AlembicProcedural::generate(Scene *scene, Progress &progress)
    return;
  }

-  if (!archive.valid() || filepath_is_modified() || layers_is_modified()) {
+  if (!archive.valid()) {
    Alembic::AbcCoreFactory::IFactory factory;
    factory.setPolicy(Alembic::Abc::ErrorHandler::kQuietNoopPolicy);
-
-    std::vector<std::string> filenames;
-    filenames.push_back(filepath.c_str());
-
-    for (const ustring &layer : layers) {
-      filenames.push_back(layer.c_str());
-    }
-
-    /* We need to reverse the order as overriding archives should come first. */
-    std::reverse(filenames.begin(), filenames.end());
-
-    archive = factory.getArchive(filenames);
+    archive = factory.getArchive(filepath.c_str());

    if (!archive.valid()) {
      /* avoid potential infinite update loops in viewport synchronization */
      filepath.clear();
-      layers.clear();
      clear_modified();
      return;
    }
--- a/intern/cycles/scene/alembic.h
+++ b/intern/cycles/scene/alembic.h
@@ -479,10 +479,6 @@ class AlembicProcedural : public Procedural {
  /* The file path to the Alembic archive */
  NODE_SOCKET_API(ustring, filepath)

-  /* Layers for the Alembic archive. Layers are in the order in which they override data, with the
-   * latter elements overriding the former ones. */
-  NODE_SOCKET_API_ARRAY(array<ustring>, layers)
-
  /* The current frame to render. */
  NODE_SOCKET_API(float, frame)

--- a/intern/cycles/session/display_driver.h
+++ b/intern/cycles/session/display_driver.h
@@ -54,8 +54,6 @@ class DisplayDriver {
    }
  };

-  virtual void next_tile_begin() = 0;
-
  /* Update the render from the rendering thread.
   *
   * Cycles periodically updates the render to be displayed. For multithreaded updates with
@@ -82,9 +80,6 @@ class DisplayDriver {
  virtual bool update_begin(const Params &params, int width, int height) = 0;
  virtual void update_end() = 0;

-  /* Optionally flush outstanding display commands before ending the render loop. */
-  virtual void flush(){};
-
  virtual half4 *map_texture_buffer() = 0;
  virtual void unmap_texture_buffer() = 0;

@@ -102,17 +97,6 @@ class DisplayDriver {

    /* Clear the entire buffer before doing partial write to it. */
    bool need_clear = false;
-
-    /* Enforce re-creation of the graphics interop object.
-     *
-     * When this field is true then the graphics interop will be re-created no matter what the
-     * rest of the configuration is.
-     * When this field is false the graphics interop will be re-created if the PBO or buffer size
-     * did change.
-     *
-     * This allows to ensure graphics interop is re-created when there is a possibility that an
-     * underlying PBO was re-allocated but did not change its ID. */
-    bool need_recreate = false;
  };

  virtual GraphicsInterop graphics_interop_get()
--- a/intern/cycles/session/session.cpp
+++ b/intern/cycles/session/session.cpp
@@ -192,8 +192,6 @@ void Session::run_main_render_loop()
      break;
    }
  }
-
-  path_trace_->flush_display();
 }

 void Session::run()
@@ -305,7 +303,7 @@ RenderWork Session::run_update_for_next_iteration()

      tile_params.update_offset_stride();

-      path_trace_->reset(buffer_params_, tile_params, did_reset);
+      path_trace_->reset(buffer_params_, tile_params);
    }

    const int resolution = render_work.resolution_divider;
@@ -386,8 +384,7 @@ int2 Session::get_effective_tile_size() const
  const int tile_size = tile_manager_.compute_render_tile_size(params.tile_size);
  const int64_t actual_tile_area = static_cast<int64_t>(tile_size) * tile_size;

-  if (actual_tile_area >= image_area && image_width <= TileManager::MAX_TILE_SIZE &&
-      image_height <= TileManager::MAX_TILE_SIZE) {
+  if (actual_tile_area >= image_area) {
    return make_int2(image_width, image_height);
  }

@@ -426,11 +423,6 @@ void Session::do_delayed_reset()
  buffer_params_.update_passes(scene->passes);
  tile_manager_.update(buffer_params_, scene);

-  /* Update temp directory on reset.
-   * This potentially allows to finish the existing rendering with a previously configure temporary
-   * directory in the host software and switch to a new temp directory when new render starts. */
-  tile_manager_.set_temp_dir(params.temp_dir);
-
  /* Progress. */
  progress.reset_sample();
  progress.set_total_pixel_samples(static_cast<uint64_t>(buffer_params_.width) *
--- a/intern/cycles/session/session.h
+++ b/intern/cycles/session/session.h
@@ -69,9 +69,6 @@ class SessionParams {

  ShadingSystem shadingsystem;

-  /* Session-specific temporary directory to store in-progress EXR files in. */
-  string temp_dir;
-
  SessionParams()
  {
    headless = false;
--- a/intern/cycles/session/tile.cpp
+++ b/intern/cycles/session/tile.cpp
@@ -23,7 +23,6 @@
 #include "scene/film.h"
 #include "scene/integrator.h"
 #include "scene/scene.h"
-#include "session/session.h"
 #include "util/algorithm.h"
 #include "util/foreach.h"
 #include "util/log.h"
@@ -342,10 +341,8 @@ int TileManager::compute_render_tile_size(const int suggested_tile_size) const
  /* Must be a multiple of IMAGE_TILE_SIZE so that we can write render tiles into the image file
   * aligned on image tile boundaries. We can't set IMAGE_TILE_SIZE equal to the render tile size
   * because too big tile size leads to integer overflow inside OpenEXR. */
-  const int computed_tile_size = (suggested_tile_size <= IMAGE_TILE_SIZE) ?
-                                     suggested_tile_size :
-                                     align_up(suggested_tile_size, IMAGE_TILE_SIZE);
-  return min(computed_tile_size, MAX_TILE_SIZE);
+  return (suggested_tile_size <= IMAGE_TILE_SIZE) ? suggested_tile_size :
+                                                    align_up(suggested_tile_size, IMAGE_TILE_SIZE);
 }

 void TileManager::reset_scheduling(const BufferParams &params, int2 tile_size)
@@ -395,11 +392,6 @@ void TileManager::update(const BufferParams &params, const Scene *scene)
  }
 }

-void TileManager::set_temp_dir(const string &temp_dir)
-{
-  temp_dir_ = temp_dir;
-}
-
 bool TileManager::done()
 {
  return tile_state_.next_tile_index == tile_state_.num_tiles;
@@ -458,8 +450,7 @@ const int2 TileManager::get_size() const

 bool TileManager::open_tile_output()
 {
-  write_state_.filename = path_join(temp_dir_,
-                                    "cycles-tile-buffer-" + tile_file_unique_part_ + "-" +
+  write_state_.filename = path_temp_get("cycles-tile-buffer-" + tile_file_unique_part_ + "-" +
                                        to_string(write_state_.tile_file_index) + ".exr");

  write_state_.tile_out = ImageOutput::create(write_state_.filename);
--- a/intern/cycles/session/tile.h
+++ b/intern/cycles/session/tile.h
@@ -71,8 +71,6 @@ class TileManager {
   * Will store all parameters needed for buffers access outside of the scene graph. */
  void update(const BufferParams &params, const Scene *scene);

-  void set_temp_dir(const string &temp_dir);
-
  inline int get_num_tiles() const
  {
    return tile_state_.num_tiles;
@@ -124,12 +122,6 @@ class TileManager {
  /* Tile size in the image file. */
  static const int IMAGE_TILE_SIZE = 128;

-  /* Maximum supported tile size.
-   * Needs to be safe from allocation on a GPU point of view: the display driver needs to be able
-   * to allocate texture with the side size of this value.
-   * Use conservative value which is safe for most of OpenGL drivers and GPUs. */
-  static const int MAX_TILE_SIZE = 8192;
-
 protected:
  /* Get tile configuration for its index.
   * The tile index must be within [0, state_.tile_state_). */
@@ -138,8 +130,6 @@ class TileManager {
  bool open_tile_output();
  bool close_tile_output();

-  string temp_dir_;
-
  /* Part of an on-disk tile file name which avoids conflicts between several Cycles instances or
   * several sessions. */
  string tile_file_unique_part_;
--- a/intern/cycles/test/CMakeLists.txt
+++ b/intern/cycles/test/CMakeLists.txt
@@ -38,6 +38,8 @@ set(ALL_CYCLES_LIBRARIES
 )
 include_directories(${INC})

+cycles_link_directories()
+
 set(SRC
  integrator_adaptive_sampling_test.cpp
  integrator_render_scheduler_test.cpp
--- a/intern/cycles/util/CMakeLists.txt
+++ b/intern/cycles/util/CMakeLists.txt
@@ -53,6 +53,16 @@ if(WITH_CYCLES_STANDALONE)
  endif()
 endif()

+if(CYCLES_STANDALONE_REPOSITORY)
+  list(APPEND INC_SYS
+    ../../third_party/numaapi/include
+  )
+else()
+  list(APPEND INC_SYS
+    ../../numaapi/include
+  )
+endif()
+
 set(SRC_HEADERS
  algorithm.h
  aligned_malloc.h
--- a/intern/cycles/util/array.h
+++ b/intern/cycles/util/array.h
@@ -64,7 +64,7 @@ template<typename T, size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES> class arra
    else {
      data_ = mem_allocate(from.datasize_);
      if (from.datasize_ > 0) {
-        mem_copy(data_, from.data_, from.datasize_);
+        memcpy(data_, from.data_, from.datasize_ * sizeof(T));
      }
      datasize_ = from.datasize_;
      capacity_ = datasize_;
@@ -76,7 +76,7 @@ template<typename T, size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES> class arra
    if (this != &from) {
      resize(from.size());
      if (datasize_ > 0) {
-        mem_copy(data_, from.data_, datasize_);
+        memcpy((void *)data_, from.data_, datasize_ * sizeof(T));
      }
    }

@@ -88,7 +88,7 @@ template<typename T, size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES> class arra
    resize(from.size());

    if (from.size() > 0 && datasize_ > 0) {
-      mem_copy(data_, from.data(), datasize_);
+      memcpy(data_, &from[0], datasize_ * sizeof(T));
    }

    return *this;
@@ -161,7 +161,8 @@ template<typename T, size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES> class arra
          return NULL;
        }
        else if (data_ != NULL) {
-          mem_copy(newdata, data_, ((datasize_ < newsize) ? datasize_ : newsize));
+          memcpy(
+              (void *)newdata, data_, ((datasize_ < newsize) ? datasize_ : newsize) * sizeof(T));
          mem_free(data_, capacity_);
        }
        data_ = newdata;
@@ -245,7 +246,7 @@ template<typename T, size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES> class arra
    if (newcapacity > capacity_) {
      T *newdata = mem_allocate(newcapacity);
      if (data_ != NULL) {
-        mem_copy(newdata, data_, ((datasize_ < newcapacity) ? datasize_ : newcapacity));
+        memcpy(newdata, data_, ((datasize_ < newcapacity) ? datasize_ : newcapacity) * sizeof(T));
        mem_free(data_, capacity_);
      }
      data_ = newdata;
@@ -279,7 +280,7 @@ template<typename T, size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES> class arra
    if (from.size()) {
      size_t old_size = size();
      resize(old_size + from.size());
-      mem_copy(data_ + old_size, from.data(), from.size());
+      memcpy(data_ + old_size, from.data(), sizeof(T) * from.size());
    }
  }

@@ -307,11 +308,6 @@ template<typename T, size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES> class arra
    }
  }

-  inline void mem_copy(T *mem_to, const T *mem_from, const size_t N)
-  {
-    memcpy((void *)mem_to, mem_from, sizeof(T) * N);
-  }
-
  T *data_;
  size_t datasize_;
  size_t capacity_;
--- a/intern/cycles/util/path.cpp
+++ b/intern/cycles/util/path.cpp
@@ -66,6 +66,7 @@ typedef struct stat path_stat_t;

 static string cached_path = "";
 static string cached_user_path = "";
+static string cached_temp_path = "";
 static string cached_xdg_cache_path = "";

 namespace {
@@ -335,10 +336,11 @@ static string path_xdg_cache_get()
 }
 #endif

-void path_init(const string &path, const string &user_path)
+void path_init(const string &path, const string &user_path, const string &temp_path)
 {
  cached_path = path;
  cached_user_path = user_path;
+  cached_temp_path = temp_path;

 #ifdef _MSC_VER
  // workaround for https://svn.boost.org/trac/boost/ticket/6320
@@ -382,6 +384,15 @@ string path_cache_get(const string &sub)
 #endif
 }

+string path_temp_get(const string &sub)
+{
+  if (cached_temp_path == "") {
+    cached_temp_path = Filesystem::temp_directory_path();
+  }
+
+  return path_join(cached_temp_path, sub);
+}
+
 #if defined(__linux__) || defined(__APPLE__)
 string path_xdg_home_get(const string &sub = "");
 #endif
--- a/intern/cycles/util/path.h
+++ b/intern/cycles/util/path.h
@@ -32,9 +32,10 @@
 CCL_NAMESPACE_BEGIN

 /* program paths */
-void path_init(const string &path = "", const string &user_path = "");
+void path_init(const string &path = "", const string &user_path = "", const string &tmp_path = "");
 string path_get(const string &sub = "");
 string path_user_get(const string &sub = "");
+string path_temp_get(const string &sub = "");
 string path_cache_get(const string &sub = "");

 /* path string manipulation */
--- a/intern/cycles/util/system.cpp
+++ b/intern/cycles/util/system.cpp
@@ -20,8 +20,9 @@
 #include "util/string.h"
 #include "util/types.h"

-#include <OpenImageIO/sysutil.h>
+#include <numaapi.h>

+#include <OpenImageIO/sysutil.h>
 OIIO_NAMESPACE_USING

 #ifdef _WIN32
@@ -40,6 +41,83 @@ OIIO_NAMESPACE_USING

 CCL_NAMESPACE_BEGIN

+bool system_cpu_ensure_initialized()
+{
+  static bool is_initialized = false;
+  static bool result = false;
+  if (is_initialized) {
+    return result;
+  }
+  is_initialized = true;
+  const NUMAAPI_Result numa_result = numaAPI_Initialize();
+  result = (numa_result == NUMAAPI_SUCCESS);
+  return result;
+}
+
+/* Fallback solution, which doesn't use NUMA/CPU groups. */
+static int system_cpu_thread_count_fallback()
+{
+#ifdef _WIN32
+  SYSTEM_INFO info;
+  GetSystemInfo(&info);
+  return info.dwNumberOfProcessors;
+#elif defined(__APPLE__)
+  int count;
+  size_t len = sizeof(count);
+  int mib[2] = {CTL_HW, HW_NCPU};
+  sysctl(mib, 2, &count, &len, NULL, 0);
+  return count;
+#else
+  return sysconf(_SC_NPROCESSORS_ONLN);
+#endif
+}
+
+int system_cpu_thread_count()
+{
+  const int num_nodes = system_cpu_num_numa_nodes();
+  int num_threads = 0;
+  for (int node = 0; node < num_nodes; ++node) {
+    if (!system_cpu_is_numa_node_available(node)) {
+      continue;
+    }
+    num_threads += system_cpu_num_numa_node_processors(node);
+  }
+  return num_threads;
+}
+
+int system_cpu_num_numa_nodes()
+{
+  if (!system_cpu_ensure_initialized()) {
+    /* Fallback to a single node with all the threads. */
+    return 1;
+  }
+  return numaAPI_GetNumNodes();
+}
+
+bool system_cpu_is_numa_node_available(int node)
+{
+  if (!system_cpu_ensure_initialized()) {
+    return true;
+  }
+  return numaAPI_IsNodeAvailable(node);
+}
+
+int system_cpu_num_numa_node_processors(int node)
+{
+  if (!system_cpu_ensure_initialized()) {
+    return system_cpu_thread_count_fallback();
+  }
+  return numaAPI_GetNumNodeProcessors(node);
+}
+
+bool system_cpu_run_thread_on_node(int node)
+{
+  if (!system_cpu_ensure_initialized()) {
+    return true;
+  }
+  return numaAPI_RunThreadOnNode(node);
+}
+
 int system_console_width()
 {
  int columns = 0;
@@ -59,6 +137,14 @@ int system_console_width()
  return (columns > 0) ? columns : 80;
 }

+int system_cpu_num_active_group_processors()
+{
+  if (!system_cpu_ensure_initialized()) {
+    return system_cpu_thread_count_fallback();
+  }
+  return numaAPI_GetNumCurrentNodesProcessors();
+}
+
 /* Equivalent of Windows __cpuid for x86 processors on other platforms. */
 #if (!defined(_WIN32) || defined(FREE_WINDOWS)) && (defined(__x86_64__) || defined(__i386__))
 static void __cpuid(int data[4], int selector)
--- a/intern/cycles/util/system.h
+++ b/intern/cycles/util/system.h
@@ -22,9 +22,36 @@

 CCL_NAMESPACE_BEGIN

+/* Make sure CPU groups / NUMA API is initialized. */
+bool system_cpu_ensure_initialized();
+
+/* Get total number of threads in all NUMA nodes / CPU groups. */
+int system_cpu_thread_count();
+
 /* Get width in characters of the current console output. */
 int system_console_width();

+/* Get number of available nodes.
+ *
+ * This is in fact an index of last node plus one and it's not guaranteed
+ * that all nodes up to this one are available. */
+int system_cpu_num_numa_nodes();
+
+/* Returns true if the given node is available for compute. */
+bool system_cpu_is_numa_node_available(int node);
+
+/* Get number of available processors on a given node. */
+int system_cpu_num_numa_node_processors(int node);
+
+/* Runs the current thread and its children on a specific node.
+ *
+ * Returns true if affinity has successfully changed. */
+bool system_cpu_run_thread_on_node(int node);
+
+/* Number of processors within the current CPU group (or within the active
+ * thread affinity). */
+int system_cpu_num_active_group_processors();
+
 string system_cpu_brand_string();
 int system_cpu_bits();
 bool system_cpu_support_sse2();
--- a/intern/cycles/util/task.cpp
+++ b/intern/cycles/util/task.cpp
@@ -89,7 +89,7 @@ void TaskScheduler::init(int num_threads)
    active_num_threads = num_threads;
  }
  else {
-    active_num_threads = tbb::this_task_arena::max_concurrency();
+    active_num_threads = system_cpu_thread_count();
  }
 }

@@ -109,10 +109,9 @@ void TaskScheduler::free_memory()
  assert(users == 0);
 }

-int TaskScheduler::max_concurrency()
+int TaskScheduler::num_threads()
 {
-  thread_scoped_lock lock(mutex);
-  return (users > 0) ? active_num_threads : tbb::this_task_arena::max_concurrency();
+  return active_num_threads;
 }

 /* Dedicated Task Pool */
--- a/intern/cycles/util/task.h
+++ b/intern/cycles/util/task.h
@@ -86,9 +86,10 @@ class TaskScheduler {
  static void exit();
  static void free_memory();

-  /* Maximum number of threads that will work on task. Use as little as
-   * possible and leave scheduling and splitting up tasks to the scheduler. */
-  static int max_concurrency();
+  /* Approximate number of threads that will work on task, which may be lower
+   * or higher than the actual number of threads. Use as little as possible and
+   * leave splitting up tasks to the scheduler. */
+  static int num_threads();

 protected:
  static thread_mutex mutex;
--- a/intern/cycles/util/thread.cpp
+++ b/intern/cycles/util/thread.cpp
@@ -21,7 +21,7 @@

 CCL_NAMESPACE_BEGIN

-thread::thread(function<void()> run_cb) : run_cb_(run_cb), joined_(false)
+thread::thread(function<void()> run_cb, int node) : run_cb_(run_cb), joined_(false), node_(node)
 {
 #ifdef __APPLE__
  /* Set the stack size to 2MB to match Linux. The default 512KB on macOS is
@@ -46,6 +46,9 @@ thread::~thread()
 void *thread::run(void *arg)
 {
  thread *self = (thread *)(arg);
+  if (self->node_ != -1) {
+    system_cpu_run_thread_on_node(self->node_);
+  }
  self->run_cb_();
  return NULL;
 }
--- a/intern/cycles/util/thread.h
+++ b/intern/cycles/util/thread.h
@@ -46,7 +46,9 @@ typedef std::condition_variable thread_condition_variable;

 class thread {
 public:
-  thread(function<void()> run_cb);
+  /* NOTE: Node index of -1 means that affinity will be inherited from the
+   * parent thread and no override on top of that will happen. */
+  thread(function<void()> run_cb, int node = -1);
  ~thread();

  static void *run(void *arg);
@@ -60,6 +62,7 @@ class thread {
  std::thread std_thread;
 #endif
  bool joined_;
+  int node_;
 };

 using thread_spin_lock = tbb::spin_mutex;
--- a/intern/ghost/GHOST_Types.h
+++ b/intern/ghost/GHOST_Types.h
@@ -496,6 +496,8 @@ typedef struct {
  int target_start;
  /** Represents the position of the end of the selection */
  int target_end;
+  /** Custom temporary data. */
+  GHOST_TUserDataPtr tmp;
 } GHOST_TEventImeData;

 typedef struct {
--- a/intern/ghost/intern/GHOST_ImeWin32.cpp
+++ b/intern/ghost/intern/GHOST_ImeWin32.cpp
@@ -106,7 +106,7 @@ bool GHOST_ImeWin32::IsImeKeyEvent(char ascii)
    if (IsLanguage(IMELANG_JAPANESE) && (ascii >= ' ' && ascii <= '~')) {
      return true;
    }
-    else if (IsLanguage(IMELANG_CHINESE) && ascii && strchr("!\"$'(),.:;<>?[\\]^_`/", ascii)) {
+    else if (IsLanguage(IMELANG_CHINESE) && ascii && strchr("!\"$'(),.:;<>?[\\]^_`", ascii)) {
      return true;
    }
  }
--- a/intern/ghost/intern/GHOST_Wintab.cpp
+++ b/intern/ghost/intern/GHOST_Wintab.cpp
@@ -130,7 +130,8 @@ GHOST_Wintab *GHOST_Wintab::loadWintab(HWND hwnd)
    }
  }

-  return new GHOST_Wintab(std::move(handle),
+  return new GHOST_Wintab(hwnd,
+                          std::move(handle),
                          info,
                          get,
                          set,
@@ -173,7 +174,8 @@ void GHOST_Wintab::extractCoordinates(LOGCONTEXT &lc, Coord &tablet, Coord &syst
  system.y.ext = -lc.lcSysExtY;
 }

-GHOST_Wintab::GHOST_Wintab(unique_hmodule handle,
+GHOST_Wintab::GHOST_Wintab(HWND hwnd,
+                           unique_hmodule handle,
                           GHOST_WIN32_WTInfo info,
                           GHOST_WIN32_WTGet get,
                           GHOST_WIN32_WTSet set,
@@ -296,12 +298,14 @@ GHOST_TabletData GHOST_Wintab::getLastTabletData()
 void GHOST_Wintab::getInput(std::vector<GHOST_WintabInfoWin32> &outWintabInfo)
 {
  const int numPackets = m_fpPacketsGet(m_context.get(), m_pkts.size(), m_pkts.data());
-  outWintabInfo.reserve(numPackets);
+  outWintabInfo.resize(numPackets);
+  size_t outExtent = 0;

  for (int i = 0; i < numPackets; i++) {
    PACKET pkt = m_pkts[i];
-    GHOST_WintabInfoWin32 out;
+    GHOST_WintabInfoWin32 &out = outWintabInfo[i + outExtent];

+    out.tabletData = GHOST_TABLET_DATA_NONE;
    /* % 3 for multiple devices ("DualTrack"). */
    switch (pkt.pkCursor % 3) {
      case 0:
@@ -324,7 +328,12 @@ void GHOST_Wintab::getInput(std::vector<GHOST_WintabInfoWin32> &outWintabInfo)
    }

    if ((m_maxAzimuth > 0) && (m_maxAltitude > 0)) {
-      /* From the wintab spec:
+      ORIENTATION ort = pkt.pkOrientation;
+      float vecLen;
+      float altRad, azmRad; /* In radians. */
+
+      /*
+       * From the wintab spec:
       * orAzimuth: Specifies the clockwise rotation of the cursor about the z axis through a
       * full circular range.
       * orAltitude: Specifies the angle with the x-y plane through a signed, semicircular range.
@@ -337,14 +346,12 @@ void GHOST_Wintab::getInput(std::vector<GHOST_WintabInfoWin32> &outWintabInfo)
       * value.
       */

-      ORIENTATION ort = pkt.pkOrientation;
-
      /* Convert raw fixed point data to radians. */
-      float altRad = (float)((fabs((float)ort.orAltitude) / (float)m_maxAltitude) * M_PI / 2.0);
-      float azmRad = (float)(((float)ort.orAzimuth / (float)m_maxAzimuth) * M_PI * 2.0);
+      altRad = (float)((fabs((float)ort.orAltitude) / (float)m_maxAltitude) * M_PI / 2.0);
+      azmRad = (float)(((float)ort.orAzimuth / (float)m_maxAzimuth) * M_PI * 2.0);

      /* Find length of the stylus' projected vector on the XY plane. */
-      float vecLen = cos(altRad);
+      vecLen = cos(altRad);

      /* From there calculate X and Y components based on azimuth. */
      out.tabletData.Xtilt = sin(azmRad) * vecLen;
@@ -355,8 +362,13 @@ void GHOST_Wintab::getInput(std::vector<GHOST_WintabInfoWin32> &outWintabInfo)

    /* Some Wintab libraries don't handle relative button input, so we track button presses
     * manually. */
+    out.button = GHOST_kButtonMaskNone;
+    out.type = GHOST_kEventCursorMove;
+
    DWORD buttonsChanged = m_buttons ^ pkt.pkButtons;
    WORD buttonIndex = 0;
+    GHOST_WintabInfoWin32 buttonRef = out;
+    int buttons = 0;

    while (buttonsChanged) {
      if (buttonsChanged & 1) {
@@ -364,14 +376,23 @@ void GHOST_Wintab::getInput(std::vector<GHOST_WintabInfoWin32> &outWintabInfo)
        GHOST_TButtonMask button = mapWintabToGhostButton(pkt.pkCursor, buttonIndex);

        if (button != GHOST_kButtonMaskNone) {
-          /* If this is not the first button found, push info for the prior Wintab button. */
-          if (out.button != GHOST_kButtonMaskNone) {
-            outWintabInfo.push_back(out);
+          /* Extend output if multiple buttons are pressed. The output is only extended once we
+           * confirm a Wintab button maps to a system button. */
+          if (buttons > 0) {
+            outWintabInfo.resize(outWintabInfo.size() + 1);
+            outExtent++;
+            GHOST_WintabInfoWin32 &out = outWintabInfo[i + outExtent];
+            out = buttonRef;
          }
+          buttons++;

          out.button = button;
-          out.type = buttonsChanged & pkt.pkButtons ? GHOST_kEventButtonDown :
-                                                      GHOST_kEventButtonUp;
+          if (buttonsChanged & pkt.pkButtons) {
+            out.type = GHOST_kEventButtonDown;
+          }
+          else {
+            out.type = GHOST_kEventButtonUp;
+          }
        }

        m_buttons ^= 1 << buttonIndex;
@@ -380,8 +401,6 @@ void GHOST_Wintab::getInput(std::vector<GHOST_WintabInfoWin32> &outWintabInfo)
      buttonsChanged >>= 1;
      buttonIndex++;
    }
-
-    outWintabInfo.push_back(out);
  }

  if (!outWintabInfo.empty()) {
--- a/intern/ghost/intern/GHOST_Wintab.h
+++ b/intern/ghost/intern/GHOST_Wintab.h
@@ -56,12 +56,11 @@ typedef std::unique_ptr<std::remove_pointer_t<HMODULE>, decltype(&::FreeLibrary)
 typedef std::unique_ptr<std::remove_pointer_t<HCTX>, GHOST_WIN32_WTClose> unique_hctx;

 struct GHOST_WintabInfoWin32 {
-  int32_t x = 0;
-  int32_t y = 0;
-  GHOST_TEventType type = GHOST_kEventCursorMove;
-  GHOST_TButtonMask button = GHOST_kButtonMaskNone;
-  uint64_t time = 0;
-  GHOST_TabletData tabletData = GHOST_TABLET_DATA_NONE;
+  int32_t x, y;
+  GHOST_TEventType type;
+  GHOST_TButtonMask button;
+  uint64_t time;
+  GHOST_TabletData tabletData;
 };

 class GHOST_Wintab {
@@ -214,7 +213,8 @@ class GHOST_Wintab {
  /** Most recently received tablet data, or none if pen is not in range. */
  GHOST_TabletData m_lastTabletData = GHOST_TABLET_DATA_NONE;

-  GHOST_Wintab(unique_hmodule handle,
+  GHOST_Wintab(HWND hwnd,
+               unique_hmodule handle,
               GHOST_WIN32_WTInfo info,
               GHOST_WIN32_WTGet get,
               GHOST_WIN32_WTSet set,
--- a/intern/guardedalloc/MEM_guardedalloc.h
+++ b/intern/guardedalloc/MEM_guardedalloc.h
@@ -147,12 +147,6 @@ extern void *(*MEM_mallocN_aligned)(size_t len,
                                    const char *str) /* ATTR_MALLOC */ ATTR_WARN_UNUSED_RESULT
    ATTR_ALLOC_SIZE(1) ATTR_NONNULL(3);

-/**
- * Print a list of the names and sizes of all allocated memory
- * blocks. as a python dict for easy investigation.
- */
-extern void (*MEM_printmemlist_pydict)(void);
-
 /**
 * Print a list of the names and sizes of all allocated memory blocks.
 */
--- a/intern/libmv/CMakeLists.txt
+++ b/intern/libmv/CMakeLists.txt
@@ -38,6 +38,8 @@ set(LIB
 )

 if(WITH_LIBMV)
+  setup_libdirs()
+
  if(WIN32)
    add_definitions(-D_USE_MATH_DEFINES)
  endif()
--- a/intern/libmv/bundle.sh
+++ b/intern/libmv/bundle.sh
@@ -118,6 +118,8 @@ set(LIB
 )

 if(WITH_LIBMV)
+  setup_libdirs()
+
  if(WIN32)
    add_definitions(-D_USE_MATH_DEFINES)
  endif()
--- a/intern/numaapi/AUTHORS
+++ b/intern/numaapi/AUTHORS
@@ -0,0 +1 @@
+Sergey Sharybin <sergey.vfx@gmail.com>
--- a/intern/numaapi/CMakeLists.txt
+++ b/intern/numaapi/CMakeLists.txt
@@ -0,0 +1,42 @@
+# ***** BEGIN GPL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+# ***** END GPL LICENSE BLOCK *****
+
+set(INC
+  include
+)
+
+set(INC_SYS
+
+)
+
+set(SRC
+  source/numaapi.c
+  source/numaapi_linux.c
+  source/numaapi_stub.c
+  source/numaapi_win32.c
+
+  include/numaapi.h
+  source/build_config.h
+)
+
+set(LIB
+)
+
+add_definitions(-DWITH_DYNLOAD)
+
+blender_add_lib(bf_intern_numaapi "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
--- a/intern/numaapi/LICENSE
+++ b/intern/numaapi/LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2016 libnumaapi authors.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to
+deal in the Software without restriction, including without limitation the
+rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+IN THE SOFTWARE.
--- a/intern/numaapi/README
+++ b/intern/numaapi/README
@@ -0,0 +1,7 @@
+LibNumaAPI aims to provide one common cross-platform API for all
+possible platforms, so that cross-platform applications need not
+worry about implementation details.
+
+LICENSE
+
+LibNumaAPI library is released under the MIT license.
--- a/intern/numaapi/README.blender
+++ b/intern/numaapi/README.blender
@@ -0,0 +1,5 @@
+Project: LibNumaAPI
+URL: https://github.com/Nazg-Gul/libNumaAPI
+License: MIT
+Upstream version: 1c1ae7bc78e
+Local modifications: None
--- a/intern/numaapi/include/numaapi.h
+++ b/intern/numaapi/include/numaapi.h
@@ -0,0 +1,122 @@
+// Copyright (c) 2016, libnumaapi authors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//
+// Author: Sergey Sharybin <sergey.vfx@gmail.com>
+
+/** \file
+ * \ingroup intern_numaapi
+ */
+
+#ifndef __LIBNUMAAPI_H__
+#define __LIBNUMAAPI_H__
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define NUMAAPI_VERSION_MAJOR 1
+#define NUMAAPI_VERSION_MINOR 0
+
+typedef enum NUMAAPI_Result {
+  NUMAAPI_SUCCESS       = 0,
+  // NUMA is not available on this platform.
+  NUMAAPI_NOT_AVAILABLE = 1,
+  // Generic error, no real details are available.
+  NUMAAPI_ERROR         = 2,
+  // Error installing atexit() handlers.
+  NUMAAPI_ERROR_ATEXIT  = 3,
+} NUMAAPI_Result;
+
+////////////////////////////////////////////////////////////////////////////////
+// Initialization.
+
+// Initialize NUMA API.
+//
+// This is the first call which must be made before any other NUMA functions
+// can be used.
+NUMAAPI_Result numaAPI_Initialize(void);
+
+// Get string representation of NUMAAPI_Result.
+const char* numaAPI_ResultAsString(NUMAAPI_Result result);
+
+////////////////////////////////////////////////////////////////////////////////
+// Topology query.
+
+// Get number of available nodes.
+//
+// This is in fact an index of last node plus one and it's not guaranteed
+// that all nodes up to this one are available.
+int numaAPI_GetNumNodes(void);
+
+// Returns true if the given node is available for compute.
+bool numaAPI_IsNodeAvailable(int node);
+
+// Get number of available processors on a given node.
+int numaAPI_GetNumNodeProcessors(int node);
+
+////////////////////////////////////////////////////////////////////////////////
+// Topology helpers.
+//
+// These are somewhat higher-level queries, but they are still rather
+// platform-specific and generally useful.
+
+// Get number of processors within the NUMA nodes to which the current thread
+// has its affinity set.
+int numaAPI_GetNumCurrentNodesProcessors(void);
+
+////////////////////////////////////////////////////////////////////////////////
+// Affinities.
+
+// Runs the current process and its children on a specific node.
+//
+// Returns true if affinity has successfully changed.
+//
+// NOTE: This function can not change active CPU group. Mainly designed to deal
+// with Threadripper 2 topology, to make it possible to gain maximum performance
+// for the main application thread.
+bool numaAPI_RunProcessOnNode(int node);
+
+// Runs the current thread and its children on a specific node.
+//
+// Returns true if affinity has successfully changed.
+bool numaAPI_RunThreadOnNode(int node);
+
+////////////////////////////////////////////////////////////////////////////////
+// Memory management.
+
+// Allocate memory on a given node.
+void* numaAPI_AllocateOnNode(size_t size, int node);
+
+// Allocate memory in the local memory, closest to the current node.
+void* numaAPI_AllocateLocal(size_t size);
+
+// Frees size bytes of memory starting at start.
+//
+// TODO(sergey): Consider giving it regular free() semantics.
+void numaAPI_Free(void* start, size_t size);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // __LIBNUMAAPI_H__
--- a/intern/numaapi/source/build_config.h
+++ b/intern/numaapi/source/build_config.h
@@ -0,0 +1,443 @@
+// Copyright (c) 2018, libnumaapi authors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//
+// Author: Sergey Sharybin <sergey.vfx@gmail.com>
+
+/** \file
+ * \ingroup intern_numaapi
+ */
+
+#ifndef __BUILD_CONFIG_H__
+#define __BUILD_CONFIG_H__
+
+#include <limits.h>
+#include <stdint.h>
+
+// Initially based on Chromium's build_config.h, with tweaks and extensions
+// needed for this project.
+//
+// NOTE: All commonly used symbols (which are checked on a "top" level, from
+// outside of any platform-specific ifdef block) are to be explicitly defined
+// to 0 when they are not "active". This adds extra lines of code to this file,
+// but the file is not edited that often. This approach helps catch cases where
+// a build configuration variable is accessed without including this header,
+// simply by enabling the -Wundef compiler warning.
+//
+// NOTE: Not having things explicitly defined to 0 is harmless (in that case it
+// follows the same rules as Google projects) and will simply cause the compiler
+// to become more noisy, which is simple to correct.
+
+////////////////////////////////////////////////////////////////////////////////
+// A set of macros to use for platform detection.
+
+#if defined(__native_client__)
+// __native_client__ must be first, so that other OS_ defines are not set.
+#  define OS_NACL 1
+// OS_NACL comes in two sandboxing technology flavors, SFI or Non-SFI.
+// PNaCl toolchain defines __native_client_nonsfi__ macro in Non-SFI build
+// mode, while it does not in SFI build mode.
+#  if defined(__native_client_nonsfi__)
+#    define OS_NACL_NONSFI
+#  else
+#    define OS_NACL_SFI
+#  endif
+#elif defined(_AIX)
+#  define OS_AIX 1
+#elif defined(ANDROID)
+#  define OS_ANDROID 1
+#elif defined(__APPLE__)
+// Only include TargetConditions after testing ANDROID as some android builds
+// on mac don't have this header available and it's not needed unless the target
+// is really mac/ios.
+#  include <TargetConditionals.h>
+#  define OS_MACOSX 1
+#  if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
+#    define OS_IOS 1
+#  endif  // defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
+#elif defined(__HAIKU__)
+#  define OS_HAIKU 1
+#elif defined(__hpux)
+#  define OS_HPUX 1
+#elif defined(__linux__)
+#  define OS_LINUX 1
+// Include a system header to pull in features.h for glibc/uclibc macros.
+#  include <unistd.h>
+#  if defined(__GLIBC__) && !defined(__UCLIBC__)
+// We really are using glibc, not uClibc pretending to be glibc.
+#    define LIBC_GLIBC 1
+#  endif
+#elif defined(__sgi)
+#  define OS_IRIX 1
+#elif defined(_WIN32)
+#  define OS_WIN 1
+#elif defined(__Fuchsia__)
+#  define OS_FUCHSIA 1
+#elif defined(__FreeBSD__)
+#  define OS_FREEBSD 1
+#elif defined(__NetBSD__)
+#  define OS_NETBSD 1
+#elif defined(__OpenBSD__)
+#  define OS_OPENBSD 1
+#elif defined(__sun)
+#  define OS_SOLARIS 1
+#elif defined(__QNXNTO__)
+#  define OS_QNX 1
+#elif defined(__asmjs__) || defined(__wasm__)
+#  define OS_ASMJS 1
+#else
+#  error Please add support for your platform in build_config.h
+#endif
+
+#if !defined(OS_AIX)
+#  define OS_AIX 0
+#endif
+#if !defined(OS_ASMJS)
+#  define OS_ASMJS 0
+#endif
+#if !defined(OS_NACL)
+#  define OS_NACL 0
+#endif
+#if !defined(OS_NACL_NONSFI)
+#  define OS_NACL_NONSFI 0
+#endif
+#if !defined(OS_NACL_SFI)
+#  define OS_NACL_SFI 0
+#endif
+#if !defined(OS_ANDROID)
+#  define OS_ANDROID 0
+#endif
+#if !defined(OS_MACOSX)
+#  define OS_MACOSX 0
+#endif
+#if !defined(OS_IOS)
+#  define OS_IOS 0
+#endif
+#if !defined(OS_HAIKU)
+#  define OS_HAIKU 0
+#endif
+#if !defined(OS_HPUX)
+#  define OS_HPUX 0
+#endif
+#if !defined(OS_IRIX)
+#  define OS_IRIX 0
+#endif
+#if !defined(OS_LINUX)
+#  define OS_LINUX 0
+#endif
+#if !defined(LIBC_GLIBC)
+#  define LIBC_GLIBC 0
+#endif
+#if !defined(OS_WIN)
+#  define OS_WIN 0
+#endif
+#if !defined(OS_FUCHSIA)
+#  define OS_FUCHSIA 0
+#endif
+#if !defined(OS_FREEBSD)
+#  define OS_FREEBSD 0
+#endif
+#if !defined(OS_NETBSD)
+#  define OS_NETBSD 0
+#endif
+#if !defined(OS_OPENBSD)
+#  define OS_OPENBSD 0
+#endif
+#if !defined(OS_SOLARIS)
+#  define OS_SOLARIS 0
+#endif
+#if !defined(OS_QNX)
+#  define OS_QNX 0
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// *BSD OS family detection.
+//
+// For access to standard BSD features, use OS_BSD instead of a
+// more specific macro.
+#if OS_FREEBSD || OS_OPENBSD || OS_NETBSD
+#  define OS_BSD 1
+#else
+#  define OS_BSD 0
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// POSIX system detection.
+//
+// For access to standard POSIXish features use OS_POSIX instead of a
+// more specific macro.
+#if OS_AIX || OS_ANDROID || OS_ASMJS || OS_FREEBSD || OS_LINUX || OS_MACOSX || \
+    OS_NACL || OS_NETBSD || OS_OPENBSD || OS_QNX || OS_SOLARIS
+#  define OS_POSIX 1
+#else
+#  define OS_POSIX 0
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// Compiler detection, including its capabilities.
+
+#if defined(__clang__)
+#  define COMPILER_CLANG 1
+#elif defined(__GNUC__)
+#  define COMPILER_GCC 1
+#  define COMPILER_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#elif defined(_MSC_VER)
+#  define COMPILER_MSVC 1
+#  define COMPILER_MSVC_VERSION (_MSC_VER)
+#elif defined(__MINGW32__)
+#  define COMPILER_MINGW32 1
+#elif defined(__MINGW64__)
+#  define COMPILER_MINGW64 1
+#else
+#  error Please add support for your compiler in build_config.h
+#endif
+
+#if !defined(COMPILER_CLANG)
+#  define COMPILER_CLANG 0
+#endif
+#if !defined(COMPILER_GCC)
+#  define COMPILER_GCC 0
+#endif
+#if !defined(COMPILER_MSVC)
+#  define COMPILER_MSVC 0
+#endif
+#if !defined(COMPILER_MINGW32)
+#  define COMPILER_MINGW32 0
+#endif
+#if !defined(COMPILER_MINGW64)
+#  define COMPILER_MINGW64 0
+#endif
+
+// Compiler is any of MinGW family.
+#if COMPILER_MINGW32 || COMPILER_MINGW64
+#  define COMPILER_MINGW 1
+#else
+#  define COMPILER_MINGW 0
+#endif
+
+// Check what is the latest C++ specification the compiler supports.
+//
+// NOTE: Use explicit definition here to avoid expansion-to-defined warning from
+// being generated. While this will most likely be a false-positive warning in
+// this particular case, that warning might be helpful to catch errors elsewhere.
+
+// C++11 check.
+#if ((defined(__cplusplus) && (__cplusplus > 199711L)) ||                      \
+     (defined(_MSC_VER) && (_MSC_VER >= 1800)))
+#  define COMPILER_SUPPORTS_CXX11 1
+#else
+#  define COMPILER_SUPPORTS_CXX11 0
+#endif
+// C++14 check.
+#if (defined(__cplusplus) && (__cplusplus > 201311L))
+#  define COMPILER_SUPPORTS_CXX14 1
+#else
+#  define COMPILER_SUPPORTS_CXX14 0
+#endif
+// C++17 check.
+#if (defined(__cplusplus) && (__cplusplus > 201611L))
+#  define COMPILER_SUPPORTS_CXX17 1
+#else
+#  define COMPILER_SUPPORTS_CXX17 0
+#endif
+// C++20 check.
+#if (defined(__cplusplus) && (__cplusplus > 201911L))
+#  define COMPILER_SUPPORTS_CXX20 1
+#else
+#  define COMPILER_SUPPORTS_CXX20 0
+#endif
+
+// COMPILER_USE_ADDRESS_SANITIZER is defined to 1 when it is detected that
+// compilation happens with address sanitizer enabled. This allows giving
+// hints to the sanitizer, or working around known issues with third party
+// libraries.
+#if !defined(COMPILER_USE_ADDRESS_SANITIZER)
+#  if defined(__has_feature)
+#    define COMPILER_USE_ADDRESS_SANITIZER 1
+#  elif defined(__SANITIZE_ADDRESS__)
+#    define COMPILER_USE_ADDRESS_SANITIZER 1
+#  endif
+#endif
+
+#if !defined(COMPILER_USE_ADDRESS_SANITIZER)
+#  define COMPILER_USE_ADDRESS_SANITIZER 0
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// Processor architecture detection.
+//
+// Every branch of the chain below defines the bitness and the endianness of
+// the target CPU and, where one exists, its family and specific variant.
+// An unknown architecture is a hard compilation error.
+//
+// For more info on what's defined, see:
+//
+//   http://msdn.microsoft.com/en-us/library/b0084kay.aspx
+//   http://www.agner.org/optimize/calling_conventions.pdf
+//
+//   or with gcc, run: "echo | gcc -E -dM -"
+#if defined(_M_X64) || defined(__x86_64__)
+#  define ARCH_CPU_X86_FAMILY 1
+#  define ARCH_CPU_X86_64 1
+#  define ARCH_CPU_64_BITS 1
+#  define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(_M_IX86) || defined(__i386__)
+#  define ARCH_CPU_X86_FAMILY 1
+#  define ARCH_CPU_X86 1
+#  define ARCH_CPU_32_BITS 1
+#  define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(__s390x__)
+#  define ARCH_CPU_S390_FAMILY 1
+#  define ARCH_CPU_S390X 1
+#  define ARCH_CPU_64_BITS 1
+#  define ARCH_CPU_BIG_ENDIAN 1
+#elif defined(__s390__)
+#  define ARCH_CPU_S390_FAMILY 1
+#  define ARCH_CPU_S390 1
+#  define ARCH_CPU_31_BITS 1
+#  define ARCH_CPU_BIG_ENDIAN 1
+#elif (defined(__PPC64__) || defined(__PPC__)) && defined(__BIG_ENDIAN__)
+#  define ARCH_CPU_PPC64_FAMILY 1
+#  define ARCH_CPU_PPC64 1
+#  define ARCH_CPU_64_BITS 1
+#  define ARCH_CPU_BIG_ENDIAN 1
+#elif defined(__PPC64__)
+#  define ARCH_CPU_PPC64_FAMILY 1
+#  define ARCH_CPU_PPC64 1
+#  define ARCH_CPU_64_BITS 1
+#  define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(__ARMEL__)
+#  define ARCH_CPU_ARM_FAMILY 1
+#  define ARCH_CPU_ARMEL 1
+#  define ARCH_CPU_32_BITS 1
+#  define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(__aarch64__) || defined(_M_ARM64)
+#  define ARCH_CPU_ARM_FAMILY 1
+#  define ARCH_CPU_ARM64 1
+#  define ARCH_CPU_64_BITS 1
+#  define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(__riscv) && __riscv_xlen == 32
+#  define ARCH_CPU_RISCV_FAMILY 1
+#  define ARCH_CPU_RISCV32 1
+#  define ARCH_CPU_32_BITS 1
+#  define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(__riscv) && __riscv_xlen == 64
+#  define ARCH_CPU_RISCV_FAMILY 1
+#  define ARCH_CPU_RISCV64 1
+#  define ARCH_CPU_64_BITS 1
+#  define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(__pnacl__) || defined(__asmjs__) || defined(__wasm__)
+#  define ARCH_CPU_32_BITS 1
+#  define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(__MIPSEL__)
+#  if defined(__LP64__)
+#    define ARCH_CPU_MIPS_FAMILY 1
+#    define ARCH_CPU_MIPS64EL 1
+#    define ARCH_CPU_64_BITS 1
+#    define ARCH_CPU_LITTLE_ENDIAN 1
+#  else
+#    define ARCH_CPU_MIPS_FAMILY 1
+#    define ARCH_CPU_MIPSEL 1
+#    define ARCH_CPU_32_BITS 1
+#    define ARCH_CPU_LITTLE_ENDIAN 1
+#  endif
+#elif defined(__MIPSEB__)
+#  if defined(__LP64__)
+#    define ARCH_CPU_MIPS_FAMILY 1
+#    define ARCH_CPU_MIPS64 1
+#    define ARCH_CPU_64_BITS 1
+#    define ARCH_CPU_BIG_ENDIAN 1
+#  else
+#    define ARCH_CPU_MIPS_FAMILY 1
+#    define ARCH_CPU_MIPS 1
+#    define ARCH_CPU_32_BITS 1
+#    define ARCH_CPU_BIG_ENDIAN 1
+#  endif
+#else
+#  error Please add support for your architecture in build_config.h
+#endif
+
+// Give a value of 0 to every endianness, bitness and family macro which the
+// detection chain above did not define. After this point all of them are
+// defined on every platform and can be tested with a plain #if.
+
+// Endianness.
+#if !defined(ARCH_CPU_LITTLE_ENDIAN)
+#  define ARCH_CPU_LITTLE_ENDIAN 0
+#endif
+#if !defined(ARCH_CPU_BIG_ENDIAN)
+#  define ARCH_CPU_BIG_ENDIAN 0
+#endif
+
+// Bitness.
+// NOTE(review): ARCH_CPU_31_BITS (defined for s390) gets no default here.
+#if !defined(ARCH_CPU_32_BITS)
+#  define ARCH_CPU_32_BITS 0
+#endif
+#if !defined(ARCH_CPU_64_BITS)
+#  define ARCH_CPU_64_BITS 0
+#endif
+
+// CPU families.
+#if !defined(ARCH_CPU_X86_FAMILY)
+#  define ARCH_CPU_X86_FAMILY 0
+#endif
+#if !defined(ARCH_CPU_ARM_FAMILY)
+#  define ARCH_CPU_ARM_FAMILY 0
+#endif
+#if !defined(ARCH_CPU_MIPS_FAMILY)
+#  define ARCH_CPU_MIPS_FAMILY 0
+#endif
+#if !defined(ARCH_CPU_PPC64_FAMILY)
+#  define ARCH_CPU_PPC64_FAMILY 0
+#endif
+#if !defined(ARCH_CPU_RISCV_FAMILY)
+#  define ARCH_CPU_RISCV_FAMILY 0
+#endif
+#if !defined(ARCH_CPU_S390_FAMILY)
+#  define ARCH_CPU_S390_FAMILY 0
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// Sizes of platform-dependent types.
+
+// PLATFORM_SIZEOF_PTR is the size of a pointer in bytes. The sources are tried
+// in the following order:
+//   1. __SIZEOF_POINTER__, when the compiler pre-defines it.
+//   2. UINTPTR_MAX.
+//   3. __WORDSIZE.
+// It is a compilation error when none of them gives an answer.
+//
+// NOTE(review): the last two depend on headers being included before this
+// point (presumably <stdint.h> and a libc header) -- confirm.
+#if defined(__SIZEOF_POINTER__)
+#  define PLATFORM_SIZEOF_PTR __SIZEOF_POINTER__
+#elif defined(UINTPTR_MAX)
+#  if (UINTPTR_MAX == 0xffffffff)
+#    define PLATFORM_SIZEOF_PTR 4
+#  elif (UINTPTR_MAX == 0xffffffffffffffff)  // NOLINT
+#    define PLATFORM_SIZEOF_PTR 8
+#  endif
+#elif defined(__WORDSIZE)
+#  if (__WORDSIZE == 32)
+#    define PLATFORM_SIZEOF_PTR 4
+#  elif (__WORDSIZE == 64)
+#    define PLATFORM_SIZEOF_PTR 8
+#  endif
+#endif
+#if !defined(PLATFORM_SIZEOF_PTR)
+#  error Cannot find pointer size.
+#endif
+
+// PLATFORM_SIZEOF_INT is the size of "int" in bytes, derived from UINT_MAX.
+//
+// NOTE(review): UINT_MAX and USHRT_MAX come from <limits.h>; presumably it is
+// included earlier in this header -- confirm. An undefined macro evaluates to
+// 0 inside of #if, which would end up in the #error branch.
+#if (UINT_MAX == 0xffffffff)
+#  define PLATFORM_SIZEOF_INT 4
+#elif (UINT_MAX == 0xffffffffffffffff)  // NOLINT
+#  define PLATFORM_SIZEOF_INT 8
+#else
+#  error Cannot find "int" size.
+#endif
+
+// PLATFORM_SIZEOF_SHORT is the size of "short" in bytes, derived from
+// USHRT_MAX.
+#if (USHRT_MAX == 0xffffffff)
+#  define PLATFORM_SIZEOF_SHORT 4
+#elif (USHRT_MAX == 0xffff)  // NOLINT
+#  define PLATFORM_SIZEOF_SHORT 2
+#else
+#  error Cannot find "short" size.
+#endif
+
+#endif  // __BUILD_CONFIG_H__
--- a/intern/numaapi/source/numaapi.c
+++ b/intern/numaapi/source/numaapi.c
@@ -0,0 +1,40 @@
+// Copyright (c) 2018, libnumaapi authors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//
+// Author: Sergey Sharybin <sergey.vfx@gmail.com>
+
+/** \file
+ * \ingroup intern_numaapi
+ */
+
+#include "numaapi.h"
+
+#include <assert.h>
+
+// Convert a result code to a human-readable string.
+//
+// The returned pointer refers to a string literal: it stays valid for the
+// lifetime of the program and must not be freed.
+//
+// A value which is not one of the handled codes trips the assert when
+// assertions are enabled, and gives "UNKNOWN" otherwise.
+const char* numaAPI_ResultAsString(NUMAAPI_Result result) {
+  switch (result) {
+    case NUMAAPI_SUCCESS: return "SUCCESS";
+    case NUMAAPI_NOT_AVAILABLE: return "NOT_AVAILABLE";
+    case NUMAAPI_ERROR: return "ERROR";
+    case NUMAAPI_ERROR_ATEXIT: return "ERROR_AT_EXIT";
+  }
+  assert(!"Unknown result was passed to numaAPI_ResultAsString().");
+  return "UNKNOWN";
+}
--- a/intern/numaapi/source/numaapi_linux.c
+++ b/intern/numaapi/source/numaapi_linux.c
@@ -0,0 +1,298 @@
+// Copyright (c) 2016, libnumaapi authors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//
+// Author: Sergey Sharybin <sergey.vfx@gmail.com>
+
+/** \file
+ * \ingroup intern_numaapi
+ */
+
+#include "build_config.h"
+
+#if OS_LINUX
+
+#include "numaapi.h"
+
+#include <stdlib.h>
+
+#ifndef WITH_DYNLOAD
+#  include <numa.h>
+#else
+#  include <dlfcn.h>
+#endif
+
+#ifdef WITH_DYNLOAD
+
+// Descriptor numa library.
+// Assigned from the result of findLibrary(), reset to NULL by numaExit().
+static void* numa_lib;
+
+// Types of all symbols which are read from the library.
+//
+// The type of a symbol `foo` must be named `tfoo`: the loader macro in
+// loadNumaSymbols() builds the type used for the cast by pasting `t` in front
+// of the symbol name.
+struct bitmask;
+typedef int tnuma_available(void);
+typedef int tnuma_max_node(void);
+typedef int tnuma_node_to_cpus(int node, struct bitmask* mask);
+typedef long tnuma_node_size(int node, long* freep);
+typedef int tnuma_run_on_node(int node);
+typedef void* tnuma_alloc_onnode(size_t size, int node);
+typedef void* tnuma_alloc_local(size_t size);
+typedef void tnuma_free(void* start, size_t size);
+typedef struct bitmask* tnuma_bitmask_clearall(struct bitmask *bitmask);
+typedef int tnuma_bitmask_isbitset(const struct bitmask *bitmask,
+                                   unsigned int n);
+typedef struct bitmask* tnuma_bitmask_setbit(struct bitmask *bitmask,
+                                             unsigned int n);
+typedef unsigned int tnuma_bitmask_nbytes(struct bitmask *bitmask);
+typedef void tnuma_bitmask_free(struct bitmask *bitmask);
+typedef struct bitmask* tnuma_allocate_cpumask(void);
+typedef struct bitmask* tnuma_allocate_nodemask(void);
+typedef void tnuma_free_cpumask(struct bitmask* bitmask);
+typedef void tnuma_free_nodemask(struct bitmask* bitmask);
+typedef int tnuma_run_on_node_mask(struct bitmask *nodemask);
+typedef int tnuma_run_on_node_mask_all(struct bitmask *nodemask);
+typedef struct bitmask *tnuma_get_run_node_mask(void);
+typedef void tnuma_set_interleave_mask(struct bitmask *nodemask);
+typedef void tnuma_set_localalloc(void);
+
+// Actual symbols.
+//
+// Function pointers which carry the names of the library functions, so the
+// code below reads the same with and without WITH_DYNLOAD. They are assigned
+// in loadNumaSymbols().
+static tnuma_available* numa_available;
+static tnuma_max_node* numa_max_node;
+static tnuma_node_to_cpus* numa_node_to_cpus;
+static tnuma_node_size* numa_node_size;
+static tnuma_run_on_node* numa_run_on_node;
+static tnuma_alloc_onnode* numa_alloc_onnode;
+static tnuma_alloc_local* numa_alloc_local;
+static tnuma_free* numa_free;
+static tnuma_bitmask_clearall* numa_bitmask_clearall;
+static tnuma_bitmask_isbitset* numa_bitmask_isbitset;
+static tnuma_bitmask_setbit* numa_bitmask_setbit;
+static tnuma_bitmask_nbytes* numa_bitmask_nbytes;
+static tnuma_bitmask_free* numa_bitmask_free;
+static tnuma_allocate_cpumask* numa_allocate_cpumask;
+static tnuma_allocate_nodemask* numa_allocate_nodemask;
+static tnuma_free_nodemask* numa_free_nodemask;
+static tnuma_free_cpumask* numa_free_cpumask;
+static tnuma_run_on_node_mask* numa_run_on_node_mask;
+static tnuma_run_on_node_mask_all* numa_run_on_node_mask_all;
+static tnuma_get_run_node_mask* numa_get_run_node_mask;
+static tnuma_set_interleave_mask* numa_set_interleave_mask;
+static tnuma_set_localalloc* numa_set_localalloc;
+
+// Try every candidate of the NULL-terminated `paths` array, in order, and give
+// the handle of the first one which dlopen() manages to load.
+// Gives NULL when none of the candidates could be opened.
+static void* findLibrary(const char** paths) {
+  for (const char** path = paths; *path != NULL; ++path) {
+    void* handle = dlopen(*path, RTLD_LAZY);
+    if (handle != NULL) {
+      return handle;
+    }
+  }
+  return NULL;
+}
+
+// Exit handler: close the library if it is loaded and forget its handle, so
+// that a repeated call has nothing left to do.
+static void numaExit(void) {
+  if (numa_lib != NULL) {
+    dlclose(numa_lib);
+    numa_lib = NULL;
+  }
+}
+
+// Load the NUMA library at runtime and resolve the symbols used by this file.
+//
+// The work is only done by the first call; every later call gives the result
+// which was stored by the first one.
+//
+// Returns:
+//   NUMAAPI_SUCCESS        the library is loaded and every symbol which is
+//                          called unconditionally by this file is resolved.
+//   NUMAAPI_NOT_AVAILABLE  the library was not found, or it misses one of
+//                          those symbols.
+//   NUMAAPI_ERROR_ATEXIT   the de-initialization handler was not registered.
+static NUMAAPI_Result loadNumaSymbols(void) {
+  // Prevent multiple initializations.
+  static bool initialized = false;
+  static NUMAAPI_Result result = NUMAAPI_NOT_AVAILABLE;
+  if (initialized) {
+    return result;
+  }
+  initialized = true;
+  // Find appropriate .so library.
+  const char* numa_paths[] = {
+      "libnuma.so.1",
+      "libnuma.so",
+      NULL};
+  // Register de-initialization.
+  const int error = atexit(numaExit);
+  if (error) {
+    result = NUMAAPI_ERROR_ATEXIT;
+    return result;
+  }
+  // Load library.
+  numa_lib = findLibrary(numa_paths);
+  if (numa_lib == NULL) {
+    result = NUMAAPI_NOT_AVAILABLE;
+    return result;
+  }
+  // Load symbols.
+
+#define _LIBRARY_FIND(lib, name)          \
+  do {                                    \
+    name = (t##name *)dlsym(lib, #name);  \
+  } while (0)
+#define NUMA_LIBRARY_FIND(name) _LIBRARY_FIND(numa_lib, name)
+
+  NUMA_LIBRARY_FIND(numa_available);
+  NUMA_LIBRARY_FIND(numa_max_node);
+  NUMA_LIBRARY_FIND(numa_node_to_cpus);
+  NUMA_LIBRARY_FIND(numa_node_size);
+  NUMA_LIBRARY_FIND(numa_run_on_node);
+  NUMA_LIBRARY_FIND(numa_alloc_onnode);
+  NUMA_LIBRARY_FIND(numa_alloc_local);
+  NUMA_LIBRARY_FIND(numa_free);
+  NUMA_LIBRARY_FIND(numa_bitmask_clearall);
+  NUMA_LIBRARY_FIND(numa_bitmask_isbitset);
+  NUMA_LIBRARY_FIND(numa_bitmask_setbit);
+  NUMA_LIBRARY_FIND(numa_bitmask_nbytes);
+  NUMA_LIBRARY_FIND(numa_bitmask_free);
+  NUMA_LIBRARY_FIND(numa_allocate_cpumask);
+  NUMA_LIBRARY_FIND(numa_allocate_nodemask);
+  NUMA_LIBRARY_FIND(numa_free_cpumask);
+  NUMA_LIBRARY_FIND(numa_free_nodemask);
+  NUMA_LIBRARY_FIND(numa_run_on_node_mask);
+  NUMA_LIBRARY_FIND(numa_run_on_node_mask_all);
+  NUMA_LIBRARY_FIND(numa_get_run_node_mask);
+  NUMA_LIBRARY_FIND(numa_set_interleave_mask);
+  NUMA_LIBRARY_FIND(numa_set_localalloc);
+
+#undef NUMA_LIBRARY_FIND
+#undef _LIBRARY_FIND
+
+  // dlsym() gives NULL for a symbol which the loaded library does not have.
+  // All the symbols checked here are called without any further checks by the
+  // public functions of this file, so report NUMA as not available instead of
+  // calling through a NULL pointer later on.
+  //
+  // numa_free_cpumask and numa_free_nodemask are not checked on purpose: the
+  // code which uses them falls back to numa_bitmask_free when they are
+  // missing. numa_run_on_node and numa_run_on_node_mask are not called by
+  // this file.
+  if (numa_available == NULL ||
+      numa_max_node == NULL ||
+      numa_node_to_cpus == NULL ||
+      numa_node_size == NULL ||
+      numa_alloc_onnode == NULL ||
+      numa_alloc_local == NULL ||
+      numa_free == NULL ||
+      numa_bitmask_clearall == NULL ||
+      numa_bitmask_isbitset == NULL ||
+      numa_bitmask_setbit == NULL ||
+      numa_bitmask_nbytes == NULL ||
+      numa_bitmask_free == NULL ||
+      numa_allocate_cpumask == NULL ||
+      numa_allocate_nodemask == NULL ||
+      numa_run_on_node_mask_all == NULL ||
+      numa_get_run_node_mask == NULL ||
+      numa_set_interleave_mask == NULL ||
+      numa_set_localalloc == NULL) {
+    result = NUMAAPI_NOT_AVAILABLE;
+    return result;
+  }
+
+  result = NUMAAPI_SUCCESS;
+  return result;
+}
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// Initialization.
+
+// Prepare the NUMA API for use.
+//
+// With WITH_DYNLOAD the library is loaded first, and a failure of that step is
+// passed to the caller as is. After that the answer only depends on whether
+// numa_available() gives a negative value.
+NUMAAPI_Result numaAPI_Initialize(void) {
+#ifdef WITH_DYNLOAD
+  const NUMAAPI_Result load_result = loadNumaSymbols();
+  if (load_result != NUMAAPI_SUCCESS) {
+    return load_result;
+  }
+#endif
+  return (numa_available() < 0) ? NUMAAPI_NOT_AVAILABLE : NUMAAPI_SUCCESS;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Topology query.
+
+// Number of nodes: the highest node index as given by numa_max_node(), plus
+// one.
+int numaAPI_GetNumNodes(void) {
+  return numa_max_node() + 1;
+}
+
+// A node is considered available when at least one processor is associated
+// with it.
+bool numaAPI_IsNodeAvailable(int node) {
+  return numaAPI_GetNumNodeProcessors(node) > 0;
+}
+
+// Count the processors of the given node: query the CPU mask of the node and
+// count the bits which are set in it.
+//
+// NOTE(review): the result of numa_node_to_cpus() is not checked, the mask is
+// counted regardless of it.
+int numaAPI_GetNumNodeProcessors(int node) {
+  struct bitmask* mask = numa_allocate_cpumask();
+  numa_node_to_cpus(node, mask);
+  // TODO(sergey): There might be faster way calculating number of set bits.
+  const unsigned int total_bits = numa_bitmask_nbytes(mask) * 8;
+  int count = 0;
+  unsigned int index = 0;
+  while (index < total_bits) {
+    count += numa_bitmask_isbitset(mask, index) ? 1 : 0;
+    ++index;
+  }
+  // The dedicated free function might be missing in a dynamically loaded
+  // library, the generic one is used in that case.
+#ifdef WITH_DYNLOAD
+  if (numa_free_cpumask == NULL) {
+    numa_bitmask_free(mask);
+  } else {
+    numa_free_cpumask(mask);
+  }
+#else
+  numa_free_cpumask(mask);
+#endif
+  return count;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Topology helpers.
+
+// Sum up the processors of all the nodes which are set in the mask given by
+// numa_get_run_node_mask().
+int numaAPI_GetNumCurrentNodesProcessors(void) {
+  struct bitmask* run_mask = numa_get_run_node_mask();
+  const unsigned int total_bits = numa_bitmask_nbytes(run_mask) * 8;
+  int total = 0;
+  for (unsigned int node = 0; node < total_bits; ++node) {
+    if (!numa_bitmask_isbitset(run_mask, node)) {
+      continue;
+    }
+    total += numaAPI_GetNumNodeProcessors(node);
+  }
+  numa_bitmask_free(run_mask);
+  return total;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Affinities.
+
+// Run the process on the given node.
+//
+// This implementation delegates to numaAPI_RunThreadOnNode() and gives its
+// result to the caller, instead of reporting success unconditionally.
+bool numaAPI_RunProcessOnNode(int node) {
+  return numaAPI_RunThreadOnNode(node);
+}
+
+// Bind the calling thread to the given node.
+//
+// Returns true when numa_run_on_node_mask_all() accepted the node mask, false
+// otherwise. The memory policy is only adjusted after a successful binding.
+bool numaAPI_RunThreadOnNode(int node) {
+  // Construct bit mask from node index.
+  struct bitmask* node_mask = numa_allocate_nodemask();
+  numa_bitmask_clearall(node_mask);
+  numa_bitmask_setbit(node_mask, node);
+  // A negative result means that the binding did not happen.
+  const bool is_bound = (numa_run_on_node_mask_all(node_mask) >= 0);
+  // TODO(sergey): The following commands are based on x265 code, we might want
+  // to make those optional, or require to call those explicitly.
+  //
+  // Current assumption is that this is similar to SetThreadGroupAffinity().
+  if (is_bound && numa_node_size(node, NULL) > 0) {
+    numa_set_interleave_mask(node_mask);
+    numa_set_localalloc();
+  }
+  // The dedicated free function might be missing in a dynamically loaded
+  // library, the generic one is used in that case.
+#ifdef WITH_DYNLOAD
+  if (numa_free_nodemask != NULL) {
+    numa_free_nodemask(node_mask);
+  } else {
+    numa_bitmask_free(node_mask);
+  }
+#else
+  numa_free_nodemask(node_mask);
+#endif
+  return is_bound;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Memory management.
+
+// Allocate `size` bytes on the given node.
+// Thin wrapper which gives the result of numa_alloc_onnode() as is.
+void* numaAPI_AllocateOnNode(size_t size, int node) {
+  return numa_alloc_onnode(size, node);
+}
+
+// Allocate `size` bytes using numa_alloc_local(), the result is given as is.
+void* numaAPI_AllocateLocal(size_t size) {
+  return numa_alloc_local(size);
+}
+
+// Free memory with numa_free(). Both the start address and the size are
+// passed to the library.
+void numaAPI_Free(void* start, size_t size) {
+  numa_free(start, size);
+}
+
+#endif  // OS_LINUX
--- a/intern/numaapi/source/numaapi_stub.c
+++ b/intern/numaapi/source/numaapi_stub.c
@@ -0,0 +1,98 @@
+// Copyright (c) 2016, libnumaapi authors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//
+// Author: Sergey Sharybin <sergey.vfx@gmail.com>
+
+/** \file
+ * \ingroup intern_numaapi
+ */
+
+#include "numaapi.h"
+
+#include "build_config.h"
+
+// Stub implementation for platforms which don't have NUMA support.
+
+#if !OS_LINUX && !OS_WIN
+
+////////////////////////////////////////////////////////////////////////////////
+// Initialization.
+
+// NUMA is never available in the stub implementation.
+NUMAAPI_Result numaAPI_Initialize(void) {
+  return NUMAAPI_NOT_AVAILABLE;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Topology query.
+
+// The stub reports zero nodes.
+int numaAPI_GetNumNodes(void) {
+  return 0;
+}
+
+// No node is ever available in the stub.
+bool numaAPI_IsNodeAvailable(int node) {
+  (void) node;  // Ignored.
+  return false;
+}
+
+// The stub reports zero processors for any node.
+int numaAPI_GetNumNodeProcessors(int node) {
+  (void) node;  // Ignored.
+  return 0;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Topology helpers.
+
+// The stub reports zero processors.
+int numaAPI_GetNumCurrentNodesProcessors(void) {
+  return 0;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Affinities.
+
+// Nothing is changed, false is always returned.
+bool numaAPI_RunProcessOnNode(int node) {
+  (void) node;  // Ignored.
+  return false;
+}
+
+// Nothing is changed, false is always returned.
+bool numaAPI_RunThreadOnNode(int node) {
+  (void) node;  // Ignored.
+  return false;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Memory management.
+
+// Nothing is allocated by the stub, NULL is always returned.
+void* numaAPI_AllocateOnNode(size_t size, int node) {
+  (void) size;  // Ignored.
+  (void) node;  // Ignored.
+  return NULL;
+}
+
+// Nothing is allocated by the stub, NULL is always returned.
+void* numaAPI_AllocateLocal(size_t size) {
+  (void) size;  // Ignored.
+  return NULL;
+}
+
+// Does nothing: the stub allocation functions never hand out memory.
+void numaAPI_Free(void* start, size_t size) {
+  (void) start;  // Ignored.
+  (void) size;  // Ignored.
+}
+
+#endif  // !OS_LINUX && !OS_WIN
--- a/intern/numaapi/source/numaapi_win32.c
+++ b/intern/numaapi/source/numaapi_win32.c
@@ -0,0 +1,296 @@
+// Copyright (c) 2016, libnumaapi authors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//
+// Author: Sergey Sharybin <sergey.vfx@gmail.com>
+
+/** \file
+ * \ingroup intern_numaapi
+ */
+
+#include "build_config.h"
+
+#if OS_WIN
+
+#include "numaapi.h"
+
+#ifndef NOGDI
+#  define NOGDI
+#endif
+#ifndef NOMINMAX
+#  define NOMINMAX
+#endif
+#ifndef WIN32_LEAN_AND_MEAN
+#  define WIN32_LEAN_AND_MEAN
+#endif
+#ifndef NOCOMM
+#  define NOCOMM
+#endif
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <windows.h>
+
+#if ARCH_CPU_64_BITS
+#  include <VersionHelpers.h>
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// Initialization.
+
+// Kernel library, from where the symbols come.
+// Assigned in loadNumaSymbols().
+static HMODULE kernel_lib;
+
+// Types of all symbols which are read from the library.
+//
+// The type of a symbol `Foo` must be named `t_Foo` and the pointer which
+// holds it `_Foo`: the loader macro in loadNumaSymbols() builds both names by
+// pasting the prefixes in front of the symbol name.
+
+// NUMA function types.
+typedef BOOL t_GetNumaHighestNodeNumber(PULONG highest_node_number);
+typedef BOOL t_GetNumaNodeProcessorMask(UCHAR node, ULONGLONG* processor_mask);
+typedef BOOL t_GetNumaNodeProcessorMaskEx(USHORT node,
+                                          GROUP_AFFINITY* processor_mask);
+typedef BOOL t_GetNumaProcessorNode(UCHAR processor, UCHAR* node_number);
+typedef void* t_VirtualAllocExNuma(HANDLE process_handle,
+                                   LPVOID address,
+                                   SIZE_T size,
+                                   DWORD  allocation_type,
+                                   DWORD  protect,
+                                   DWORD  preferred);
+typedef BOOL t_VirtualFree(void* address, SIZE_T size, DWORD free_type);
+// Threading function types.
+typedef BOOL t_SetProcessAffinityMask(HANDLE process_handle,
+                                      DWORD_PTR process_affinity_mask);
+typedef BOOL t_SetThreadGroupAffinity(HANDLE thread_handle,
+                                      const GROUP_AFFINITY* group_affinity,
+                                      GROUP_AFFINITY* PreviousGroupAffinity);
+typedef BOOL t_GetThreadGroupAffinity(HANDLE thread_handle,
+                                      GROUP_AFFINITY* group_affinity);
+typedef DWORD t_GetCurrentProcessorNumber(void);
+typedef void t_GetCurrentProcessorNumberEx(PROCESSOR_NUMBER* proc_number);
+typedef DWORD t_GetActiveProcessorCount(WORD group_number);
+
+
+// NUMA symbols.
+static t_GetNumaHighestNodeNumber* _GetNumaHighestNodeNumber;
+static t_GetNumaNodeProcessorMask* _GetNumaNodeProcessorMask;
+static t_GetNumaNodeProcessorMaskEx* _GetNumaNodeProcessorMaskEx;
+static t_GetNumaProcessorNode* _GetNumaProcessorNode;
+static t_VirtualAllocExNuma* _VirtualAllocExNuma;
+static t_VirtualFree* _VirtualFree;
+// Threading symbols.
+static t_SetProcessAffinityMask* _SetProcessAffinityMask;
+static t_SetThreadGroupAffinity* _SetThreadGroupAffinity;
+static t_GetThreadGroupAffinity* _GetThreadGroupAffinity;
+static t_GetCurrentProcessorNumber* _GetCurrentProcessorNumber;
+static t_GetCurrentProcessorNumberEx* _GetCurrentProcessorNumberEx;
+static t_GetActiveProcessorCount* _GetActiveProcessorCount;
+
+// Exit handler which is registered by loadNumaSymbols().
+// Currently does nothing: the library handle is left open.
+static void numaExit(void) {
+  // TODO(sergey): Consider closing library here.
+}
+
+// Load Kernel32.dll and resolve the symbols used by this file.
+//
+// The work is only done by the first call; every later call gives the result
+// which was stored by the first one.
+//
+// Returns:
+//   NUMAAPI_SUCCESS        the library is loaded and every symbol which is
+//                          called by this file is resolved.
+//   NUMAAPI_NOT_AVAILABLE  the library could not be loaded, or it misses one
+//                          of those symbols.
+//   NUMAAPI_ERROR_ATEXIT   the de-initialization handler was not registered.
+static NUMAAPI_Result loadNumaSymbols(void) {
+  // Prevent multiple initializations.
+  static bool initialized = false;
+  static NUMAAPI_Result result = NUMAAPI_NOT_AVAILABLE;
+  if (initialized) {
+    return result;
+  }
+  initialized = true;
+  // Register de-initialization.
+  const int error = atexit(numaExit);
+  if (error) {
+    result = NUMAAPI_ERROR_ATEXIT;
+    return result;
+  }
+  // Load library.
+  kernel_lib = LoadLibraryA("Kernel32.dll");
+  if (kernel_lib == NULL) {
+    result = NUMAAPI_NOT_AVAILABLE;
+    return result;
+  }
+  // Load symbols.
+
+#define _LIBRARY_FIND(lib, name)                   \
+  do {                                             \
+    _##name = (t_##name *)GetProcAddress(lib, #name);  \
+  } while (0)
+#define KERNEL_LIBRARY_FIND(name) _LIBRARY_FIND(kernel_lib, name)
+
+  // NUMA.
+  KERNEL_LIBRARY_FIND(GetNumaHighestNodeNumber);
+  KERNEL_LIBRARY_FIND(GetNumaNodeProcessorMask);
+  KERNEL_LIBRARY_FIND(GetNumaNodeProcessorMaskEx);
+  KERNEL_LIBRARY_FIND(GetNumaProcessorNode);
+  KERNEL_LIBRARY_FIND(VirtualAllocExNuma);
+  KERNEL_LIBRARY_FIND(VirtualFree);
+  // Threading.
+  KERNEL_LIBRARY_FIND(SetProcessAffinityMask);
+  KERNEL_LIBRARY_FIND(SetThreadGroupAffinity);
+  KERNEL_LIBRARY_FIND(GetThreadGroupAffinity);
+  KERNEL_LIBRARY_FIND(GetCurrentProcessorNumber);
+  KERNEL_LIBRARY_FIND(GetCurrentProcessorNumberEx);
+  KERNEL_LIBRARY_FIND(GetActiveProcessorCount);
+
+#undef KERNEL_LIBRARY_FIND
+#undef _LIBRARY_FIND
+
+  // GetProcAddress() gives NULL for a symbol which is not exported by the
+  // library. All the symbols checked here are called without any further
+  // checks by the public functions of this file, so report NUMA as not
+  // available instead of calling through a NULL pointer later on.
+  //
+  // GetNumaNodeProcessorMask and GetCurrentProcessorNumberEx are not called by
+  // this file, hence they are not required.
+  if (_GetNumaHighestNodeNumber == NULL ||
+      _GetNumaNodeProcessorMaskEx == NULL ||
+      _GetNumaProcessorNode == NULL ||
+      _VirtualAllocExNuma == NULL ||
+      _VirtualFree == NULL ||
+      _SetProcessAffinityMask == NULL ||
+      _SetThreadGroupAffinity == NULL ||
+      _GetThreadGroupAffinity == NULL ||
+      _GetCurrentProcessorNumber == NULL ||
+      _GetActiveProcessorCount == NULL) {
+    result = NUMAAPI_NOT_AVAILABLE;
+    return result;
+  }
+
+  result = NUMAAPI_SUCCESS;
+  return result;
+}
+
+// Prepare the NUMA API for use.
+//
+// Returns NUMAAPI_NOT_AVAILABLE on 32 bit builds and on systems older than
+// Windows 7. Otherwise the symbols are loaded, and the result of that step is
+// given to the caller, so that a failed load is never reported as success.
+NUMAAPI_Result numaAPI_Initialize(void) {
+#if !ARCH_CPU_64_BITS
+  // No NUMA on 32 bit platforms.
+  return NUMAAPI_NOT_AVAILABLE;
+#else
+  if (!IsWindows7OrGreater()) {
+    // Require Windows 7 or higher.
+    return NUMAAPI_NOT_AVAILABLE;
+  }
+  return loadNumaSymbols();
+#endif
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Internal helpers.
+
+// Count the bits which are set in the mask.
+//
+// The mask is examined bit by bit, starting from the lowest one, until no set
+// bits are left.
+static int countNumSetBits(ULONGLONG mask) {
+  // TODO(sergey): There might be faster way calculating number of set bits.
+  // NOTE: mask must be unsigned, there is undefined behavior for signed ints.
+  int count = 0;
+  for (ULONGLONG rest = mask; rest != 0; rest >>= 1) {
+    if ((rest & 1) != 0) {
+      ++count;
+    }
+  }
+  return count;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Topology query.
+
+// Number of nodes: the highest node number plus one, or 0 when the highest
+// node number can not be queried.
+int numaAPI_GetNumNodes(void) {
+  ULONG max_node_index;
+  if (_GetNumaHighestNodeNumber(&max_node_index)) {
+    // TODO(sergey): Resolve the type narrowing.
+    // NOTE: This is not necessarily a total amount of nodes in the system.
+    return (int)max_node_index + 1;
+  }
+  return 0;
+}
+
+// Check whether the node is usable.
+//
+// The node is usable when its processor mask can be queried and at least one
+// processor is set in it. The check is needed because numaAPI_GetNumNodes()
+// is not guaranteed to give the total amount of nodes, and some of the nodes
+// might be unavailable.
+bool numaAPI_IsNodeAvailable(int node) {
+  GROUP_AFFINITY node_affinity = { 0 };
+  const BOOL is_queried = _GetNumaNodeProcessorMaskEx(node, &node_affinity);
+  return is_queried && (node_affinity.Mask != 0);
+}
+
+// Number of bits which are set in the processor mask of the node, or 0 when
+// the mask can not be queried.
+int numaAPI_GetNumNodeProcessors(int node) {
+  GROUP_AFFINITY node_affinity = { 0 };
+  return _GetNumaNodeProcessorMaskEx(node, &node_affinity)
+             ? countNumSetBits(node_affinity.Mask)
+             : 0;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Topology helpers.
+
+// Number of processors the calling thread is allowed to run on.
+//
+// This is the number of bits which are set in the group affinity mask of the
+// thread, clamped to the number of active processors of that group.
+// Gives 0 when the affinity of the thread can not be queried.
+int numaAPI_GetNumCurrentNodesProcessors(void) {
+  GROUP_AFFINITY thread_affinity;
+  if (!_GetThreadGroupAffinity(GetCurrentThread(), &thread_affinity)) {
+    return 0;
+  }
+  // Number of possible bits in the affinity mask.
+  const int num_mask_processors = countNumSetBits(thread_affinity.Mask);
+  // Make sure it does not exceed the number of processors in the group.
+  const int num_active_processors =
+      _GetActiveProcessorCount(thread_affinity.Group);
+  return (num_active_processors < num_mask_processors)
+             ? num_active_processors
+             : num_mask_processors;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Affinities.
+
+// Change affinity of the process to make it run on the given node.
+//
+// Returns false when the processor mask of the node can not be queried, or
+// when the process affinity can not be set.
+bool numaAPI_RunProcessOnNode(int node) {
+  // TODO(sergey): Make sure requested node is within active CPU group.
+  GROUP_AFFINITY node_affinity = { 0 };
+  if (!_GetNumaNodeProcessorMaskEx(node, &node_affinity)) {
+    return false;
+  }
+  // TODO: Affinity should respect processor group.
+  return _SetProcessAffinityMask(GetCurrentProcess(),
+                                 node_affinity.Mask) != 0;
+}
+
+// Change group affinity of the calling thread to the processors of the node.
+//
+// Returns false when the processor mask of the node can not be queried, or
+// when the thread affinity can not be set.
+bool numaAPI_RunThreadOnNode(int node) {
+  GROUP_AFFINITY node_affinity = { 0 };
+  if (!_GetNumaNodeProcessorMaskEx(node, &node_affinity)) {
+    return false;
+  }
+  return _SetThreadGroupAffinity(GetCurrentThread(),
+                                 &node_affinity,
+                                 NULL) != 0;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Memory management.
+
+// Reserve and commit `size` bytes of read-write memory in the current process,
+// with `node` passed as the preferred node.
+// The result of VirtualAllocExNuma() is given to the caller as is.
+void* numaAPI_AllocateOnNode(size_t size, int node) {
+  return _VirtualAllocExNuma(GetCurrentProcess(),
+                             NULL,
+                             size,
+                             MEM_RESERVE | MEM_COMMIT,
+                             PAGE_READWRITE,
+                             node);
+}
+
+// Allocate `size` bytes on the node of the processor which is given by
+// GetCurrentProcessorNumber() at the moment of the call.
+// Returns NULL when the node of that processor can not be queried.
+void* numaAPI_AllocateLocal(size_t size) {
+  // NOTE(review): the processor number is narrowed to UCHAR here; looks like
+  // processor numbers above 255 would be truncated -- confirm.
+  UCHAR current_processor = (UCHAR)_GetCurrentProcessorNumber();
+  UCHAR node;
+  if (!_GetNumaProcessorNode(current_processor, &node)) {
+    return NULL;
+  }
+  return numaAPI_AllocateOnNode(size, node);
+}
+
+// Release memory which was allocated by numaAPI_AllocateOnNode() or
+// numaAPI_AllocateLocal().
+//
+// With MEM_RELEASE the size argument of VirtualFree() must be 0: the whole
+// region which was reserved by the allocation call is released at once.
+// Passing the actual size makes the call fail, and the memory is never freed.
+void numaAPI_Free(void* start, size_t size) {
+  (void) size;  // Not needed for releasing the whole region.
+  if (!_VirtualFree(start, 0, MEM_RELEASE)) {
+    // TODO(sergey): Throw an error!
+  }
+}
+
+#endif  // OS_WIN
--- a/intern/opencolorio/ocio_impl_glsl.cc
+++ b/intern/opencolorio/ocio_impl_glsl.cc
@@ -642,7 +642,7 @@ bool OCIOImpl::gpuDisplayShaderBind(OCIO_ConstConfigRcPtr *config,
  }

  /* Bind textures to sampler units. Texture 0 is set by caller.
-   * Uniforms have already been set for texture bind points. */
+   * Uniforms have already been set for texture bind points.*/
  if (!use_overlay) {
    /* Avoid missing binds. */
    GPU_texture_bind(textures.dummy, TEXTURE_SLOT_OVERLAY);
--- a/release/darwin/Blender.app/Contents/Info.plist
+++ b/release/darwin/Blender.app/Contents/Info.plist
@@ -3,7 +3,7 @@
 <plist version="1.0">
 <dict>
 	<key>LSMinimumSystemVersion</key>
-	<string>10.13</string>
+	<string>10.9.0</string>
 	<key>CFBundleDocumentTypes</key>
 	<array>
 		<dict>
--- a/release/datafiles/blender_icons.svg
+++ b/release/datafiles/blender_icons.svg
--- a/release/datafiles/prvicons.svg
+++ b/release/datafiles/prvicons.svg
--- a/release/scripts/modules/bpy_types.py
+++ b/release/scripts/modules/bpy_types.py
@@ -100,19 +100,6 @@ class Texture(bpy_types.ID):
 class Collection(bpy_types.ID):
    __slots__ = ()

-    @property
-    def children_recursive(self):
-        """A list of all children from this collection."""
-        children_recursive = []
-
-        def recurse(parent):
-            for child in parent.children:
-                children_recursive.append(child)
-                recurse(child)
-
-        recurse(self)
-        return children_recursive
-
    @property
    def users_dupli_group(self):
        """The collection instance objects this collection is used in"""
@@ -133,27 +120,6 @@ class Object(bpy_types.ID):
        return tuple(child for child in bpy.data.objects
                     if child.parent == self)

-    @property
-    def children_recursive(self):
-        """A list of all children from this object.
-
-        .. note:: Takes ``O(len(bpy.data.objects))`` time."""
-        import bpy
-        parent_child_map = {}
-        for child in bpy.data.objects:
-            if (parent := child.parent) is not None:
-                parent_child_map.setdefault(parent, []).append(child)
-
-        children_recursive = []
-
-        def recurse(parent):
-            for child in parent_child_map.get(parent, ()):
-                children_recursive.append(child)
-                recurse(child)
-
-        recurse(self)
-        return children_recursive
-
    @property
    def users_collection(self):
        """
--- a/release/scripts/modules/keyingsets_utils.py
+++ b/release/scripts/modules/keyingsets_utils.py
@@ -238,15 +238,11 @@ def RKS_GEN_custom_props(_ksi, _context, ks, data):
            continue

        prop_path = '["%s"]' % bpy.utils.escape_identifier(cprop_name)
-
        try:
            rna_property = data.path_resolve(prop_path, False)
        except ValueError:
-            # Can technically happen, but there is no known case.
-            continue
-        if rna_property is None:
-            # In this case the property cannot be converted to an
-            # FCurve-compatible value, so we can't keyframe it anyways.
+            # This happens when a custom property is set to None. In that case it cannot
+            # be converted to an FCurve-compatible value, so we can't keyframe it anyway.
            continue
        if rna_property.rna_type not in prop_type_compat:
            continue
--- a/release/scripts/presets/keyconfig/keymap_data/blender_default.py
+++ b/release/scripts/presets/keyconfig/keymap_data/blender_default.py
@@ -6267,8 +6267,7 @@ def km_image_editor_tool_uv_select(params, *, fallback):
        _fallback_id("Image Editor Tool: Uv, Tweak", fallback),
        {"space_type": 'IMAGE_EDITOR', "region_type": 'WINDOW'},
        {"items": [
-            *([] if (fallback and (params.select_mouse == 'RIGHTMOUSE')) else _template_items_tool_select(
-                params, "uv.select", "uv.cursor_set", extend="extend")),
+            *([] if fallback else _template_items_tool_select(params, "uv.select", "uv.cursor_set", extend="extend")),
            *([] if (not params.use_fallback_tool_rmb) else _template_uv_select(
                type=params.select_mouse, value=params.select_mouse_value, legacy=params.legacy)),
        ]},
@@ -6386,7 +6385,7 @@ def km_node_editor_tool_select(params, *, fallback):
        _fallback_id("Node Tool: Tweak", fallback),
        {"space_type": 'NODE_EDITOR', "region_type": 'WINDOW'},
        {"items": [
-            *([] if (fallback and (params.select_mouse == 'RIGHTMOUSE')) else [
+            *([] if fallback else [
                ("node.select", {"type": params.select_mouse, "value": 'PRESS'},
                 {"properties": [("deselect_all", not params.legacy)]}),
            ]),
@@ -6468,7 +6467,7 @@ def km_3d_view_tool_select(params, *, fallback):
        _fallback_id("3D View Tool: Tweak", fallback),
        {"space_type": 'VIEW_3D', "region_type": 'WINDOW'},
        {"items": [
-            *([] if (fallback and (params.select_mouse == 'RIGHTMOUSE')) else _template_items_tool_select(
+            *([] if fallback else _template_items_tool_select(
                params, "view3d.select", "view3d.cursor3d", extend="toggle")),
            *([] if (not params.use_fallback_tool_rmb) else _template_view3d_select(
                type=params.select_mouse, value=params.select_mouse_value, legacy=params.legacy)),
@@ -7378,7 +7377,7 @@ def km_3d_view_tool_edit_gpencil_select(params, *, fallback):
        _fallback_id("3D View Tool: Edit Gpencil, Tweak", fallback),
        {"space_type": 'VIEW_3D', "region_type": 'WINDOW'},
        {"items": [
-            *([] if (fallback and (params.select_mouse == 'RIGHTMOUSE')) else _template_items_tool_select(
+            *([] if fallback else _template_items_tool_select(
                params, "gpencil.select", "view3d.cursor3d", extend="toggle")),
            *([] if (not params.use_fallback_tool_rmb) else _template_view3d_gpencil_select(
                type=params.select_mouse, value=params.select_mouse_value, legacy=params.legacy)),
@@ -7554,7 +7553,7 @@ def km_sequencer_editor_tool_generic_select(params, *, fallback):
        _fallback_id("Sequencer Tool: Tweak", fallback),
        {"space_type": 'SEQUENCE_EDITOR', "region_type": 'WINDOW'},
        {"items": [
-            *([] if (fallback and (params.select_mouse == 'RIGHTMOUSE')) else _template_items_tool_select(
+            *([] if fallback else _template_items_tool_select(
                params, "sequencer.select", "sequencer.cursor_set", extend="toggle")),

            *([] if (not params.use_fallback_tool_rmb) else _template_sequencer_preview_select(
--- a/release/scripts/startup/bl_operators/clip.py
+++ b/release/scripts/startup/bl_operators/clip.py
@@ -949,7 +949,7 @@ class CLIP_OT_setup_tracking_scene(Operator):
            """Make all the newly created and the old objects of a collection """ \
                """to be properly setup for shadow catch"""
            for ob in collection.objects:
-                ob.is_shadow_catcher = True
+                ob.cycles.is_shadow_catcher = True
                for child in collection.children:
                    setup_shadow_catcher_objects(child)

--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Jacques Lucke	0375b51776	Fix T94707: inverted output of separate geometry node incorrect This was an oversight in rB3e92b4ed2408eacd126c0.	2022-01-07 08:02:20 +01:00
Jacques Lucke	32e8381e0e	Geometry Nodes: Experimental Scale Elements node (WIP). This node is best used in combination with e.g. the Extrude node. Differential Revision: https://developer.blender.org/D13757	2022-01-06 18:19:06 +01:00
Jacques Lucke	c31a346ec5	progress	2022-01-06 17:01:17 +01:00
Jacques Lucke	00c5fa3bf1	progress	2022-01-06 16:13:56 +01:00
Jacques Lucke	2f6a84bee9	progress	2022-01-06 15:04:53 +01:00
Jacques Lucke	f9e03a5e16	Merge branch 'master' into temp-scale-elements-node-test	2022-01-06 14:32:40 +01:00
Jacques Lucke	abaed315d5	initial scale faces mode	2022-01-04 20:13:35 +01:00
Jacques Lucke	05a7f7c1fd	Merge branch 'master' into scale-elements-node-test	2022-01-04 17:41:45 +01:00
Jacques Lucke	7e3459ff78	initial commit	2022-01-04 13:59:47 +01:00