Merge remote-tracking branch 'origin' into microfacet_hair

Merge branch 'blender-v3.5-release' into main
2023-03-07 14:59:54 +01:00 · 2023-03-07 14:20:17 +01:00 · 2023-03-07 13:23:43 +01:00 · 2023-03-07 13:21:51 +01:00 · 2023-03-07 12:32:00 +01:00 · 2023-03-07 12:31:59 +01:00
2354 changed files with 2111884 additions and 26914 deletions
--- a/.arcconfig
+++ b/.arcconfig
@@ -1,8 +0,0 @@
-{
-	"project_id" : "Blender",
-	"conduit_uri" : "https://developer.blender.org/",
-	"phabricator.uri" : "https://developer.blender.org/",
-	"git.default-relative-commit" : "origin/master",
-	"arc.land.update.default" : "rebase",
-	"arc.land.onto.default" : "master"
-}
--- a/.clang-format
+++ b/.clang-format
@@ -236,6 +236,8 @@ ForEachMacros:
  - LOOP_UNSELECTED_POINTS
  - LOOP_VISIBLE_KEYS
  - LOOP_VISIBLE_POINTS
+  - LIGHT_FOREACH_BEGIN_DIRECTIONAL
+  - LIGHT_FOREACH_BEGIN_LOCAL
  - LISTBASE_CIRCULAR_BACKWARD_BEGIN
  - LISTBASE_CIRCULAR_FORWARD_BEGIN
  - LISTBASE_FOREACH
--- a/.gitea/default_merge_message/REBASE_TEMPLATE.md
+++ b/.gitea/default_merge_message/REBASE_TEMPLATE.md
@@ -2,4 +2,4 @@ ${CommitTitle}

 ${CommitBody}

-Pull Request #${PullRequestIndex}
+Pull Request: https://projects.blender.org/blender/blender/pulls/${PullRequestIndex}
--- a/.gitea/default_merge_message/SQUASH_TEMPLATE.md
+++ b/.gitea/default_merge_message/SQUASH_TEMPLATE.md
@@ -1,3 +1,3 @@
 ${PullRequestTitle}

-Pull Request #${PullRequestIndex}
+Pull Request: https://projects.blender.org/blender/blender/pulls/${PullRequestIndex}
--- a/.gitea/issue_template/bug.yaml
+++ b/.gitea/issue_template/bug.yaml
@@ -1,9 +1,9 @@
 name: Bug Report
 about: File a bug report
 labels:
-  - "type::Report"
-  - "status::Needs Triage"
-  - "priority::Normal"
+  - "Type/Report"
+  - "Status/Needs Triage"
+  - "Priority/Normal"
 body:
  - type: markdown
    attributes:
--- a/.gitea/issue_template/design.yaml
+++ b/.gitea/issue_template/design.yaml
@@ -1,7 +1,7 @@
 name: Design
 about: Create a design task (for developers only)
 labels:
-  - "type::Design"
+  - "Type/Design"
 body:
  - type: textarea
    id: body
--- a/.gitea/issue_template/todo.yaml
+++ b/.gitea/issue_template/todo.yaml
@@ -1,7 +1,7 @@
 name: To Do
 about: Create a to do task (for developers only)
 labels:
-  - "type::To Do"
+  - "Type/To Do"
 body:
  - type: textarea
    id: body
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -1,5 +1,4 @@
-This repository is only used as a mirror of git.blender.org. Blender development happens on
-https://developer.blender.org.
+This repository is only used as a mirror. Blender development happens on projects.blender.org.

 To get started with contributing code, please see:
 https://wiki.blender.org/wiki/Process/Contributing_Code
--- a/.github/stale.yml
+++ b/.github/stale.yml
@@ -15,8 +15,7 @@ staleLabel: stale
 # Comment to post when closing a stale Issue or Pull Request.
 closeComment: >
  This issue has been automatically closed, because this repository is only
-  used as a mirror of git.blender.org. Blender development happens on
-  developer.blender.org.
+  used as a mirror. Blender development happens on projects.blender.org.

  To get started contributing code, please read:
  https://wiki.blender.org/wiki/Process/Contributing_Code
--- a/.gitignore
+++ b/.gitignore
@@ -39,7 +39,7 @@ Desktop.ini
 /doc/python_api/rst/bmesh.ops.rst

 # in-source lib downloads
-/build_files/build_environment/downloads
+/build_files/build_environment/downloads/

 # in-source buildbot signing configuration
 /build_files/buildbot/codesign/config_server.py
@@ -48,4 +48,20 @@ Desktop.ini
 waveletNoiseTile.bin

 # testing environment
-/Testing
+/Testing/
+
+# Translations.
+/locale/user-config.py
+
+# External repositories.
+/scripts/addons/
+/scripts/addons_contrib/
+
+# Ignore old submodules directories.
+# Eventually need to get rid of those, but for the first time of transition
+# avoid incidents when the folders exist after bisect and developers staging
+# them by accident.
+/release/scripts/addons/
+/release/datafiles/locale/
+/release/scripts/addons_contrib/
+/source/tools/
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,20 +0,0 @@
-[submodule "release/scripts/addons"]
-	path = release/scripts/addons
-	url = ../blender-addons.git
-	branch = master
-	ignore = all
-[submodule "release/scripts/addons_contrib"]
-	path = release/scripts/addons_contrib
-	url = ../blender-addons-contrib.git
-	branch = master
-	ignore = all
-[submodule "release/datafiles/locale"]
-	path = release/datafiles/locale
-	url = ../blender-translations.git
-	branch = master
-	ignore = all
-[submodule "source/tools"]
-	path = source/tools
-	url = ../blender-dev-tools.git
-	branch = master
-	ignore = all
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -331,7 +331,6 @@ option(WITH_MOD_REMESH          "Enable Remesh Modifier" ON)
 option(WITH_MOD_OCEANSIM        "Enable Ocean Modifier" ON)

 # Image format support
-option(WITH_OPENIMAGEIO         "Enable OpenImageIO Support (http://www.openimageio.org)" ON)
 option(WITH_IMAGE_OPENEXR       "Enable OpenEXR Support (http://www.openexr.com)" ON)
 option(WITH_IMAGE_OPENJPEG      "Enable OpenJpeg Support (http://www.openjpeg.org)" ON)
 option(WITH_IMAGE_TIFF          "Enable LibTIFF Support" ON)
@@ -358,6 +357,7 @@ option(WITH_MATERIALX           "Enable MaterialX Support" OFF)
 # Disable opencollada when we don't have precompiled libs
 option(WITH_OPENCOLLADA   "Enable OpenCollada Support (http://www.opencollada.org)" ON)
 option(WITH_IO_WAVEFRONT_OBJ  "Enable Wavefront-OBJ 3D file format support (*.obj)" ON)
+option(WITH_IO_PLY            "Enable PLY 3D file format support (*.ply)" ON)
 option(WITH_IO_STL            "Enable STL 3D file format support (*.stl)" ON)
 option(WITH_IO_GPENCIL        "Enable grease-pencil file format IO (*.svg, *.pdf)" ON)

@@ -524,7 +524,7 @@ endif()
 if(NOT APPLE)
  option(WITH_CYCLES_DEVICE_HIP        "Enable Cycles AMD HIP support" ON)
  option(WITH_CYCLES_HIP_BINARIES      "Build Cycles AMD HIP binaries" OFF)
-  set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
+  set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx906 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
  mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
  mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
 endif()
@@ -625,8 +625,10 @@ mark_as_advanced(

 # Vulkan
 option(WITH_VULKAN_BACKEND "Enable Vulkan as graphics backend (only for development)" OFF)
+option(WITH_VULKAN_GUARDEDALLOC "Use guardedalloc for host allocations done inside Vulkan (development option)" OFF)
 mark_as_advanced(
  WITH_VULKAN_BACKEND
+  WITH_VULKAN_GUARDEDALLOC
 )

 # Metal
@@ -890,8 +892,6 @@ set_and_warn_dependency(WITH_IMAGE_TIFF WITH_HARU       OFF)

 # auto enable openimageio for cycles
 if(WITH_CYCLES)
-  set(WITH_OPENIMAGEIO ON)
-
  # auto enable llvm for cycles_osl
  if(WITH_CYCLES_OSL)
    set(WITH_LLVM ON CACHE BOOL "" FORCE)
@@ -952,21 +952,6 @@ endif()
 # -----------------------------------------------------------------------------
 # Check if Sub-modules are Cloned

-if(WITH_INTERNATIONAL)
-  file(GLOB RESULT "${CMAKE_SOURCE_DIR}/release/datafiles/locale")
-  list(LENGTH RESULT DIR_LEN)
-  if(DIR_LEN EQUAL 0)
-    message(
-      WARNING
-      "Translation path '${CMAKE_SOURCE_DIR}/release/datafiles/locale' is missing, "
-      "This is a 'git submodule', which are known not to work with bridges to other version "
-      "control systems."
-    )
-    set(TRANSLATIONS_FOUND OFF)
-    set_and_warn_library_found("Translations" TRANSLATIONS_FOUND WITH_INTERNATIONAL)
-  endif()
-endif()
-
 if(WITH_PYTHON)
  # While we have this as an '#error' in 'bpy_capi_utils.h',
  # upgrading Python tends to cause confusion for users who build.
@@ -982,14 +967,14 @@ if(WITH_PYTHON)
    )
  endif()

-  file(GLOB RESULT "${CMAKE_SOURCE_DIR}/release/scripts/addons")
+  file(GLOB RESULT "${CMAKE_SOURCE_DIR}/scripts/addons")
  list(LENGTH RESULT DIR_LEN)
  if(DIR_LEN EQUAL 0)
    message(
      WARNING
-      "Addons path '${CMAKE_SOURCE_DIR}/release/scripts/addons' is missing, "
-      "This is a 'git submodule', which are known not to work with bridges to other version "
-      "control systems: * CONTINUING WITHOUT ADDONS *"
+      "Addons path '${CMAKE_SOURCE_DIR}/scripts/addons' is missing. "
+      "This is an external repository which needs to be checked out. Use `make update` to do so. "
+      "* CONTINUING WITHOUT ADDONS *"
    )
  endif()
 endif()
@@ -1098,13 +1083,6 @@ if(NOT WITH_FFTW3 AND WITH_MOD_OCEANSIM)
 endif()

 if(WITH_CYCLES)
-  if(NOT WITH_OPENIMAGEIO)
-    message(
-      FATAL_ERROR
-      "Cycles requires WITH_OPENIMAGEIO, the library may not have been found. "
-      "Configure OIIO or disable WITH_CYCLES"
-    )
-  endif()
  if(WITH_CYCLES_OSL)
    if(NOT WITH_LLVM)
      message(
@@ -1955,8 +1933,7 @@ if(FIRST_RUN)
  info_cfg_option(WITH_IMAGE_OPENEXR)
  info_cfg_option(WITH_IMAGE_OPENJPEG)
  info_cfg_option(WITH_IMAGE_TIFF)
-  info_cfg_option(WITH_OPENIMAGEIO)
-
+  
  info_cfg_text("Audio:")
  info_cfg_option(WITH_CODEC_AVI)
  info_cfg_option(WITH_CODEC_FFMPEG)
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -69,7 +69,7 @@ Static Source Code Checking
   * check_cmake:           Runs our own cmake file checker which detects errors in the cmake file list definitions.
   * check_pep8:            Checks all Python script are pep8 which are tagged to use the stricter formatting.
   * check_mypy:            Checks all Python scripts using mypy,
-                            see: source/tools/check_source/check_mypy_config.py scripts which are included.
+                            see: tools/check_source/check_mypy_config.py scripts which are included.

 Documentation Checking

@@ -85,7 +85,7 @@ Spell Checkers
   * check_spelling_osl:    Check for spelling errors (OSL only).
   * check_spelling_py:     Check for spelling errors (Python only).

-   Note: an additional word-list is maintained at: 'source/tools/check_source/check_spelling_c_config.py'
+   Note: an additional word-list is maintained at: 'tools/check_source/check_spelling_c_config.py'

   Note: that spell checkers can take a 'CHECK_SPELLING_CACHE' filepath argument,
   so re-running does not need to re-check unchanged files.
@@ -299,7 +299,11 @@ else
 	ifneq ("$(wildcard $(DEPS_BUILD_DIR)/build.ninja)","")
 		DEPS_BUILD_COMMAND:=ninja
 	else
-		DEPS_BUILD_COMMAND:=make -s
+		ifeq ($(OS), Darwin)
+			DEPS_BUILD_COMMAND:=make -s
+		else
+			DEPS_BUILD_COMMAND:="$(BLENDER_DIR)/build_files/build_environment/linux/make_deps_wrapper.sh" -s
+		endif
 	endif
 endif

@@ -398,7 +402,7 @@ endif

 deps: .FORCE
 	@echo
-	@echo Configuring dependencies in \"$(DEPS_BUILD_DIR)\"
+	@echo Configuring dependencies in \"$(DEPS_BUILD_DIR)\", install to \"$(DEPS_INSTALL_DIR)\"

 	@cmake -H"$(DEPS_SOURCE_DIR)" \
 	       -B"$(DEPS_BUILD_DIR)" \
@@ -486,22 +490,22 @@ check_smatch: .FORCE
 	$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_static_check_smatch.py"

 check_mypy: .FORCE
-	@$(PYTHON) "$(BLENDER_DIR)/source/tools/check_source/check_mypy.py"
+	@$(PYTHON) "$(BLENDER_DIR)/tools/check_source/check_mypy.py"

 check_wiki_file_structure: .FORCE
 	@PYTHONIOENCODING=utf_8 $(PYTHON) \
-	    "$(BLENDER_DIR)/source/tools/check_wiki/check_wiki_file_structure.py"
+	    "$(BLENDER_DIR)/tools/check_wiki/check_wiki_file_structure.py"

 check_spelling_py: .FORCE
 	@cd "$(BUILD_DIR)" ; \
 	PYTHONIOENCODING=utf_8 $(PYTHON) \
-	    "$(BLENDER_DIR)/source/tools/check_source/check_spelling.py" \
-	    "$(BLENDER_DIR)/release/scripts"
+	    "$(BLENDER_DIR)/tools/check_source/check_spelling.py" \
+	    "$(BLENDER_DIR)/scripts"

 check_spelling_c: .FORCE
 	@cd "$(BUILD_DIR)" ; \
 	PYTHONIOENCODING=utf_8 $(PYTHON) \
-	    "$(BLENDER_DIR)/source/tools/check_source/check_spelling.py" \
+	    "$(BLENDER_DIR)/tools/check_source/check_spelling.py" \
 	    --cache-file=$(CHECK_SPELLING_CACHE) \
 	    "$(BLENDER_DIR)/source" \
 	    "$(BLENDER_DIR)/intern/cycles" \
@@ -511,21 +515,21 @@ check_spelling_c: .FORCE
 check_spelling_osl: .FORCE
 	@cd "$(BUILD_DIR)" ; \
 	PYTHONIOENCODING=utf_8 $(PYTHON) \
-	    "$(BLENDER_DIR)/source/tools/check_source/check_spelling.py" \
+	    "$(BLENDER_DIR)/tools/check_source/check_spelling.py" \
 	    --cache-file=$(CHECK_SPELLING_CACHE) \
 	    "$(BLENDER_DIR)/intern/cycles/kernel/shaders"

 check_descriptions: .FORCE
 	@$(BLENDER_BIN) --background -noaudio --factory-startup --python \
-	    "$(BLENDER_DIR)/source/tools/check_source/check_descriptions.py"
+	    "$(BLENDER_DIR)/tools/check_source/check_descriptions.py"

 check_deprecated: .FORCE
 	@PYTHONIOENCODING=utf_8 $(PYTHON) \
-	    source/tools/check_source/check_deprecated.py
+	    tools/check_source/check_deprecated.py

 check_licenses: .FORCE
 	@PYTHONIOENCODING=utf_8 $(PYTHON) \
-	    "$(BLENDER_DIR)/source/tools/check_source/check_licenses.py" \
+	    "$(BLENDER_DIR)/tools/check_source/check_licenses.py" \
 	    "--show-headers=$(SHOW_HEADERS)"

 check_pep8: .FORCE
@@ -534,7 +538,7 @@ check_pep8: .FORCE

 check_cmake: .FORCE
 	@PYTHONIOENCODING=utf_8 $(PYTHON) \
-	    source/tools/check_source/check_cmake_consistency.py
+	    tools/check_source/check_cmake_consistency.py


 # -----------------------------------------------------------------------------
@@ -572,8 +576,8 @@ update_code: .FORCE
 	@$(PYTHON) ./build_files/utils/make_update.py --no-libraries

 format: .FORCE
-	@PATH="${LIBDIR}/llvm/bin/:$(PATH)" $(PYTHON) source/tools/utils_maintenance/clang_format_paths.py $(PATHS)
-	@$(PYTHON) source/tools/utils_maintenance/autopep8_format_paths.py --autopep8-command="$(AUTOPEP8)" $(PATHS)
+	@PATH="${LIBDIR}/llvm/bin/:$(PATH)" $(PYTHON) tools/utils_maintenance/clang_format_paths.py $(PATHS)
+	@$(PYTHON) tools/utils_maintenance/autopep8_format_paths.py --autopep8-command="$(AUTOPEP8)" $(PATHS)


 # -----------------------------------------------------------------------------
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ Development
 -----------

 - [Build Instructions](https://wiki.blender.org/wiki/Building_Blender)
-- [Code Review & Bug Tracker](https://developer.blender.org)
+- [Code Review & Bug Tracker](https://projects.blender.org)
 - [Developer Forum](https://devtalk.blender.org)
 - [Developer Documentation](https://wiki.blender.org)

--- a/build_files/build_environment/cmake/epoxy.cmake
+++ b/build_files/build_environment/cmake/epoxy.cmake
@@ -10,7 +10,7 @@ ExternalProject_Add(external_epoxy
  URL_HASH ${EPOXY_HASH_TYPE}=${EPOXY_HASH}
  PREFIX ${BUILD_DIR}/epoxy
  PATCH_COMMAND ${PATCH_CMD} -p 1 -N -d ${BUILD_DIR}/epoxy/src/external_epoxy/ < ${PATCH_DIR}/epoxy.diff
-  CONFIGURE_COMMAND ${CONFIGURE_ENV} && ${MESON} setup --prefix ${LIBDIR}/epoxy --default-library ${EPOXY_LIB_TYPE} --libdir lib ${BUILD_DIR}/epoxy/src/external_epoxy-build ${BUILD_DIR}/epoxy/src/external_epoxy -Dtests=false
+  CONFIGURE_COMMAND ${CONFIGURE_ENV} && ${MESON} setup --prefix ${LIBDIR}/epoxy --default-library ${EPOXY_LIB_TYPE} --libdir lib ${BUILD_DIR}/epoxy/src/external_epoxy-build ${BUILD_DIR}/epoxy/src/external_epoxy -Dtests=false ${MESON_BUILD_TYPE}
  BUILD_COMMAND ninja
  INSTALL_COMMAND ninja install
 )
--- a/build_files/build_environment/cmake/fribidi.cmake
+++ b/build_files/build_environment/cmake/fribidi.cmake
@@ -9,7 +9,7 @@ ExternalProject_Add(external_fribidi
  URL_HASH ${FRIBIDI_HASH_TYPE}=${FRIBIDI_HASH}
  DOWNLOAD_DIR ${DOWNLOAD_DIR}
  PREFIX ${BUILD_DIR}/fribidi
-  CONFIGURE_COMMAND ${MESON} setup --prefix ${LIBDIR}/fribidi -Ddocs=false --default-library static --libdir lib ${BUILD_DIR}/fribidi/src/external_fribidi-build ${BUILD_DIR}/fribidi/src/external_fribidi
+  CONFIGURE_COMMAND ${MESON} setup --prefix ${LIBDIR}/fribidi ${MESON_BUILD_TYPE} -Ddocs=false --default-library static --libdir lib ${BUILD_DIR}/fribidi/src/external_fribidi-build ${BUILD_DIR}/fribidi/src/external_fribidi
  BUILD_COMMAND ninja
  INSTALL_COMMAND ninja install
  INSTALL_DIR ${LIBDIR}/fribidi
--- a/build_files/build_environment/cmake/gmp.cmake
+++ b/build_files/build_environment/cmake/gmp.cmake
@@ -22,7 +22,7 @@ elseif(UNIX AND NOT APPLE)
  )
 endif()

-# Boolean crashes with Arm assembly, see T103423.
+# Boolean crashes with Arm assembly, see #103423.
 if(BLENDER_PLATFORM_ARM)
  set(GMP_OPTIONS
    ${GMP_OPTIONS}
--- a/build_files/build_environment/cmake/harfbuzz.cmake
+++ b/build_files/build_environment/cmake/harfbuzz.cmake
@@ -21,6 +21,7 @@ set(HARFBUZZ_EXTRA_OPTIONS
  # Only used for command line utilities,
  # disable as this would add an addition & unnecessary build-dependency.
  -Dcairo=disabled
+  ${MESON_BUILD_TYPE}
 )

 ExternalProject_Add(external_harfbuzz
@@ -59,3 +60,10 @@ if(BUILD_MODE STREQUAL Release AND WIN32)
    DEPENDEES install
  )
 endif()
+
+if(BUILD_MODE STREQUAL Debug AND WIN32)
+  ExternalProject_Add_Step(external_harfbuzz after_install
+    COMMAND ${CMAKE_COMMAND} -E copy ${LIBDIR}/harfbuzz/lib/libharfbuzz.a ${HARVEST_TARGET}/harfbuzz/lib/libharfbuzz_d.lib
+    DEPENDEES install
+  )
+endif()
--- a/build_files/build_environment/cmake/igc.cmake
+++ b/build_files/build_environment/cmake/igc.cmake
@@ -40,7 +40,8 @@ ExternalProject_Add(external_igc_llvm
    ${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0004-OpenCL-support-cl_ext_float_atomics.patch &&
    ${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/clang/0005-OpenCL-Add-cl_khr_integer_dot_product.patch &&
    ${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0001-Memory-leak-fix-for-Managed-Static-Mutex.patch &&
-    ${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0002-Remove-repo-name-in-LLVM-IR.patch
+    ${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0002-Remove-repo-name-in-LLVM-IR.patch &&
+    ${PATCH_CMD} -p 1 -d ${IGC_LLVM_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/llvm/0003-Add-missing-include-limit-in-benchmark.patch
 )
 add_dependencies(
  external_igc_llvm
@@ -55,9 +56,6 @@ ExternalProject_Add(external_igc_spirv_translator
  CONFIGURE_COMMAND echo .
  BUILD_COMMAND echo .
  INSTALL_COMMAND echo .
-  PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0001-update-SPIR-V-headers-for-SPV_INTEL_split_barrier.patch &&
-    ${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0002-Add-support-for-split-barriers-extension-SPV_INTEL_s.patch &&
-    ${PATCH_CMD} -p 1 -d ${IGC_SPIRV_TRANSLATOR_SOURCE_DIR} < ${IGC_OPENCL_CLANG_PATCH_DIR}/spirv/0003-Support-cl_bf16_conversions.patch
 )
 add_dependencies(
  external_igc_spirv_translator
--- a/build_files/build_environment/cmake/mesa.cmake
+++ b/build_files/build_environment/cmake/mesa.cmake
@@ -15,7 +15,7 @@ llvm-config = '${LIBDIR}/llvm/bin/llvm-config'"
 )

 set(MESA_EXTRA_FLAGS
-  -Dbuildtype=release
+  ${MESON_BUILD_TYPE}
  -Dc_args=${MESA_CFLAGS}
  -Dcpp_args=${MESA_CXXFLAGS}
  -Dc_link_args=${MESA_LDFLAGS}
--- a/build_files/build_environment/cmake/openvdb.cmake
+++ b/build_files/build_environment/cmake/openvdb.cmake
@@ -44,13 +44,21 @@ set(OPENVDB_EXTRA_ARGS
  # -DLLVM_DIR=${LIBDIR}/llvm/lib/cmake/llvm
 )

+set(OPENVDB_PATCH ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/openvdb/src/openvdb < ${PATCH_DIR}/openvdb.diff)
+if(APPLE)
+  set(OPENVDB_PATCH
+    ${OPENVDB_PATCH} &&
+    ${PATCH_CMD} -p 0 -d ${BUILD_DIR}/openvdb/src/openvdb < ${PATCH_DIR}/openvdb_metal.diff
+  )
+endif()
+
 ExternalProject_Add(openvdb
  URL file://${PACKAGE_DIR}/${OPENVDB_FILE}
  DOWNLOAD_DIR ${DOWNLOAD_DIR}
  URL_HASH ${OPENVDB_HASH_TYPE}=${OPENVDB_HASH}
  CMAKE_GENERATOR ${PLATFORM_ALT_GENERATOR}
  PREFIX ${BUILD_DIR}/openvdb
-  PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/openvdb/src/openvdb < ${PATCH_DIR}/openvdb.diff
+  PATCH_COMMAND ${OPENVDB_PATCH}
  CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/openvdb ${DEFAULT_CMAKE_FLAGS} ${OPENVDB_EXTRA_ARGS}
  INSTALL_DIR ${LIBDIR}/openvdb
 )
--- a/build_files/build_environment/cmake/options.cmake
+++ b/build_files/build_environment/cmake/options.cmake
@@ -16,8 +16,10 @@ message("BuildMode = ${BUILD_MODE}")

 if(BUILD_MODE STREQUAL "Debug")
  set(LIBDIR ${CMAKE_CURRENT_BINARY_DIR}/Debug)
+  set(MESON_BUILD_TYPE -Dbuildtype=debug)
 else()
  set(LIBDIR ${CMAKE_CURRENT_BINARY_DIR}/Release)
+  set(MESON_BUILD_TYPE -Dbuildtype=release)
 endif()

 set(DOWNLOAD_DIR "${CMAKE_CURRENT_BINARY_DIR}/downloads" CACHE STRING "Path for downloaded files")
--- a/build_files/build_environment/cmake/python.cmake
+++ b/build_files/build_environment/cmake/python.cmake
@@ -88,6 +88,19 @@ else()
    export LDFLAGS=${PYTHON_LDFLAGS} &&
    export PKG_CONFIG_PATH=${LIBDIR}/ffi/lib/pkgconfig)

+  # NOTE: untested on APPLE so far.
+  if(NOT APPLE)
+    set(PYTHON_CONFIGURE_EXTRA_ARGS
+      ${PYTHON_CONFIGURE_EXTRA_ARGS}
+      # Used on most release Linux builds (e.g. Fedora),
+      # increases build times noticeably with the benefit of a modest speedup at runtime.
+      --enable-optimizations
+      # While LTO is OK when building on the same system, it's incompatible across GCC versions,
+      # making it impractical for developers to build against, so keep it disabled.
+      # `--with-lto`
+    )
+  endif()
+
  ExternalProject_Add(external_python
    URL file://${PACKAGE_DIR}/${PYTHON_FILE}
    DOWNLOAD_DIR ${DOWNLOAD_DIR}
--- a/build_files/build_environment/cmake/versions.cmake
+++ b/build_files/build_environment/cmake/versions.cmake
@@ -668,9 +668,9 @@ set(SPIRV_HEADERS_FILE SPIR-V-Headers-${SPIRV_HEADERS_VERSION}.tar.gz)
 # compiler, the versions used are taken from the following location
 # https://github.com/intel/intel-graphics-compiler/releases

-set(IGC_VERSION 1.0.12149.1)
+set(IGC_VERSION 1.0.13064.7)
 set(IGC_URI https://github.com/intel/intel-graphics-compiler/archive/refs/tags/igc-${IGC_VERSION}.tar.gz)
-set(IGC_HASH 44f67f24e3bc5130f9f062533abf8154782a9d0a992bc19b498639a8521ae836)
+set(IGC_HASH a929abd4cca2b293961ec0437ee4b3b2147bd3b2c8a3c423af78c0c359b2e5ae)
 set(IGC_HASH_TYPE SHA256)
 set(IGC_FILE igc-${IGC_VERSION}.tar.gz)

@@ -690,15 +690,15 @@ set(IGC_LLVM_FILE ${IGC_LLVM_VERSION}.tar.gz)
 #
 # WARNING WARNING WARNING

-set(IGC_OPENCL_CLANG_VERSION 363a5262d8c7cff3fb28f3bdb5d85c8d7e91c1bb)
+set(IGC_OPENCL_CLANG_VERSION ee31812ea8b89d08c2918f045d11a19bd33525c5)
 set(IGC_OPENCL_CLANG_URI https://github.com/intel/opencl-clang/archive/${IGC_OPENCL_CLANG_VERSION}.tar.gz)
-set(IGC_OPENCL_CLANG_HASH aa8cf72bb239722ce8ce44f79413c6887ecc8ca18477dd520aa5c4809756da9a)
+set(IGC_OPENCL_CLANG_HASH 1db6735bbcfaa31e8a9ba39f121d6bafa806ea8919e9f56782d6aaa67771ddda)
 set(IGC_OPENCL_CLANG_HASH_TYPE SHA256)
 set(IGC_OPENCL_CLANG_FILE opencl-clang-${IGC_OPENCL_CLANG_VERSION}.tar.gz)

-set(IGC_VCINTRINSICS_VERSION v0.5.0)
+set(IGC_VCINTRINSICS_VERSION v0.11.0)
 set(IGC_VCINTRINSICS_URI https://github.com/intel/vc-intrinsics/archive/refs/tags/${IGC_VCINTRINSICS_VERSION}.tar.gz)
-set(IGC_VCINTRINSICS_HASH 70bb47c5e32173cf61514941e83ae7c7eb4485e6d2fca60cfa1f50d4f42c41f2)
+set(IGC_VCINTRINSICS_HASH e5acd5626ce7fa6d41ce154c50ac805eda734ee66af94ef28e680ac2ad81bb9f)
 set(IGC_VCINTRINSICS_HASH_TYPE SHA256)
 set(IGC_VCINTRINSICS_FILE vc-intrinsics-${IGC_VCINTRINSICS_VERSION}.tar.gz)

@@ -714,9 +714,9 @@ set(IGC_SPIRV_TOOLS_HASH 6e19900e948944243024aedd0a201baf3854b377b9cc7a386553bc1
 set(IGC_SPIRV_TOOLS_HASH_TYPE SHA256)
 set(IGC_SPIRV_TOOLS_FILE SPIR-V-Tools-${IGC_SPIRV_TOOLS_VERSION}.tar.gz)

-set(IGC_SPIRV_TRANSLATOR_VERSION a31ffaeef77e23d500b3ea3d35e0c42ff5648ad9)
+set(IGC_SPIRV_TRANSLATOR_VERSION d739c01d65ec00dee64dedd40deed805216a7193)
 set(IGC_SPIRV_TRANSLATOR_URI https://github.com/KhronosGroup/SPIRV-LLVM-Translator/archive/${IGC_SPIRV_TRANSLATOR_VERSION}.tar.gz)
-set(IGC_SPIRV_TRANSLATOR_HASH 9e26c96a45341b8f8af521bacea20e752623346340addd02af95d669f6e89252)
+set(IGC_SPIRV_TRANSLATOR_HASH ddc0cc9ccbe59dadeaf291012d59de142b2e9f2b124dbb634644d39daddaa13e)
 set(IGC_SPIRV_TRANSLATOR_HASH_TYPE SHA256)
 set(IGC_SPIRV_TRANSLATOR_FILE SPIR-V-Translator-${IGC_SPIRV_TRANSLATOR_VERSION}.tar.gz)

@@ -724,15 +724,15 @@ set(IGC_SPIRV_TRANSLATOR_FILE SPIR-V-Translator-${IGC_SPIRV_TRANSLATOR_VERSION}.
 ### Intel Graphics Compiler DEPS END ###
 ########################################

-set(GMMLIB_VERSION intel-gmmlib-22.1.8)
+set(GMMLIB_VERSION intel-gmmlib-22.3.0)
 set(GMMLIB_URI https://github.com/intel/gmmlib/archive/refs/tags/${GMMLIB_VERSION}.tar.gz)
-set(GMMLIB_HASH bf23e9a3742b4fb98c7666c9e9b29f3219e4b2fb4d831aaf4eed71f5e2d17368)
+set(GMMLIB_HASH c1f33e1519edfc527127baeb0436b783430dfd256c643130169a3a71dc86aff9)
 set(GMMLIB_HASH_TYPE SHA256)
 set(GMMLIB_FILE ${GMMLIB_VERSION}.tar.gz)

-set(OCLOC_VERSION 22.38.24278)
+set(OCLOC_VERSION 22.49.25018.21)
 set(OCLOC_URI https://github.com/intel/compute-runtime/archive/refs/tags/${OCLOC_VERSION}.tar.gz)
-set(OCLOC_HASH db0c542fccd651e6404b15a74d46027f1ce0eda8dc9e25a40cbb6c0faef257ee)
+set(OCLOC_HASH 92362dae08b503a34e5d3820ed284198c452bcd5e7504d90eb69887b20492c06)
 set(OCLOC_HASH_TYPE SHA256)
 set(OCLOC_FILE ocloc-${OCLOC_VERSION}.tar.gz)

--- a/build_files/build_environment/cmake/wayland.cmake
+++ b/build_files/build_environment/cmake/wayland.cmake
@@ -13,7 +13,7 @@ ExternalProject_Add(external_wayland
  # NOTE: `-lm` is needed for `libxml2` which is a static library that uses `libm.so`,
  # without this, math symbols such as `floor` aren't found.
  CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env PKG_CONFIG_PATH=${LIBDIR}/expat/lib/pkgconfig:${LIBDIR}/xml2/lib/pkgconfig:${LIBDIR}/ffi/lib/pkgconfig:$PKG_CONFIG_PATH
-                    ${MESON} --prefix ${LIBDIR}/wayland -Ddocumentation=false -Dtests=false -D "c_link_args=-L${LIBDIR}/ffi/lib -lm" . ../external_wayland
+                    ${MESON} --prefix ${LIBDIR}/wayland ${MESON_BUILD_TYPE} -Ddocumentation=false -Dtests=false -D "c_link_args=-L${LIBDIR}/ffi/lib -lm" . ../external_wayland
  BUILD_COMMAND ninja
  INSTALL_COMMAND ninja install
 )
--- a/build_files/build_environment/cmake/wayland_protocols.cmake
+++ b/build_files/build_environment/cmake/wayland_protocols.cmake
@@ -7,7 +7,7 @@ ExternalProject_Add(external_wayland_protocols
  PREFIX ${BUILD_DIR}/wayland-protocols
  # Use `-E` so the `PKG_CONFIG_PATH` can be defined to link against our own WAYLAND.
  CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env PKG_CONFIG_PATH=${LIBDIR}/wayland/lib64/pkgconfig:$PKG_CONFIG_PATH
-                    ${MESON} --prefix ${LIBDIR}/wayland-protocols . ../external_wayland_protocols -Dtests=false
+                    ${MESON} --prefix ${LIBDIR}/wayland-protocols ${MESON_BUILD_TYPE} . ../external_wayland_protocols -Dtests=false
  BUILD_COMMAND ninja
  INSTALL_COMMAND ninja install
 )
--- a/build_files/build_environment/cmake/xvidcore.cmake
+++ b/build_files/build_environment/cmake/xvidcore.cmake
@@ -17,11 +17,13 @@ ExternalProject_Add(external_xvidcore
  INSTALL_DIR ${LIBDIR}/xvidcore
 )

-ExternalProject_Add_Step(external_xvidcore after_install
-  COMMAND ${CMAKE_COMMAND} -E rename ${LIBDIR}/xvidcore/lib/xvidcore.a ${LIBDIR}/xvidcore/lib/libxvidcore.a || true
-  COMMAND ${CMAKE_COMMAND} -E remove ${LIBDIR}/xvidcore/lib/xvidcore.dll.a
-  DEPENDEES install
-)
+if(WIN32)
+  ExternalProject_Add_Step(external_xvidcore after_install
+    COMMAND ${CMAKE_COMMAND} -E rename ${LIBDIR}/xvidcore/lib/xvidcore.a ${LIBDIR}/xvidcore/lib/libxvidcore.a || true
+    COMMAND ${CMAKE_COMMAND} -E remove ${LIBDIR}/xvidcore/lib/xvidcore.dll.a
+    DEPENDEES install
+  )
+endif()

 if(MSVC)
  set_target_properties(external_xvidcore PROPERTIES FOLDER Mingw)
--- a/build_files/build_environment/install_deps.sh
+++ b/build_files/build_environment/install_deps.sh
@@ -6615,11 +6615,9 @@ print_info() {
  fi

  if [ -d $INST/oiio ]; then
-    _1="-D WITH_OPENIMAGEIO=ON"
-    _2="-D OPENIMAGEIO_ROOT_DIR=$INST/oiio"
+    _1="-D OPENIMAGEIO_ROOT_DIR=$INST/oiio"
    PRINT "  $_1"
-    PRINT "  $_2"
-    _buildargs="$_buildargs $_1 $_2"
+    _buildargs="$_buildargs $_1"
  fi

  if [ "$OSL_SKIP" = false ]; then
--- a/build_files/build_environment/linux/linux_rocky8_setup.sh
+++ b/build_files/build_environment/linux/linux_rocky8_setup.sh
@@ -1,8 +1,8 @@
 #!/usr/bin/env bash
 # SPDX-License-Identifier: GPL-2.0-or-later

-# This script is part of the official build environment, see WIKI page for details.
-# https://wiki.blender.org/wiki/Building_Blender/Other/CentOS7ReleaseEnvironment
+# This script is part of the official build environment, see wiki page for details.
+# https://wiki.blender.org/wiki/Building_Blender/Other/Rocky8ReleaseEnvironment

 set -e

@@ -59,7 +59,7 @@ PACKAGES_FOR_LIBS=(
    automake
    libtool

-    # TODO: why is this needed?
+    # Used to set rpath on shared libraries
    patchelf

    # Builds generated by meson use Ninja for the actual build.
--- a/build_files/build_environment/linux/make_deps_wrapper.sh
+++ b/build_files/build_environment/linux/make_deps_wrapper.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+# This script ensures:
+# - One dependency is built at a time.
+# - That dependency uses all available cores.
+#
+# Without this, simply calling `make -j$(nproc)` from the `${CMAKE_BUILD_DIR}/deps/`
+# directory will build many projects at once.
+#
+# This is undesirable for the following reasons:
+#
+# - The output from projects is mixed together,
+#   making it difficult to track down the cause of a build failure.
+#
+# - Larger dependencies such as LLVM can bottleneck the build process,
+#   making it necessary to cancel the build and manually run build commands in each directory.
+#
+# - Building many projects at once means canceling (Control-C) can lead to the build being in an undefined state.
+#   It's possible canceling happens as a patch is being applied or files are being copied.
+#   (steps that aren't part of the compilation process where it's typically safe to cancel).
+
+if [[ -z "$MY_MAKE_CALL_LEVEL" ]]; then
+  export MY_MAKE_CALL_LEVEL=0
+  export MY_MAKEFLAGS=$MAKEFLAGS
+
+  # Extract the jobs argument (`-jN`, `-j N`, `--jobs=N`).
+  add_next=0
+  for i in "$@"; do
+    case $i in
+      -j*)
+        export MY_JOBS_ARG=$i
+        if [ "$MY_JOBS_ARG" = "-j" ]; then
+          add_next=1
+        fi
+        ;;
+      --jobs=*)
+        # No `shift` here: it would drop the first argument from "$@", which is forwarded to make.
+        MY_JOBS_ARG=$i
+        ;;
+      *)
+        if (( add_next == 1 )); then
+          MY_JOBS_ARG="$MY_JOBS_ARG $i"
+          add_next=0
+        fi
+        ;;
+    esac
+  done
+  unset i add_next
+
+  if [[ -z "$MY_JOBS_ARG" ]]; then
+    MY_JOBS_ARG="-j$(nproc)"
+  fi
+  export MY_JOBS_ARG
+  # Support user defined `MAKEFLAGS`.
+  export MAKEFLAGS="$MY_MAKEFLAGS -j1"
+else
+  export MY_MAKE_CALL_LEVEL=$(( MY_MAKE_CALL_LEVEL + 1 ))
+  if (( MY_MAKE_CALL_LEVEL == 1 )); then
+    # Important to set jobs to 1, otherwise user defined jobs argument is used.
+    export MAKEFLAGS="$MY_MAKEFLAGS -j1"
+  elif (( MY_MAKE_CALL_LEVEL == 2 )); then
+    # This is the level used by each sub-project.
+    export MAKEFLAGS="$MY_MAKEFLAGS $MY_JOBS_ARG"
+  fi
+  # Else leave `MY_MAKEFLAGS` flags as-is, avoids setting a high number of jobs on recursive
+  # calls (which may easily run out of memory). Let the job-server handle the rest.
+fi
+
+# Useful for troubleshooting the wrapper.
+# echo "Call level: $MY_MAKE_CALL_LEVEL, args=$@".
+
+# Call actual make but ensure recursive calls run via this script.
+exec make MAKE="$0" "$@"
--- a/build_files/build_environment/patches/igc_opencl_clang.diff
+++ b/build_files/build_environment/patches/igc_opencl_clang.diff
@@ -1,7 +1,7 @@
 diff -Naur external_igc_opencl_clang.orig/CMakeLists.txt external_igc_opencl_clang/CMakeLists.txt
 --- external_igc_opencl_clang.orig/CMakeLists.txt	2022-03-16 05:51:10 -0600
 +++ external_igc_opencl_clang/CMakeLists.txt	2022-05-23 10:40:09 -0600
-@@ -126,22 +126,24 @@
+@@ -147,22 +147,24 @@
         )
     endif()
 
--- a/build_files/build_environment/patches/openvdb_metal.diff
+++ b/build_files/build_environment/patches/openvdb_metal.diff
--- a/build_files/cmake/buildinfo.cmake
+++ b/build_files/cmake/buildinfo.cmake
@@ -23,19 +23,19 @@ if(EXISTS ${SOURCE_DIR}/.git)

  if(MY_WC_BRANCH STREQUAL "HEAD")
    # Detached HEAD, check whether commit hash is reachable
-    # in the master branch
+    # in the main branch
    execute_process(COMMAND git rev-parse --short=12 HEAD
                    WORKING_DIRECTORY ${SOURCE_DIR}
                    OUTPUT_VARIABLE MY_WC_HASH
                    OUTPUT_STRIP_TRAILING_WHITESPACE)

-    execute_process(COMMAND git branch --list master blender-v* --contains ${MY_WC_HASH}
+    execute_process(COMMAND git branch --list main blender-v* --contains ${MY_WC_HASH}
                    WORKING_DIRECTORY ${SOURCE_DIR}
                    OUTPUT_VARIABLE _git_contains_check
                    OUTPUT_STRIP_TRAILING_WHITESPACE)

    if(NOT _git_contains_check STREQUAL "")
-      set(MY_WC_BRANCH "master")
+      set(MY_WC_BRANCH "main")
    else()
      execute_process(COMMAND git show-ref --tags -d
                      WORKING_DIRECTORY ${SOURCE_DIR}
@@ -48,7 +48,7 @@ if(EXISTS ${SOURCE_DIR}/.git)
                      OUTPUT_STRIP_TRAILING_WHITESPACE)

      if(_git_tag_hashes MATCHES "${_git_head_hash}")
-        set(MY_WC_BRANCH "master")
+        set(MY_WC_BRANCH "main")
      else()
        execute_process(COMMAND git branch --contains ${MY_WC_HASH}
                        WORKING_DIRECTORY ${SOURCE_DIR}
--- a/build_files/cmake/config/blender_lite.cmake
+++ b/build_files/cmake/config/blender_lite.cmake
@@ -36,6 +36,7 @@ set(WITH_IMAGE_WEBP          OFF CACHE BOOL "" FORCE)
 set(WITH_INPUT_IME           OFF CACHE BOOL "" FORCE)
 set(WITH_INPUT_NDOF          OFF CACHE BOOL "" FORCE)
 set(WITH_INTERNATIONAL       OFF CACHE BOOL "" FORCE)
+set(WITH_IO_PLY              OFF CACHE BOOL "" FORCE)
 set(WITH_IO_STL              OFF CACHE BOOL "" FORCE)
 set(WITH_IO_WAVEFRONT_OBJ    OFF CACHE BOOL "" FORCE)
 set(WITH_IO_GPENCIL          OFF CACHE BOOL "" FORCE)
@@ -52,7 +53,6 @@ set(WITH_OPENAL              OFF CACHE BOOL "" FORCE)
 set(WITH_OPENCOLLADA         OFF CACHE BOOL "" FORCE)
 set(WITH_OPENCOLORIO         OFF CACHE BOOL "" FORCE)
 set(WITH_OPENIMAGEDENOISE    OFF CACHE BOOL "" FORCE)
-set(WITH_OPENIMAGEIO         OFF CACHE BOOL "" FORCE)
 set(WITH_OPENMP              OFF CACHE BOOL "" FORCE)
 set(WITH_OPENSUBDIV          OFF CACHE BOOL "" FORCE)
 set(WITH_OPENVDB             OFF CACHE BOOL "" FORCE)
--- a/build_files/cmake/config/blender_release.cmake
+++ b/build_files/cmake/config/blender_release.cmake
@@ -85,7 +85,7 @@ if(NOT APPLE)
  set(WITH_CYCLES_DEVICE_OPTIX    ON  CACHE BOOL "" FORCE)
  set(WITH_CYCLES_CUDA_BINARIES   ON  CACHE BOOL "" FORCE)
  set(WITH_CYCLES_CUBIN_COMPILER  OFF CACHE BOOL "" FORCE)
-  set(WITH_CYCLES_HIP_BINARIES    ON  CACHE BOOL "" FORCE)
+  set(WITH_CYCLES_HIP_BINARIES    OFF CACHE BOOL "" FORCE)
  set(WITH_CYCLES_DEVICE_ONEAPI   ON  CACHE BOOL "" FORCE)
  set(WITH_CYCLES_ONEAPI_BINARIES ON  CACHE BOOL "" FORCE)
 endif()
--- a/build_files/cmake/example_scripts/cmake_linux_install.sh
+++ b/build_files/cmake/example_scripts/cmake_linux_install.sh
@@ -11,11 +11,11 @@
 mkdir ~/blender-git
 cd ~/blender-git

-git clone http://git.blender.org/blender.git
+git clone https://projects.blender.org/blender/blender.git
 cd blender
 git submodule update --init --recursive
-git submodule foreach git checkout master
-git submodule foreach git pull --rebase origin master
+git submodule foreach git checkout main
+git submodule foreach git pull --rebase origin main

 # create build dir
 mkdir ~/blender-git/build-cmake
@@ -35,7 +35,7 @@ ln -s ~/blender-git/build-cmake/bin/blender ~/blender-git/blender/blender.bin
 echo ""
 echo "* Useful Commands *"
 echo "   Run Blender: ~/blender-git/blender/blender.bin"
-echo "   Update Blender: git pull --rebase; git submodule foreach git pull --rebase origin master"
+echo "   Update Blender: git pull --rebase; git submodule foreach git pull --rebase origin main"
 echo "   Reconfigure Blender: cd ~/blender-git/build-cmake ; cmake ."
 echo "   Build Blender: cd ~/blender-git/build-cmake ; make"
 echo ""
--- a/build_files/cmake/macros.cmake
+++ b/build_files/cmake/macros.cmake
@@ -544,7 +544,7 @@ endfunction()
 function(setup_platform_linker_libs
  target
  )
-  # jemalloc must be early in the list, to be before pthread (see T57998)
+  # jemalloc must be early in the list, to be before pthread (see #57998).
  if(WITH_MEM_JEMALLOC)
    target_link_libraries(${target} ${JEMALLOC_LIBRARIES})
  endif()
@@ -1090,7 +1090,7 @@ function(msgfmt_simple
  add_custom_command(
    OUTPUT  ${_file_to}
    COMMAND ${CMAKE_COMMAND} -E make_directory ${_file_to_path}
-    COMMAND "$<TARGET_FILE:msgfmt>" ${_file_from} ${_file_to}
+    COMMAND ${CMAKE_COMMAND} -E env ${PLATFORM_ENV_BUILD} "$<TARGET_FILE:msgfmt>" ${_file_from} ${_file_to}
    DEPENDS msgfmt ${_file_from})

  set_source_files_properties(${_file_to} PROPERTIES GENERATED TRUE)
--- a/build_files/cmake/platform/platform_apple.cmake
+++ b/build_files/cmake/platform/platform_apple.cmake
@@ -270,19 +270,7 @@ if(WITH_PUGIXML)
  find_package(PugiXML REQUIRED)
 endif()

-if(WITH_OPENIMAGEIO)
-  find_package(OpenImageIO)
-  list(APPEND OPENIMAGEIO_LIBRARIES
-    ${PNG_LIBRARIES}
-    ${JPEG_LIBRARIES}
-    ${TIFF_LIBRARY}
-    ${OPENEXR_LIBRARIES}
-    ${OPENJPEG_LIBRARIES}
-    ${ZLIB_LIBRARIES}
-  )
-  set(OPENIMAGEIO_DEFINITIONS "-DOIIO_STATIC_BUILD")
-  set(OPENIMAGEIO_IDIFF "${LIBDIR}/openimageio/bin/idiff")
-endif()
+find_package(OpenImageIO REQUIRED)
 add_bundled_libraries(openimageio/lib)

 if(WITH_OPENCOLORIO)
@@ -440,7 +428,7 @@ string(APPEND PLATFORM_LINKFLAGS " -stdlib=libc++")
 # Make stack size more similar to Embree, required for Embree.
 string(APPEND PLATFORM_LINKFLAGS_EXECUTABLE " -Wl,-stack_size,0x100000")

-# Suppress ranlib "has no symbols" warnings (workaround for T48250)
+# Suppress ranlib "has no symbols" warnings (workaround for #48250).
 set(CMAKE_C_ARCHIVE_CREATE   "<CMAKE_AR> Scr <TARGET> <LINK_FLAGS> <OBJECTS>")
 set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> Scr <TARGET> <LINK_FLAGS> <OBJECTS>")
 # llvm-ranlib doesn't support this flag. Xcode's libtool does.
--- a/build_files/cmake/platform/platform_unix.cmake
+++ b/build_files/cmake/platform/platform_unix.cmake
@@ -438,32 +438,7 @@ if(WITH_IMAGE_WEBP)
  set_and_warn_library_found("WebP" WEBP_FOUND WITH_IMAGE_WEBP)
 endif()

-if(WITH_OPENIMAGEIO)
-  find_package_wrapper(OpenImageIO)
-  set(OPENIMAGEIO_LIBRARIES
-    ${OPENIMAGEIO_LIBRARIES}
-    ${PNG_LIBRARIES}
-    ${JPEG_LIBRARIES}
-    ${ZLIB_LIBRARIES}
-  )
-
-  set(OPENIMAGEIO_DEFINITIONS "")
-
-  if(WITH_BOOST)
-    list(APPEND OPENIMAGEIO_LIBRARIES "${BOOST_LIBRARIES}")
-  endif()
-  if(WITH_IMAGE_TIFF)
-    list(APPEND OPENIMAGEIO_LIBRARIES "${TIFF_LIBRARY}")
-  endif()
-  if(WITH_IMAGE_OPENEXR)
-    list(APPEND OPENIMAGEIO_LIBRARIES "${OPENEXR_LIBRARIES}")
-  endif()
-  if(WITH_IMAGE_WEBP)
-    list(APPEND OPENIMAGEIO_LIBRARIES "${WEBP_LIBRARIES}")
-  endif()
-
-  set_and_warn_library_found("OPENIMAGEIO" OPENIMAGEIO_FOUND WITH_OPENIMAGEIO)
-endif()
+find_package_wrapper(OpenImageIO REQUIRED)
 add_bundled_libraries(openimageio/lib)

 if(WITH_OPENCOLORIO)
--- a/build_files/cmake/platform/platform_win32.cmake
+++ b/build_files/cmake/platform/platform_win32.cmake
@@ -121,7 +121,7 @@ if(WITH_WINDOWS_BUNDLE_CRT)
  include(InstallRequiredSystemLibraries)

  # ucrtbase(d).dll cannot be in the manifest, due to the way windows 10 handles
-  # redirects for this dll, for details see T88813.
+  # redirects for this dll, for details see #88813.
  foreach(lib ${CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS})
    string(FIND ${lib} "ucrtbase" pos)
    if(NOT pos EQUAL -1)
@@ -295,7 +295,7 @@ unset(MATERIALX_LIB_FOLDER_EXISTS)
 if(NOT MSVC_CLANG                  AND # Available with MSVC 15.7+ but not for CLANG.
   NOT WITH_WINDOWS_SCCACHE        AND # And not when sccache is enabled
   NOT VS_CLANG_TIDY)                  # Clang-tidy does not like these options
-  add_compile_options(/experimental:external /external:templates- /external:I "${LIBDIR}" /external:W0)
+  add_compile_options(/experimental:external /external:I "${LIBDIR}" /external:W0)
 endif()

 # Add each of our libraries to our cmake_prefix_path so find_package() could work
@@ -522,6 +522,28 @@ if(WITH_PYTHON)
  set(PYTHON_LIBRARIES debug "${PYTHON_LIBRARY_DEBUG}" optimized "${PYTHON_LIBRARY}" )
 endif()

+if(NOT WITH_WINDOWS_FIND_MODULES)
+  # Even if boost is off, we still need to install the dlls when we use our lib folder since
+  # some of the other dependencies may need them. For this to work, BOOST_VERSION,
+  # BOOST_POSTFIX, and BOOST_DEBUG_POSTFIX need to be set.
+  set(BOOST ${LIBDIR}/boost)
+  set(BOOST_INCLUDE_DIR ${BOOST}/include)
+  set(BOOST_LIBPATH ${BOOST}/lib)
+  set(BOOST_VERSION_HEADER ${BOOST_INCLUDE_DIR}/boost/version.hpp)
+  # Take the version from the `#define BOOST_LIB_VERSION "<digits and underscores>"` line
+  # of the header.
+  if(EXISTS "${BOOST_VERSION_HEADER}")
+    file(STRINGS "${BOOST_VERSION_HEADER}" BOOST_LIB_VERSION REGEX "#define BOOST_LIB_VERSION ")
+    if(BOOST_LIB_VERSION MATCHES "#define BOOST_LIB_VERSION \"([0-9_]+)\"")
+      set(BOOST_VERSION "${CMAKE_MATCH_1}")
+    endif()
+  endif()
+  if(NOT BOOST_VERSION)
+    # This block runs even when WITH_BOOST is off, so name the header that was probed
+    # to make a missing or unexpected library folder easy to diagnose.
+    message(FATAL_ERROR "Unable to determine Boost version from ${BOOST_VERSION_HEADER}")
+  endif()
+  set(BOOST_POSTFIX "vc142-mt-x64-${BOOST_VERSION}")
+  set(BOOST_DEBUG_POSTFIX "vc142-mt-gyd-x64-${BOOST_VERSION}")
+  set(BOOST_PREFIX "")
+endif()
+
 if(WITH_BOOST)
  if(WITH_CYCLES AND WITH_CYCLES_OSL)
    set(boost_extra_libs wave)
@@ -537,22 +559,6 @@ if(WITH_BOOST)
  endif()
  if(NOT Boost_FOUND)
    warn_hardcoded_paths(BOOST)
-    set(BOOST ${LIBDIR}/boost)
-    set(BOOST_INCLUDE_DIR ${BOOST}/include)
-    set(BOOST_LIBPATH ${BOOST}/lib)
-    set(BOOST_VERSION_HEADER ${BOOST_INCLUDE_DIR}/boost/version.hpp)
-    if(EXISTS ${BOOST_VERSION_HEADER})
-      file(STRINGS "${BOOST_VERSION_HEADER}" BOOST_LIB_VERSION REGEX "#define BOOST_LIB_VERSION ")
-      if(BOOST_LIB_VERSION MATCHES "#define BOOST_LIB_VERSION \"([0-9_]+)\"")
-        set(BOOST_VERSION "${CMAKE_MATCH_1}")
-      endif()
-    endif()
-    if(NOT BOOST_VERSION)
-      message(FATAL_ERROR "Unable to determine Boost version")
-    endif()
-    set(BOOST_POSTFIX "vc142-mt-x64-${BOOST_VERSION}")
-    set(BOOST_DEBUG_POSTFIX "vc142-mt-gyd-x64-${BOOST_VERSION}")
-    set(BOOST_PREFIX "")
    # This is file new in 3.4 if it does not exist, assume we are building against 3.3 libs
    set(BOOST_34_TRIGGER_FILE ${BOOST_LIBPATH}/${BOOST_PREFIX}boost_python310-${BOOST_DEBUG_POSTFIX}.lib)
    if(NOT EXISTS ${BOOST_34_TRIGGER_FILE})
@@ -602,25 +608,18 @@ if(WITH_BOOST)
  set(BOOST_DEFINITIONS "-DBOOST_ALL_NO_LIB")
 endif()

-if(WITH_OPENIMAGEIO)
-  windows_find_package(OpenImageIO)
-  if(NOT OpenImageIO_FOUND)
-    set(OPENIMAGEIO ${LIBDIR}/OpenImageIO)
-    set(OPENIMAGEIO_LIBPATH ${OPENIMAGEIO}/lib)
-    set(OPENIMAGEIO_INCLUDE_DIR ${OPENIMAGEIO}/include)
-    set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO_INCLUDE_DIR})
-    set(OIIO_OPTIMIZED optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO.lib optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util.lib)
-    set(OIIO_DEBUG debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_d.lib debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util_d.lib)
-    set(OPENIMAGEIO_LIBRARIES ${OIIO_OPTIMIZED} ${OIIO_DEBUG})
-  endif()
-  set(OPENIMAGEIO_DEFINITIONS "-DUSE_TBB=0")
+windows_find_package(OpenImageIO)
+if(NOT OpenImageIO_FOUND)
+  set(OPENIMAGEIO ${LIBDIR}/OpenImageIO)
+  set(OPENIMAGEIO_LIBPATH ${OPENIMAGEIO}/lib)
+  set(OPENIMAGEIO_INCLUDE_DIR ${OPENIMAGEIO}/include)
+  set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO_INCLUDE_DIR})
+  set(OIIO_OPTIMIZED optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO.lib optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util.lib)
+  set(OIIO_DEBUG debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_d.lib debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util_d.lib)
+  set(OPENIMAGEIO_LIBRARIES ${OIIO_OPTIMIZED} ${OIIO_DEBUG})
  set(OPENIMAGEIO_IDIFF "${OPENIMAGEIO}/bin/idiff.exe")
-  # If the .dll does not exist, assume it is a static OIIO
-  if(NOT EXISTS ${OPENIMAGEIO}/bin/OpenImageIO.dll)
-    add_definitions(-DOIIO_STATIC_DEFINE)
-  endif()
-  add_definitions(-DOIIO_NO_SSE=1)
 endif()
+add_definitions(-DOIIO_NO_SSE=1)

 if(WITH_LLVM)
  set(LLVM_ROOT_DIR ${LIBDIR}/llvm CACHE PATH "Path to the LLVM installation")
@@ -901,11 +900,11 @@ endif()

 if(WINDOWS_PYTHON_DEBUG)
  # Include the system scripts in the blender_python_system_scripts project.
-  file(GLOB_RECURSE inFiles "${CMAKE_SOURCE_DIR}/release/scripts/*.*" )
+  file(GLOB_RECURSE inFiles "${CMAKE_SOURCE_DIR}/scripts/*.*" )
  add_custom_target(blender_python_system_scripts SOURCES ${inFiles})
  foreach(_source IN ITEMS ${inFiles})
    get_filename_component(_source_path "${_source}" PATH)
-    string(REPLACE "${CMAKE_SOURCE_DIR}/release/scripts/" "" _source_path "${_source_path}")
+    string(REPLACE "${CMAKE_SOURCE_DIR}/scripts/" "" _source_path "${_source_path}")
    string(REPLACE "/" "\\" _group_path "${_source_path}")
    source_group("${_group_path}" FILES "${_source}")
  endforeach()
@@ -940,7 +939,7 @@ if(WINDOWS_PYTHON_DEBUG)
    file(WRITE ${USER_PROPS_FILE} "<?xml version=\"1.0\" encoding=\"utf-8\"?>
 <Project DefaultTargets=\"Build\" xmlns=\"http://schemas.microsoft.com/developer/msbuild/2003\">
  <PropertyGroup>
-    <LocalDebuggerCommandArguments>-con --env-system-scripts \"${CMAKE_SOURCE_DIR}/release/scripts\" </LocalDebuggerCommandArguments>
+    <LocalDebuggerCommandArguments>-con --env-system-scripts \"${CMAKE_SOURCE_DIR}/scripts\" </LocalDebuggerCommandArguments>
  </PropertyGroup>
 </Project>")
  endif()
@@ -1040,7 +1039,7 @@ endif()

 # Environment variables to run precompiled executables that needed libraries.
 list(JOIN PLATFORM_BUNDLED_LIBRARY_DIRS ";" _library_paths)
-set(PLATFORM_ENV_BUILD_DIRS "${LIBDIR}/OpenImageIO/bin\;${LIBDIR}/boost/lib\;${LIBDIR}/openexr/bin\;${LIBDIR}/imath/bin\;${PATH}")
+set(PLATFORM_ENV_BUILD_DIRS "${LIBDIR}/tbb/bin\;${LIBDIR}/OpenImageIO/bin\;${LIBDIR}/boost/lib\;${LIBDIR}/openexr/bin\;${LIBDIR}/imath/bin\;${PATH}")
 set(PLATFORM_ENV_BUILD "PATH=${PLATFORM_ENV_BUILD_DIRS}")
 # Install needs the additional folders from PLATFORM_ENV_BUILD_DIRS as well, as tools like idiff and abcls use the release mode dlls
 set(PLATFORM_ENV_INSTALL "PATH=${CMAKE_INSTALL_PREFIX_WITH_CONFIG}/blender.shared/\;${PLATFORM_ENV_BUILD_DIRS}\;$ENV{PATH}")
--- a/build_files/cmake/project_info.py
+++ b/build_files/cmake/project_info.py
@@ -142,7 +142,7 @@ def cmake_advanced_info() -> Union[Tuple[List[str], List[Tuple[str, str]]], Tupl

    make_exe = cmake_cache_var("CMAKE_MAKE_PROGRAM")
    if make_exe is None:
-        print("Make command not found in: %r not found" % project_path)
+        print("Make command not found: CMAKE_MAKE_PROGRAM")
        return None, None

    make_exe_basename = os.path.basename(make_exe)
--- a/build_files/config/pipeline_config.yaml
+++ b/build_files/config/pipeline_config.yaml
@@ -1,53 +1,3 @@
-#
-# Used by Buildbot build pipeline make_update.py script only for now
-# We intended to update the make_update.py in the branches to use this file eventually
-#
-update-code:
-    git:
-        submodules:
-        -   branch: master
-            commit_id: HEAD
-            path: release/scripts/addons
-        -   branch: master
-            commit_id: HEAD
-            path: release/scripts/addons_contrib
-        -   branch: master
-            commit_id: HEAD
-            path: release/datafiles/locale
-        -   branch: master
-            commit_id: HEAD
-            path: source/tools
-    svn:
-        libraries:
-            darwin-arm64:
-                branch: trunk
-                commit_id: HEAD
-                path: lib/darwin_arm64
-            darwin-x86_64:
-                branch: trunk
-                commit_id: HEAD
-                path: lib/darwin
-            linux-x86_64:
-                branch: trunk
-                commit_id: HEAD
-                path: lib/linux_x86_64_glibc_228
-            windows-amd64:
-                branch: trunk
-                commit_id: HEAD
-                path: lib/win64_vc15
-        tests:
-            branch: trunk
-            commit_id: HEAD
-            path: lib/tests
-        benchmarks:
-            branch: trunk
-            commit_id: HEAD
-            path: lib/benchmarks
-        assets:
-            branch: trunk
-            commit_id: HEAD
-            path: lib/assets
-
 #
 # Buildbot only configs
 #
--- a/build_files/utils/make_bpy_wheel.py
+++ b/build_files/utils/make_bpy_wheel.py
@@ -58,7 +58,7 @@ Each Blender release supports one Python version, and the package is only compat
 ## Source Code

 * [Releases](https://download.blender.org/source/)
-* Repository: [git.blender.org/blender.git](https://git.blender.org/gitweb/gitweb.cgi/blender.git)
+* Repository: [projects.blender.org/blender/blender.git](https://projects.blender.org/blender/blender)

 ## Credits

--- a/build_files/utils/make_source_archive.py
+++ b/build_files/utils/make_source_archive.py
@@ -135,7 +135,7 @@ def submodules_to_manifest(
        submodule = line.split()[1]

        # Don't use native slashes as GIT for MS-Windows outputs forward slashes.
-        if skip_addon_contrib and submodule == "release/scripts/addons_contrib":
+        if skip_addon_contrib and submodule == "scripts/addons_contrib":
            continue

        for path in git_ls_files(blender_srcdir / submodule):
--- a/build_files/utils/make_update.py
+++ b/build_files/utils/make_update.py
@@ -16,14 +16,28 @@ import shutil
 import sys

 import make_utils
+from pathlib import Path
 from make_utils import call, check_output
+from urllib.parse import urljoin

 from typing import (
    List,
+    Iterable,
    Optional,
 )


+class Submodule:
+    path: str
+    branch: str
+    branch_fallback: str
+
+    def __init__(self, path: str, branch: str, branch_fallback: str) -> None:
+        self.path = path
+        self.branch = branch
+        self.branch_fallback = branch_fallback
+
+
 def print_stage(text: str) -> None:
    print("")
    print(text)
@@ -42,6 +56,7 @@ def parse_arguments() -> argparse.Namespace:
    parser.add_argument("--svn-branch", default=None)
    parser.add_argument("--git-command", default="git")
    parser.add_argument("--use-linux-libraries", action="store_true")
+    parser.add_argument("--architecture", type=str, choices=("x86_64", "amd64", "arm64",))
    return parser.parse_args()


@@ -51,6 +66,19 @@ def get_blender_git_root() -> str:
 # Setup for precompiled libraries and tests from svn.


+# Return the architecture name used to select the precompiled libraries.
+#
+# An explicit `--architecture` argument takes precedence, otherwise the architecture is
+# detected from the platform the script is running on.
+def get_effective_architecture(args: argparse.Namespace) -> str:
+    architecture = args.architecture
+    if architecture:
+        assert isinstance(architecture, str)
+        return architecture
+
+    # Check platform.version to detect arm64 with x86_64 python binary.
+    if "ARM64" in platform.version():
+        return "arm64"
+
+    # Lower-case the reported machine name so the result does not depend on its casing.
+    return platform.machine().lower()
+
+
 def svn_update(args: argparse.Namespace, release_version: Optional[str]) -> None:
    svn_non_interactive = [args.svn_command, '--non-interactive']

@@ -58,11 +86,11 @@ def svn_update(args: argparse.Namespace, release_version: Optional[str]) -> None
    svn_url = make_utils.svn_libraries_base_url(release_version, args.svn_branch)

    # Checkout precompiled libraries
+    architecture = get_effective_architecture(args)
    if sys.platform == 'darwin':
-        # Check platform.version to detect arm64 with x86_64 python binary.
-        if platform.machine() == 'arm64' or ('ARM64' in platform.version()):
+        if architecture == 'arm64':
            lib_platform = "darwin_arm64"
-        elif platform.machine() == 'x86_64':
+        elif architecture == 'x86_64':
            lib_platform = "darwin"
        else:
            lib_platform = None
@@ -170,7 +198,7 @@ def git_update_skip(args: argparse.Namespace, check_remote_exists: bool = True)
        return "rebase or merge in progress, complete it first"

    # Abort if uncommitted changes.
-    changes = check_output([args.git_command, 'status', '--porcelain', '--untracked-files=no'])
+    changes = check_output([args.git_command, 'status', '--porcelain', '--untracked-files=no', '--ignore-submodules'])
    if len(changes) != 0:
        return "you have unstaged changes"

@@ -184,97 +212,291 @@ def git_update_skip(args: argparse.Namespace, check_remote_exists: bool = True)
    return ""


+def use_upstream_workflow(args: argparse.Namespace) -> bool:
+    return make_utils.git_remote_exist(args.git_command, "upstream")
+
+
+def work_tree_update_upstream_workflow(args: argparse.Namespace, use_fetch: bool = True) -> str:
+    """
+    Update the Blender repository using the Github style of fork organization
+
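+    Returns an empty string if the current local branch has been fast-forwarded to the
+    upstream state, "no_branch" if the branch does not exist in the upstream remote, or an
+    error message if the fast-forward merge failed.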
+    """
+
+    branch_name = make_utils.git_branch(args.git_command)
+
+    if use_fetch:
+        call((args.git_command, "fetch", "upstream"))
+
+    upstream_branch = f"upstream/{branch_name}"
+    if not make_utils.git_branch_exists(args.git_command, upstream_branch):
+        return "no_branch"
+
+    retcode = call((args.git_command, "merge", "--ff-only", upstream_branch), exit_on_error=False)
+    if retcode != 0:
+        return "Unable to fast forward\n"
+
+    return ""
+
+
+def work_tree_update(args: argparse.Namespace, use_fetch: bool = True) -> str:
+    """
+    Update the Git working tree using the best strategy
+
+    This function detects whether a Github style of fork remote organization is used, or
+    whether it is a repository whose origin is the upstream.
+    """
+
+    if use_upstream_workflow(args):
+        message = work_tree_update_upstream_workflow(args, use_fetch)
+        if message != "no_branch":
+            return message
+
+        # If there is upstream configured but the local branch is not in the upstream, try to
+        # update the branch from the fork.
+
+    update_command = [args.git_command, "pull", "--rebase"]
+
+    call(update_command)
+
+    return ""
+
+
 # Update blender repository.
-def blender_update(args: argparse.Namespace) -> None:
+def blender_update(args: argparse.Namespace) -> str:
    print_stage("Updating Blender Git Repository")
-    call([args.git_command, "pull", "--rebase"])
+
+    return work_tree_update(args)


-# Update submodules.
-def submodules_update(
-        args: argparse.Namespace,
-        release_version: Optional[str],
-        branch: Optional[str],
-) -> str:
-    print_stage("Updating Submodules")
-    if make_utils.command_missing(args.git_command):
-        sys.stderr.write("git not found, can't update code\n")
-        sys.exit(1)
+# Return the URL of the repository `repo_name` located next to the Blender repository.
+#
+# The URL is built by resolving "../<repo_name>" against `blender_url` with a trailing
+# slash added, which replaces the last path component of `blender_url` with `repo_name`.
+def resolve_external_url(blender_url: str, repo_name: str) -> str:
+    return urljoin(blender_url + "/", "../" + repo_name)

-    # Update submodules to appropriate given branch,
-    # falling back to master if none is given and/or found in a sub-repository.
-    branch_fallback = "master"
+
+def external_script_copy_old_submodule_over(args: argparse.Namespace, directory_name: str) -> None:
+    blender_git_root = Path(get_blender_git_root())
+    scripts_dir = blender_git_root / "scripts"
+    external_dir = scripts_dir / directory_name
+
+    old_submodule_relative_dir = Path("release") / "scripts" / directory_name
+    print(f"Moving {old_submodule_relative_dir} to scripts/{directory_name} ...")
+
+    old_submodule_dir = blender_git_root / old_submodule_relative_dir
+    shutil.move(old_submodule_dir, external_dir)
+
+    # Remove old ".git" which is a file with path to a submodule bare repo inside of main
+    # repo .git/modules directory.
+    (external_dir / ".git").unlink()
+
+    bare_repo_relative_dir = Path(".git") / "modules" / "release" / "scripts" / directory_name
+    print(f"Copying {bare_repo_relative_dir} to scripts/{directory_name}/.git ...")
+    bare_repo_dir = blender_git_root / bare_repo_relative_dir
+    shutil.copytree(bare_repo_dir, external_dir / ".git")
+
+    git_config = external_dir / ".git" / "config"
+    call((args.git_command, "config", "--file", str(git_config), "--unset", "core.worktree"))
+
+
+def external_script_initialize_if_needed(args: argparse.Namespace,
+                                         repo_name: str,
+                                         directory_name: str) -> None:
+    """Initialize checkout of an external repository scripts directory"""
+
+    blender_git_root = Path(get_blender_git_root())
+    blender_dot_git = blender_git_root / ".git"
+    scripts_dir = blender_git_root / "scripts"
+    external_dir = scripts_dir / directory_name
+
+    if external_dir.exists():
+        return
+
+    print(f"Initializing scripts/{directory_name} ...")
+
+    old_submodule_dot_git = blender_git_root / "release" / "scripts" / directory_name / ".git"
+    if old_submodule_dot_git.exists() and blender_dot_git.is_dir():
+        external_script_copy_old_submodule_over(args, directory_name)
+        return
+
+    origin_name = "upstream" if use_upstream_workflow(args) else "origin"
+    blender_url = make_utils.git_get_remote_url(args.git_command, origin_name)
+    external_url = resolve_external_url(blender_url, repo_name)
+
+    call((args.git_command, "clone", "--origin", origin_name, external_url, str(external_dir)))
+
+
+def external_script_add_origin_if_needed(args: argparse.Namespace,
+                                         repo_name: str,
+                                         directory_name: str) -> None:
+    """
+    Add remote called 'origin' if there is a fork of the external repository available
+
+    This is only done when using Github style upstream workflow in the main repository.
+    Nothing is changed when the external checkout already has an 'origin' remote, has no
+    'upstream' remote, or when the expected fork URL is not a remote repository.
+    """
+
+    if not use_upstream_workflow(args):
+        return
+
+    cwd = os.getcwd()
+
+    blender_git_root = Path(get_blender_git_root())
+    scripts_dir = blender_git_root / "scripts"
+    external_dir = scripts_dir / directory_name
+
+    # Resolve the fork URL before changing the directory, the remotes queried after the
+    # `chdir` below are the ones of the external checkout.
+    origin_blender_url = make_utils.git_get_remote_url(args.git_command, "origin")
+    origin_external_url = resolve_external_url(origin_blender_url, repo_name)
+
+    try:
+        os.chdir(external_dir)
+
+        if (make_utils.git_remote_exist(args.git_command, "origin") or
+                not make_utils.git_remote_exist(args.git_command, "upstream")):
+            return
+
+        if not make_utils.git_is_remote_repository(args.git_command, origin_external_url):
+            return
+
+        print(f"Adding origin remote to {directory_name} pointing to fork ...")
+
+        # Non-obvious tricks to introduce the new remote called "origin" to the existing
+        # submodule configuration.
+        #
+        # This is all within the context of creating a fork of a submodule after `make update`
+        # has been run and possibly local branches tracking upstream were added.
+        #
+        # The idea here goes as following:
+        #
+        #  - Rename remote "upstream" to "origin", which takes care of changing the names of
+        #    remotes the local branches are tracking.
+        #
+        #  - Change the URL of the "origin", which so far was still pointing to upstream.
+        #
+        #  - Re-introduce the "upstream" remote, with the same URL as it had prior to rename.
+
+        upstream_url = make_utils.git_get_remote_url(args.git_command, "upstream")
+
+        call((args.git_command, "remote", "rename", "upstream", "origin"))
+        make_utils.git_set_config(args.git_command, "remote.origin.url", origin_external_url)
+
+        call((args.git_command, "remote", "add", "upstream", upstream_url))
+    finally:
+        # Always restore the working directory, also on the early returns above.
+        os.chdir(cwd)
+
+
+def external_scripts_update(args: argparse.Namespace,
+                            repo_name: str,
+                            directory_name: str,
+                            branch: Optional[str]) -> str:
+    """Update a single external checkout with the given name in the scripts folder"""
+
+    external_script_initialize_if_needed(args, repo_name, directory_name)
+    external_script_add_origin_if_needed(args, repo_name, directory_name)
+
+    print(f"Updating scripts/{directory_name} ...")
+
+    cwd = os.getcwd()
+
+    blender_git_root = Path(get_blender_git_root())
+    scripts_dir = blender_git_root / "scripts"
+    external_dir = scripts_dir / directory_name
+
+    # Update externals to appropriate given branch, falling back to main if none is given and/or
+    # found in a sub-repository.
+    branch_fallback = "main"
    if not branch:
        branch = branch_fallback

-    submodules = [
-        ("release/scripts/addons", branch, branch_fallback),
-        ("release/scripts/addons_contrib", branch, branch_fallback),
-        ("release/datafiles/locale", branch, branch_fallback),
-        ("source/tools", branch, branch_fallback),
-    ]
-
-    # Initialize submodules only if needed.
-    for submodule_path, submodule_branch, submodule_branch_fallback in submodules:
-        if not os.path.exists(os.path.join(submodule_path, ".git")):
-            call([args.git_command, "submodule", "update", "--init", "--recursive"])
-            break
-
-    # Checkout appropriate branch and pull changes.
    skip_msg = ""
-    for submodule_path, submodule_branch, submodule_branch_fallback in submodules:
-        cwd = os.getcwd()
-        try:
-            os.chdir(submodule_path)
-            msg = git_update_skip(args, check_remote_exists=False)
-            if msg:
-                skip_msg += submodule_path + " skipped: " + msg + "\n"
-            else:
-                # Find a matching branch that exists.
-                call([args.git_command, "fetch", "origin"])
-                if make_utils.git_branch_exists(args.git_command, submodule_branch):
-                    pass
-                elif make_utils.git_branch_exists(args.git_command, submodule_branch_fallback):
-                    submodule_branch = submodule_branch_fallback
-                else:
-                    # Skip.
-                    submodule_branch = ""

-                # Switch to branch and pull.
-                if submodule_branch:
-                    if make_utils.git_branch(args.git_command) != submodule_branch:
+    try:
+        os.chdir(external_dir)
+        msg = git_update_skip(args, check_remote_exists=False)
+        if msg:
+            skip_msg += directory_name + " skipped: " + msg + "\n"
+        else:
+            # Find a matching branch that exists.
+            for remote in ("origin", "upstream"):
+                if make_utils.git_remote_exist(args.git_command, remote):
+                    call([args.git_command, "fetch", remote])
+
+            submodule_branch = branch
+
+            if make_utils.git_branch_exists(args.git_command, submodule_branch):
+                pass
+            elif make_utils.git_branch_exists(args.git_command, branch_fallback):
+                submodule_branch = branch_fallback
+            else:
+                # Skip.
+                submodule_branch = ""
+
+            # Switch to branch and pull.
+            if submodule_branch:
+                if make_utils.git_branch(args.git_command) != submodule_branch:
+                    # If the local branch exists just check out to it.
+                    # If there is no local branch but only remote specify an explicit remote.
+                    # Without this explicit specification Git attempts to set-up tracking
+                    # automatically and fails when the branch is available in multiple remotes.
+                    if make_utils.git_local_branch_exists(args.git_command, submodule_branch):
                        call([args.git_command, "checkout", submodule_branch])
-                    call([args.git_command, "pull", "--rebase", "origin", submodule_branch])
-        finally:
-            os.chdir(cwd)
+                    elif make_utils.git_remote_exist(args.git_command, "origin"):
+                        call([args.git_command, "checkout", "-t", f"origin/{submodule_branch}"])
+                    elif make_utils.git_remote_exist(args.git_command, "upstream"):
+                        call([args.git_command, "checkout", "-t", f"upstream/{submodule_branch}"])
+                # Don't use extra fetch since all remotes of interest have been already fetched
+                # some lines above.
+                skip_msg += work_tree_update(args, use_fetch=False)
+    finally:
+        os.chdir(cwd)

    return skip_msg


+def scripts_submodules_update(args: argparse.Namespace, branch: Optional[str]) -> str:
+    """Update working trees of addons and addons_contrib within the scripts/ directory"""
+    msg = ""
+
+    msg += external_scripts_update(args, "blender-addons", "addons", branch)
+    msg += external_scripts_update(args, "blender-addons-contrib", "addons_contrib", branch)
+
+    return msg
+
+
+def submodules_update(args: argparse.Namespace, branch: Optional[str]) -> str:
+    """Update submodules or other externally tracked source trees"""
+    msg = ""
+
+    msg += scripts_submodules_update(args, branch)
+
+    return msg
+
+
 if __name__ == "__main__":
    args = parse_arguments()
    blender_skip_msg = ""
    submodules_skip_msg = ""

-    # Test if we are building a specific release version.
-    branch = make_utils.git_branch(args.git_command)
-    if branch == 'HEAD':
-        sys.stderr.write('Blender git repository is in detached HEAD state, must be in a branch\n')
-        sys.exit(1)
-
-    tag = make_utils.git_tag(args.git_command)
-    release_version = make_utils.git_branch_release_version(branch, tag)
+    blender_version = make_utils. parse_blender_version()
+    if blender_version.cycle != 'alpha':
+        major = blender_version.version // 100
+        minor = blender_version.version % 100
+        branch = f"blender-v{major}.{minor}-release"
+        release_version: Optional[str] = f"{major}.{minor}"
+    else:
+        branch = 'main'
+        release_version = None

    if not args.no_libraries:
        svn_update(args, release_version)
    if not args.no_blender:
        blender_skip_msg = git_update_skip(args)
+        if not blender_skip_msg:
+            blender_skip_msg = blender_update(args)
        if blender_skip_msg:
            blender_skip_msg = "Blender repository skipped: " + blender_skip_msg + "\n"
-        else:
-            blender_update(args)
    if not args.no_submodules:
-        submodules_skip_msg = submodules_update(args, release_version, branch)
+        submodules_skip_msg = submodules_update(args, branch)

    # Report any skipped repositories at the end, so it's not as easy to miss.
    skip_msg = blender_skip_msg + submodules_skip_msg
--- a/build_files/utils/make_utils.py
+++ b/build_files/utils/make_utils.py
@@ -9,7 +9,9 @@ import re
 import shutil
 import subprocess
 import sys
+import os
 from pathlib import Path
+from urllib.parse import urljoin

 from typing import (
    Sequence,
@@ -19,7 +21,7 @@ from typing import (

 def call(cmd: Sequence[str], exit_on_error: bool = True, silent: bool = False) -> int:
    if not silent:
-        print(" ".join(cmd))
+        print(" ".join([str(x) for x in cmd]))

    # Flush to ensure correct order output on Windows.
    sys.stdout.flush()
@@ -52,13 +54,57 @@ def check_output(cmd: Sequence[str], exit_on_error: bool = True) -> str:
    return output.strip()


+def git_local_branch_exists(git_command: str, branch: str) -> bool:
+    """Return True when `git rev-parse --verify <branch>` exits with code 0."""
+    exit_code = call([git_command, "rev-parse", "--verify", branch], exit_on_error=False, silent=True)
+    return exit_code == 0
+
+
 def git_branch_exists(git_command: str, branch: str) -> bool:
    return (
-        call([git_command, "rev-parse", "--verify", branch], exit_on_error=False, silent=True) == 0 or
+        git_local_branch_exists(git_command, branch) or
+        call([git_command, "rev-parse", "--verify", "remotes/upstream/" + branch], exit_on_error=False, silent=True) == 0 or
        call([git_command, "rev-parse", "--verify", "remotes/origin/" + branch], exit_on_error=False, silent=True) == 0
    )


+def git_get_remote_url(git_command: str, remote_name: str) -> str:
+    """Return what `git ls-remote --get-url <remote_name>` prints."""
+    return check_output((git_command, "ls-remote", "--get-url", remote_name))
+
+
+def git_remote_exist(git_command: str, remote_name: str) -> bool:
+    """Check whether there is a remote with the given name"""
+    # `git ls-remote --get-url <name>` prints a URL when such a remote is configured, and
+    # otherwise prints the given name back, so a changed value means the remote exists.
+    return git_get_remote_url(git_command, remote_name) != remote_name
+
+
+def git_get_resolved_submodule_url(git_command: str, blender_url: str, submodule_path: str) -> str:
+    """Resolve the relative URL recorded in `.gitmodules` for a submodule against `blender_url`."""
+    repo_root = check_output([git_command, "rev-parse", "--show-toplevel"])
+    gitmodules_path = os.path.join(repo_root, ".gitmodules")
+
+    # Key under which `.gitmodules` stores the URL of this submodule.
+    url_key = f"submodule.{submodule_path}.url"
+    relative_url = git_get_config(git_command, url_key, file=gitmodules_path)
+
+    # Only relative submodule URLs can be resolved this way, hence the assertion below.
+    #
+    # The slash appended to the base URL matters: unless the base ends with a slash, `urljoin`
+    # treats its last component as a file.
+    assert relative_url.startswith('..')
+
+    base_url = blender_url + "/"
+    return urljoin(base_url, relative_url)
+
+
+def git_is_remote_repository(git_command: str, repo: str) -> bool:
+    """Return True when `git ls-remote <repo> HEAD` succeeds, i.e. `repo` is a valid/clonable git repo"""
+    probe_command = (git_command, "ls-remote", repo, "HEAD")
+    return call(probe_command, exit_on_error=False, silent=True) == 0
+
+
 def git_branch(git_command: str) -> str:
    # Get current branch name.
    try:
@@ -70,6 +116,20 @@ def git_branch(git_command: str) -> str:
    return branch.strip().decode('utf8')


+def git_get_config(git_command: str, key: str, file: Optional[str] = None) -> str:
+    """Return the output of `git config --get <key>`, reading from `file` when one is given."""
+    # No `--file` argument is passed for an empty or missing `file`.
+    file_args = ["--file", file] if file else []
+    return check_output([git_command, "config", *file_args, "--get", key])
+
+
+def git_set_config(git_command: str, key: str, value: str, file: Optional[str] = None) -> str:
+    """Run `git config <key> <value>`, targeting `file` when one is given, and return its output."""
+    # No `--file` argument is passed for an empty or missing `file`.
+    file_args = ["--file", file] if file else []
+    return check_output([git_command, "config", *file_args, key, value])
+
+
 def git_tag(git_command: str) -> Optional[str]:
    # Get current tag name.
    try:
--- a/build_files/windows/check_submodules.cmd
+++ b/build_files/windows/check_submodules.cmd
@@ -3,9 +3,9 @@ if NOT exist "%BLENDER_DIR%\source\tools\.git" (
 	if not "%GIT%" == "" (
 		"%GIT%" submodule update --init --recursive --progress
 		if errorlevel 1 goto FAIL
-		"%GIT%" submodule foreach git checkout master
+		"%GIT%" submodule foreach git checkout main
 		if errorlevel 1 goto FAIL
-		"%GIT%" submodule foreach git pull --rebase origin master
+		"%GIT%" submodule foreach git pull --rebase origin main
 		if errorlevel 1 goto FAIL
 		goto EOF
 	) else (
--- a/build_files/windows/show_hashes.cmd
+++ b/build_files/windows/show_hashes.cmd
@@ -4,9 +4,9 @@ if "%GIT%" == "" (
 )
 cd "%BLENDER_DIR%"
 for /f "delims=" %%i in ('"%GIT%" rev-parse HEAD') do echo Branch_hash=%%i
-cd "%BLENDER_DIR%/release/datafiles/locale"
+cd "%BLENDER_DIR%/locale"
 for /f "delims=" %%i in ('"%GIT%" rev-parse HEAD') do echo Locale_hash=%%i
-cd "%BLENDER_DIR%/release/scripts/addons"
+cd "%BLENDER_DIR%/scripts/addons"
 for /f "delims=" %%i in ('"%GIT%" rev-parse HEAD') do echo Addons_Hash=%%i
 cd "%BLENDER_DIR%"
 :EOF
--- a/doc/blender_file_format/BlendFileReader.py
+++ b/doc/blender_file_format/BlendFileReader.py
@@ -231,7 +231,7 @@ class FileBlockHeader:
            self.SDNAIndex = 0
            self.Count = 0
            self.FileOffset = handle.tell()
-        #self.Code += ' ' * (4 - len(self.Code))
+        # self.Code += ' ' * (4 - len(self.Code))
        log.debug("found blend-file-block-fileheader {0} {1}".format(self.Code, self.FileOffset))

    def skip(self, handle):
--- a/doc/doxygen/Doxyfile
+++ b/doc/doxygen/Doxyfile
@@ -38,7 +38,7 @@ PROJECT_NAME           = Blender
 # could be handy for archiving the generated documentation or if some version
 # control system is used.

-PROJECT_NUMBER         = V3.5
+PROJECT_NUMBER         = V3.6

 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
--- a/doc/python_api/examples/blf.py
+++ b/doc/python_api/examples/blf.py
@@ -37,7 +37,7 @@ def draw_callback_px(self, context):
    # BLF drawing routine
    font_id = font_info["font_id"]
    blf.position(font_id, 2, 80, 0)
-    blf.size(font_id, 50, 72)
+    blf.size(font_id, 50)
    blf.draw(font_id, "Hello World")


--- a/doc/python_api/rst/include__bmesh.rst
+++ b/doc/python_api/rst/include__bmesh.rst
@@ -31,7 +31,7 @@ For an overview of BMesh data types and how they reference each other see:
 Example Script
 --------------

-.. literalinclude:: __/__/__/release/scripts/templates_py/bmesh_simple.py
+.. literalinclude:: __/__/__/scripts/templates_py/bmesh_simple.py


 Standalone Module
--- a/doc/python_api/rst/info_quickstart.rst
+++ b/doc/python_api/rst/info_quickstart.rst
@@ -288,7 +288,7 @@ In Python, this is done by defining a class, which is a subclass of an existing
 Example Operator
 ----------------

-.. literalinclude:: __/__/__/release/scripts/templates_py/operator_simple.py
+.. literalinclude:: __/__/__/scripts/templates_py/operator_simple.py

 Once this script runs, ``SimpleOperator`` is registered with Blender
 and can be called from Operator Search or added to the toolbar.
@@ -320,7 +320,7 @@ Example Panel
 Panels are registered as a class, like an operator.
 Notice the extra ``bl_`` variables used to set the context they display in.

-.. literalinclude:: __/__/__/release/scripts/templates_py/ui_panel_simple.py
+.. literalinclude:: __/__/__/scripts/templates_py/ui_panel_simple.py

 To run the script:

--- a/doc/python_api/sphinx_doc_gen.py
+++ b/doc/python_api/sphinx_doc_gen.py
@@ -367,13 +367,13 @@ except ImportError:
 # Note that ".." is replaced by "__" in the RST files,
 # to avoid having to match Blender's source tree.
 EXTRA_SOURCE_FILES = (
-    "../../../release/scripts/templates_py/bmesh_simple.py",
-    "../../../release/scripts/templates_py/gizmo_operator.py",
-    "../../../release/scripts/templates_py/gizmo_operator_target.py",
-    "../../../release/scripts/templates_py/gizmo_simple.py",
-    "../../../release/scripts/templates_py/operator_simple.py",
-    "../../../release/scripts/templates_py/ui_panel_simple.py",
-    "../../../release/scripts/templates_py/ui_previews_custom_icon.py",
+    "../../../scripts/templates_py/bmesh_simple.py",
+    "../../../scripts/templates_py/gizmo_operator.py",
+    "../../../scripts/templates_py/gizmo_operator_target.py",
+    "../../../scripts/templates_py/gizmo_simple.py",
+    "../../../scripts/templates_py/operator_simple.py",
+    "../../../scripts/templates_py/ui_panel_simple.py",
+    "../../../scripts/templates_py/ui_previews_custom_icon.py",
    "../examples/bmesh.ops.1.py",
    "../examples/bpy.app.translations.py",
 )
@@ -476,7 +476,7 @@ MODULE_GROUPING = {

 # -------------------------------BLENDER----------------------------------------

-# converting bytes to strings, due to T30154
+# Converting bytes to strings, due to #30154.
 BLENDER_REVISION = str(bpy.app.build_hash, 'utf_8')
 BLENDER_REVISION_TIMESTAMP = bpy.app.build_commit_timestamp

@@ -487,7 +487,7 @@ BLENDER_VERSION_DOTS = "%d.%d" % (bpy.app.version[0], bpy.app.version[1])
 if BLENDER_REVISION != "Unknown":
    # SHA1 Git hash
    BLENDER_VERSION_HASH = BLENDER_REVISION
-    BLENDER_VERSION_HASH_HTML_LINK = "<a href=https://developer.blender.org/rB%s>%s</a>" % (
+    BLENDER_VERSION_HASH_HTML_LINK = "<a href=https://projects.blender.org/blender/blender/commit/%s>%s</a>" % (
        BLENDER_VERSION_HASH, BLENDER_VERSION_HASH,
    )
    BLENDER_VERSION_DATE = time.strftime("%d/%m/%Y", time.localtime(BLENDER_REVISION_TIMESTAMP))
@@ -647,7 +647,7 @@ def undocumented_message(module_name, type_name, identifier):
        module_name, type_name, identifier,
    )

-    return "Undocumented, consider `contributing <https://developer.blender.org/T51061>`__."
+    return "Undocumented, consider `contributing <https://developer.blender.org/>`__."


 def range_str(val):
@@ -1816,9 +1816,9 @@ def pyrna2sphinx(basepath):

    # operators
    def write_ops():
-        API_BASEURL = "https://developer.blender.org/diffusion/B/browse/master/release/scripts"
-        API_BASEURL_ADDON = "https://developer.blender.org/diffusion/BA"
-        API_BASEURL_ADDON_CONTRIB = "https://developer.blender.org/diffusion/BAC"
+        API_BASEURL = "https://projects.blender.org/blender/blender/src/branch/main/scripts"
+        API_BASEURL_ADDON = "https://projects.blender.org/blender/blender-addons"
+        API_BASEURL_ADDON_CONTRIB = "https://projects.blender.org/blender/blender-addons-contrib"

        op_modules = {}
        op = None
@@ -2200,7 +2200,7 @@ def write_rst_enum_items(basepath, key, key_no_prefix, enum_items):
    Write a single page for a static enum in RST.

    This helps avoiding very large lists being in-lined in many places which is an issue
-    especially with icons in ``bpy.types.UILayout``. See T87008.
+    especially with icons in ``bpy.types.UILayout``. See #87008.
    """
    filepath = os.path.join(basepath, "%s.rst" % key_no_prefix)
    with open(filepath, "w", encoding="utf-8") as fh:
--- a/doc/python_api/static/js/version_switch.js
+++ b/doc/python_api/static/js/version_switch.js
@@ -156,7 +156,7 @@ var Popover = function() {
    },
    getNamed : function(v) {
      $.each(all_versions, function(ix, title) {
-        if (ix === "master" || ix === "latest") {
+        if (ix === "master" || ix === "main" || ix === "latest") {
          var m = title.match(/\d\.\d[\w\d\.]*/)[0];
          if (parseFloat(m) == v) {
            v = ix;
--- a/extern/hipew/README.blender
+++ b/extern/hipew/README.blender
@@ -1,5 +1,5 @@
 Project: Blender
-URL: https://git.blender.org/blender.git
+URL: https://projects.blender.org/blender/blender.git
 License: Apache 2.0
 Upstream version: N/A
 Local modifications: None
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -12,6 +12,7 @@ from bpy.props import (
    PointerProperty,
    StringProperty,
 )
+from bpy.app.translations import pgettext_iface as iface_

 from math import pi

@@ -1664,30 +1665,51 @@ class CyclesPreferences(bpy.types.AddonPreferences):
            col.label(text="No compatible GPUs found for Cycles", icon='INFO')

            if device_type == 'CUDA':
-                col.label(text="Requires NVIDIA GPU with compute capability 3.0", icon='BLANK1')
+                compute_capability = "3.0"
+                col.label(text=iface_("Requires NVIDIA GPU with compute capability %s") % compute_capability,
+                          icon='BLANK1', translate=False)
            elif device_type == 'OPTIX':
-                col.label(text="Requires NVIDIA GPU with compute capability 5.0", icon='BLANK1')
-                col.label(text="and NVIDIA driver version 470 or newer", icon='BLANK1')
+                compute_capability = "5.0"
+                driver_version = "470"
+                col.label(text=iface_("Requires NVIDIA GPU with compute capability %s") % compute_capability,
+                          icon='BLANK1', translate=False)
+                col.label(text=iface_("and NVIDIA driver version %s or newer") % driver_version,
+                          icon='BLANK1', translate=False)
            elif device_type == 'HIP':
-                import sys
-                if sys.platform[:3] == "win":
-                    col.label(text="Requires AMD GPU with RDNA architecture", icon='BLANK1')
-                    col.label(text="and AMD Radeon Pro 21.Q4 driver or newer", icon='BLANK1')
-                elif sys.platform.startswith("linux"):
-                    col.label(text="Requires AMD GPU with RDNA architecture", icon='BLANK1')
-                    col.label(text="and AMD driver version 22.10 or newer", icon='BLANK1')
+                if True:
+                    col.label(text="HIP temporarily disabled due to compiler bugs", icon='BLANK1')
+                else:
+                    import sys
+                    if sys.platform[:3] == "win":
+                        driver_version = "21.Q4"
+                        col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
+                        col.label(text=iface_("and AMD Radeon Pro %s driver or newer") % driver_version,
+                                  icon='BLANK1', translate=False)
+                    elif sys.platform.startswith("linux"):
+                        driver_version = "22.10"
+                        col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
+                        col.label(text=iface_("and AMD driver version %s or newer") % driver_version, icon='BLANK1',
+                                  translate=False)
            elif device_type == 'ONEAPI':
                import sys
                if sys.platform.startswith("win"):
+                    driver_version = "101.4032"
                    col.label(text="Requires Intel GPU with Xe-HPG architecture", icon='BLANK1')
-                    col.label(text="and Windows driver version 101.4032 or newer", icon='BLANK1')
+                    col.label(text=iface_("and Windows driver version %s or newer") % driver_version,
+                              icon='BLANK1', translate=False)
                elif sys.platform.startswith("linux"):
+                    driver_version = "1.3.24931"
                    col.label(text="Requires Intel GPU with Xe-HPG architecture and", icon='BLANK1')
-                    col.label(text="  - intel-level-zero-gpu version 1.3.24931 or newer", icon='BLANK1')
+                    col.label(text=iface_("  - intel-level-zero-gpu version %s or newer") % driver_version,
+                              icon='BLANK1', translate=False)
                    col.label(text="  - oneAPI Level-Zero Loader", icon='BLANK1')
            elif device_type == 'METAL':
-                col.label(text="Requires Apple Silicon with macOS 12.2 or newer", icon='BLANK1')
-                col.label(text="or AMD with macOS 12.3 or newer", icon='BLANK1')
+                silicon_mac_version = "12.2"
+                amd_mac_version = "12.3"
+                col.label(text=iface_("Requires Apple Silicon with macOS %s or newer") % silicon_mac_version,
+                          icon='BLANK1', translate=False)
+                col.label(text=iface_("or AMD with macOS %s or newer") % amd_mac_version, icon='BLANK1',
+                          translate=False)
            return

        for device in devices:
@@ -1697,7 +1719,8 @@ class CyclesPreferences(bpy.types.AddonPreferences):
                .replace('(TM)', unicodedata.lookup('TRADE MARK SIGN'))
                .replace('(tm)', unicodedata.lookup('TRADE MARK SIGN'))
                .replace('(R)', unicodedata.lookup('REGISTERED SIGN'))
-                .replace('(C)', unicodedata.lookup('COPYRIGHT SIGN'))
+                .replace('(C)', unicodedata.lookup('COPYRIGHT SIGN')),
+                translate=False
            )

    def draw_impl(self, layout, context):
@@ -1722,19 +1745,21 @@ class CyclesPreferences(bpy.types.AddonPreferences):
            row.prop(self, "peer_memory")

        if compute_device_type == 'METAL':
-            import platform, re
-            isNavi2 = False
+            import platform
+            import re
+            is_navi_2 = False
            for device in devices:
-                obj = re.search("((RX)|(Pro)|(PRO))\s+W?6\d00X",device.name)
-                if obj:
-                    isNavi2 = True
+                if re.search(r"((RX)|(Pro)|(PRO))\s+W?6\d00X", device.name):
+                    is_navi_2 = True
+                    break

-            # MetalRT only works on Apple Silicon and Navi2
-            if platform.machine() == 'arm64' or isNavi2:
+            # MetalRT only works on Apple Silicon and Navi2.
+            is_arm64 = platform.machine() == 'arm64'
+            if is_arm64 or is_navi_2:
                col = layout.column()
                col.use_property_split = True
                # Kernel specialization is only supported on Apple Silicon
-                if platform.machine() == 'arm64':
+                if is_arm64:
                    col.prop(self, "kernel_optimization_level")
                col.prop(self, "use_metalrt")

--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -20,7 +20,7 @@ class CyclesPresetPanel(PresetPanel, Panel):
    @staticmethod
    def post_cb(context):
        # Modify an arbitrary built-in scene property to force a depsgraph
-        # update, because add-on properties don't. (see T62325)
+        # update, because add-on properties don't. (see #62325)
        render = context.scene.render
        render.filter_size = render.filter_size

--- a/intern/cycles/blender/curves.cpp
+++ b/intern/cycles/blender/curves.cpp
@@ -3,6 +3,7 @@

 #include <optional>

+#include "BKE_curves.hh"
 #include "blender/sync.h"
 #include "blender/util.h"

@@ -272,10 +273,13 @@ static void ExportCurveSegments(Scene *scene, Hair *hair, ParticleCurveData *CDa
  if (hair->num_curves())
    return;

+  Attribute *attr_normal = NULL;
  Attribute *attr_intercept = NULL;
  Attribute *attr_length = NULL;
  Attribute *attr_random = NULL;

+  if (hair->need_attribute(scene, ATTR_STD_VERTEX_NORMAL))
+    attr_normal = hair->attributes.add(ATTR_STD_VERTEX_NORMAL);
  if (hair->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT))
    attr_intercept = hair->attributes.add(ATTR_STD_CURVE_INTERCEPT);
  if (hair->need_attribute(scene, ATTR_STD_CURVE_LENGTH))
@@ -322,6 +326,11 @@ static void ExportCurveSegments(Scene *scene, Hair *hair, ParticleCurveData *CDa
        if (attr_intercept)
          attr_intercept->add(time);

+        if (attr_normal) {
+          /* TODO: compute geometry normals. */
+          attr_normal->add(make_float3(1.0f, 0.0f, 0.0f));
+        }
+
        num_curve_keys++;
      }

@@ -885,6 +894,14 @@ static void export_hair_curves(Scene *scene,
  float *attr_length = NULL;
  float *attr_random = NULL;

+  if (hair->need_attribute(scene, ATTR_STD_VERTEX_NORMAL)) {
+    /* Compute geometry normals. */
+    float3 *attr_normal = hair->attributes.add(ATTR_STD_VERTEX_NORMAL)->data_float3();
+    int i = 0;
+    for (BL::FloatVectorValueReadOnly &normal : b_curves.normals) {
+      attr_normal[i++] = get_float3(normal.vector());
+    }
+  }
  if (hair->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT)) {
    attr_intercept = hair->attributes.add(ATTR_STD_CURVE_INTERCEPT)->data_float();
  }
--- a/intern/cycles/blender/display_driver.cpp
+++ b/intern/cycles/blender/display_driver.cpp
@@ -54,44 +54,10 @@ int BlenderDisplayShader::get_tex_coord_attrib_location()
 /* --------------------------------------------------------------------
 * BlenderFallbackDisplayShader.
 */
-
-/* TODO move shaders to standalone .glsl file. */
-static const char *FALLBACK_VERTEX_SHADER =
-    "uniform vec2 fullscreen;\n"
-    "in vec2 texCoord;\n"
-    "in vec2 pos;\n"
-    "out vec2 texCoord_interp;\n"
-    "\n"
-    "vec2 normalize_coordinates()\n"
-    "{\n"
-    "   return (vec2(2.0) * (pos / fullscreen)) - vec2(1.0);\n"
-    "}\n"
-    "\n"
-    "void main()\n"
-    "{\n"
-    "   gl_Position = vec4(normalize_coordinates(), 0.0, 1.0);\n"
-    "   texCoord_interp = texCoord;\n"
-    "}\n\0";
-
-static const char *FALLBACK_FRAGMENT_SHADER =
-    "uniform sampler2D image_texture;\n"
-    "in vec2 texCoord_interp;\n"
-    "out vec4 fragColor;\n"
-    "\n"
-    "void main()\n"
-    "{\n"
-    "   fragColor = texture(image_texture, texCoord_interp);\n"
-    "}\n\0";
-
 static GPUShader *compile_fallback_shader(void)
 {
  /* NOTE: Compilation errors are logged to console. */
-  GPUShader *shader = GPU_shader_create(FALLBACK_VERTEX_SHADER,
-                                        FALLBACK_FRAGMENT_SHADER,
-                                        nullptr,
-                                        nullptr,
-                                        nullptr,
-                                        "FallbackCyclesBlitShader");
+  GPUShader *shader = GPU_shader_create_from_info_name("gpu_shader_cycles_display_fallback");
  return shader;
 }

@@ -105,11 +71,12 @@ GPUShader *BlenderFallbackDisplayShader::bind(int width, int height)

  /* Bind shader now to enable uniform assignment. */
  GPU_shader_bind(shader_program_);
-  GPU_shader_uniform_int(shader_program_, image_texture_location_, 0);
+  int slot = 0;
+  GPU_shader_uniform_int_ex(shader_program_, image_texture_location_, 1, 1, &slot);
  float size[2];
  size[0] = width;
  size[1] = height;
-  GPU_shader_uniform_vector(shader_program_, fullscreen_location_, 2, 1, size);
+  GPU_shader_uniform_float_ex(shader_program_, fullscreen_location_, 2, 1, size);
  return shader_program_;
 }

@@ -249,8 +216,13 @@ class DisplayGPUTexture {
    height = texture_height;

    /* Texture must have a minimum size of 1x1. */
-    gpu_texture = GPU_texture_create_2d(
-        "CyclesBlitTexture", max(width, 1), max(height, 1), 1, GPU_RGBA16F, nullptr);
+    gpu_texture = GPU_texture_create_2d("CyclesBlitTexture",
+                                        max(width, 1),
+                                        max(height, 1),
+                                        1,
+                                        GPU_RGBA16F,
+                                        GPU_TEXTURE_USAGE_GENERAL,
+                                        nullptr);

    if (!gpu_texture) {
      LOG(ERROR) << "Error creating texture.";
@@ -733,14 +705,14 @@ static void draw_tile(const float2 &zoom,
  const float zoomed_height = draw_tile.params.size.y * zoom.y;
  if (texture.width != draw_tile.params.size.x || texture.height != draw_tile.params.size.y) {
    /* Resolution divider is different from 1, force nearest interpolation. */
-    GPU_texture_bind_ex(texture.gpu_texture, GPU_SAMPLER_DEFAULT, 0, false);
+    GPU_texture_bind_ex(texture.gpu_texture, GPU_SAMPLER_DEFAULT, 0);
  }
  else if (zoomed_width - draw_tile.params.size.x > 0.5f ||
           zoomed_height - draw_tile.params.size.y > 0.5f) {
-    GPU_texture_bind_ex(texture.gpu_texture, GPU_SAMPLER_DEFAULT, 0, false);
+    GPU_texture_bind_ex(texture.gpu_texture, GPU_SAMPLER_DEFAULT, 0);
  }
  else {
-    GPU_texture_bind_ex(texture.gpu_texture, GPU_SAMPLER_FILTER, 0, false);
+    GPU_texture_bind_ex(texture.gpu_texture, GPU_SAMPLER_FILTER, 0);
  }

  /* Draw at the parameters for which the texture has been updated for. This allows to always draw
--- a/intern/cycles/blender/image.cpp
+++ b/intern/cycles/blender/image.cpp
@@ -20,7 +20,7 @@ BlenderImageLoader::BlenderImageLoader(BL::Image b_image,
    : b_image(b_image),
      frame(frame),
      tile_number(tile_number),
-      /* Don't free cache for preview render to avoid race condition from T93560, to be fixed
+      /* Don't free cache for preview render to avoid race condition from #93560, to be fixed
       * properly later as we are close to release. */
      free_cache(!is_preview_render && !b_image.has_data())
 {
@@ -72,7 +72,7 @@ bool BlenderImageLoader::load_metadata(const ImageDeviceFeatures &, ImageMetaDat
    metadata.colorspace = u_colorspace_raw;
  }
  else {
-    /* In some cases (e.g. T94135), the colorspace setting in Blender gets updated as part of the
+    /* In some cases (e.g. #94135), the colorspace setting in Blender gets updated as part of the
     * metadata queries in this function, so update the colorspace setting here. */
    PointerRNA colorspace_ptr = b_image.colorspace_settings().ptr;
    metadata.colorspace = get_enum_identifier(colorspace_ptr, "name");
--- a/intern/cycles/blender/light.cpp
+++ b/intern/cycles/blender/light.cpp
@@ -24,7 +24,7 @@ void BlenderSync::sync_light(BL::Object &b_parent,
  Light *light = light_map.find(key);

  /* Check if the transform was modified, in case a linked collection is moved we do not get a
-   * specific depsgraph update (T88515). This also mimics the behavior for Objects. */
+   * specific depsgraph update (#88515). This also mimics the behavior for Objects. */
  const bool tfm_updated = (light && light->get_tfm() != tfm);

  /* Update if either object or light data changed. */
--- a/intern/cycles/blender/pointcloud.cpp
+++ b/intern/cycles/blender/pointcloud.cpp
@@ -222,7 +222,10 @@ static void export_pointcloud_motion(PointCloud *pointcloud,

  /* Export motion points. */
  const int num_points = pointcloud->num_points();
-  float3 *mP = attr_mP->data_float3() + motion_step * num_points;
+  // Point cloud attributes are stored as float4 with the radius
+  // in the w element. This is explicit now as float3 is no longer
+  // interchangeable with float4 as it is packed now.
+  float4 *mP = attr_mP->data_float4() + motion_step * num_points;
  bool have_motion = false;
  const array<float3> &pointcloud_points = pointcloud->get_points();

@@ -231,11 +234,9 @@ static void export_pointcloud_motion(PointCloud *pointcloud,
  std::optional<BL::FloatAttribute> b_attr_radius = find_radius_attribute(b_pointcloud);

  for (int i = 0; i < std::min(num_points, b_points_num); i++) {
-    const float3 co = get_float3(b_attr_position.data[i].vector());
+    const float3 P = get_float3(b_attr_position.data[i].vector());
    const float radius = b_attr_radius ? b_attr_radius->data[i].value() : 0.01f;
-    float3 P = co;
-    P.w = radius;
-    mP[i] = P;
+    mP[i] = make_float4(P.x, P.y, P.z, radius);
    have_motion = have_motion || (P != pointcloud_points[i]);
  }

--- a/intern/cycles/blender/python.cpp
+++ b/intern/cycles/blender/python.cpp
@@ -94,7 +94,7 @@ void python_thread_state_restore(void **python_thread_state)
  *python_thread_state = NULL;
 }

-static const char *PyC_UnicodeAsByte(PyObject *py_str, PyObject **coerce)
+static const char *PyC_UnicodeAsBytes(PyObject *py_str, PyObject **coerce)
 {
  const char *result = PyUnicode_AsUTF8(py_str);
  if (result) {
@@ -131,8 +131,8 @@ static PyObject *init_func(PyObject * /*self*/, PyObject *args)
  }

  PyObject *path_coerce = nullptr, *user_path_coerce = nullptr;
-  path_init(PyC_UnicodeAsByte(path, &path_coerce),
-            PyC_UnicodeAsByte(user_path, &user_path_coerce));
+  path_init(PyC_UnicodeAsBytes(path, &path_coerce),
+            PyC_UnicodeAsBytes(user_path, &user_path_coerce));
  Py_XDECREF(path_coerce);
  Py_XDECREF(user_path_coerce);

--- a/intern/cycles/blender/session.cpp
+++ b/intern/cycles/blender/session.cpp
@@ -404,7 +404,7 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
     * point we know that we've got everything to render current view layer.
     */
    /* At the moment we only free if we are not doing multi-view
-     * (or if we are rendering the last view). See T58142/D4239 for discussion.
+     * (or if we are rendering the last view). See #58142/D4239 for discussion.
     */
    if (view_index == num_views - 1) {
      free_blender_memory_if_possible();
--- a/intern/cycles/blender/shader.cpp
+++ b/intern/cycles/blender/shader.cpp
@@ -660,6 +660,22 @@ static ShaderNode *add_node(Scene *scene,
                                                    NODE_PRINCIPLED_HAIR_REFLECTANCE));
    node = principled_hair;
  }
+  else if (b_node.is_a(&RNA_ShaderNodeBsdfHairMicrofacet)) {
+    BL::ShaderNodeBsdfHairMicrofacet b_microfacet_hair_node(b_node);
+    MicrofacetHairBsdfNode *microfacet_hair = graph->create_node<MicrofacetHairBsdfNode>();
+    microfacet_hair->set_parametrization(
+        (NodeMicrofacetHairParametrization)get_enum(b_microfacet_hair_node.ptr,
+                                                    "parametrization",
+                                                    NODE_MICROFACET_HAIR_NUM,
+                                                    NODE_MICROFACET_HAIR_REFLECTANCE));
+
+    microfacet_hair->set_distribution_type(
+        (NodeMicrofacetHairDistributionType)get_enum(b_microfacet_hair_node.ptr,
+                                                     "distribution_type",
+                                                     NODE_MICROFACET_HAIR_DISTRIBUTION_TYPE_NUM,
+                                                     NODE_MICROFACET_HAIR_GGX));
+    node = microfacet_hair;
+  }
  else if (b_node.is_a(&RNA_ShaderNodeBsdfPrincipled)) {
    BL::ShaderNodeBsdfPrincipled b_principled_node(b_node);
    PrincipledBsdfNode *principled = graph->create_node<PrincipledBsdfNode>();
--- a/intern/cycles/blender/sync.cpp
+++ b/intern/cycles/blender/sync.cpp
@@ -349,8 +349,7 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background)

  bool use_light_tree = get_boolean(cscene, "use_light_tree");
  integrator->set_use_light_tree(use_light_tree);
-  integrator->set_light_sampling_threshold(
-      (use_light_tree) ? 0.0f : get_float(cscene, "light_sampling_threshold"));
+  integrator->set_light_sampling_threshold(get_float(cscene, "light_sampling_threshold"));

  if (integrator->use_light_tree_is_modified()) {
    scene->light_manager->tag_update(scene, LightManager::UPDATE_ALL);
@@ -766,7 +765,7 @@ void BlenderSync::free_data_after_sync(BL::Depsgraph &b_depsgraph)
      (BlenderSession::headless || is_interface_locked) &&
      /* Baking re-uses the depsgraph multiple times, clearing crashes
       * reading un-evaluated mesh data which isn't aligned with the
-       * geometry we're baking, see T71012. */
+       * geometry we're baking, see #71012. */
      !scene->bake_manager->get_baking() &&
      /* Persistent data must main caches for performance and correctness. */
      !is_persistent_data;
--- a/intern/cycles/bvh/build.cpp
+++ b/intern/cycles/bvh/build.cpp
@@ -180,9 +180,9 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Hair *hair
        curve.bounds_grow(k, &hair->get_curve_keys()[0], curve_radius, bounds);
        const size_t num_keys = hair->get_curve_keys().size();
        const size_t num_steps = hair->get_motion_steps();
-        const float3 *key_steps = curve_attr_mP->data_float3();
+        const float4 *key_steps = curve_attr_mP->data_float4();
        for (size_t step = 0; step < num_steps - 1; step++) {
-          curve.bounds_grow(k, key_steps + step * num_keys, curve_radius, bounds);
+          curve.bounds_grow(k, key_steps + step * num_keys, bounds);
        }
        if (bounds.valid()) {
          int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k);
@@ -200,7 +200,7 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Hair *hair
        const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
        const size_t num_steps = hair->get_motion_steps();
        const float3 *curve_keys = &hair->get_curve_keys()[0];
-        const float3 *key_steps = curve_attr_mP->data_float3();
+        const float4 *key_steps = curve_attr_mP->data_float4();
        const size_t num_keys = hair->get_curve_keys().size();
        /* Calculate bounding box of the previous time step.
         * Will be reused later to avoid duplicated work on
--- a/intern/cycles/bvh/embree.cpp
+++ b/intern/cycles/bvh/embree.cpp
@@ -254,20 +254,15 @@ void BVHEmbree::add_triangles(const Object *ob, const Mesh *mesh, int i)
  rtcSetGeometryBuildQuality(geom_id, build_quality);
  rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);

-  unsigned *rtc_indices = (unsigned *)rtcSetNewGeometryBuffer(
-      geom_id, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3, sizeof(int) * 3, num_triangles);
-  assert(rtc_indices);
-  if (!rtc_indices) {
-    VLOG_WARNING << "Embree could not create new geometry buffer for mesh " << mesh->name.c_str()
-                 << ".\n";
-    return;
-  }
-  for (size_t j = 0; j < num_triangles; ++j) {
-    Mesh::Triangle t = mesh->get_triangle(j);
-    rtc_indices[j * 3] = t.v[0];
-    rtc_indices[j * 3 + 1] = t.v[1];
-    rtc_indices[j * 3 + 2] = t.v[2];
-  }
+  const int *triangles = mesh->get_triangles().data();
+  rtcSetSharedGeometryBuffer(geom_id,
+                             RTC_BUFFER_TYPE_INDEX,
+                             0,
+                             RTC_FORMAT_UINT3,
+                             triangles,
+                             0,
+                             sizeof(int) * 3,
+                             num_triangles);

  set_tri_vertex_buffer(geom_id, mesh, false);

@@ -309,28 +304,46 @@ void BVHEmbree::set_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh, con
      verts = &attr_mP->data_float3()[t_ * num_verts];
    }

-    float *rtc_verts = (update) ?
-                           (float *)rtcGetGeometryBufferData(geom_id, RTC_BUFFER_TYPE_VERTEX, t) :
-                           (float *)rtcSetNewGeometryBuffer(geom_id,
-                                                            RTC_BUFFER_TYPE_VERTEX,
-                                                            t,
-                                                            RTC_FORMAT_FLOAT3,
-                                                            sizeof(float) * 3,
-                                                            num_verts + 1);
-
-    assert(rtc_verts);
-    if (rtc_verts) {
-      for (size_t j = 0; j < num_verts; ++j) {
-        rtc_verts[0] = verts[j].x;
-        rtc_verts[1] = verts[j].y;
-        rtc_verts[2] = verts[j].z;
-        rtc_verts += 3;
-      }
-    }
-
    if (update) {
      rtcUpdateGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t);
    }
+    else {
+      rtcSetSharedGeometryBuffer(geom_id,
+                                 RTC_BUFFER_TYPE_VERTEX,
+                                 t,
+                                 RTC_FORMAT_FLOAT3,
+                                 verts,
+                                 0,
+                                 sizeof(float3),
+                                 num_verts + 1);
+    }
+  }
+}
+
+/**
+ * Packs the hair motion curve data control vertices (CVs) into float4s as [x y z radius].
+ */
+template<typename T>
+void pack_motion_verts(size_t num_curves,
+                       const Hair *hair,
+                       const T *verts,
+                       const float *curve_radius,
+                       float4 *rtc_verts)
+{
+  for (size_t j = 0; j < num_curves; ++j) {
+    Hair::Curve c = hair->get_curve(j);
+    int fk = c.first_key;
+    int k = 1;
+    for (; k < c.num_keys + 1; ++k, ++fk) {
+      rtc_verts[k].x = verts[fk].x;
+      rtc_verts[k].y = verts[fk].y;
+      rtc_verts[k].z = verts[fk].z;
+      rtc_verts[k].w = curve_radius[fk];
+    }
+    /* Duplicate Embree's Catmull-Rom spline CVs at the start and end of each curve. */
+    rtc_verts[0] = rtc_verts[1];
+    rtc_verts[k] = rtc_verts[k - 1];
+    rtc_verts += c.num_keys + 2;
  }
 }

@@ -360,15 +373,10 @@ void BVHEmbree::set_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair, c
  const int t_mid = (num_motion_steps - 1) / 2;
  const float *curve_radius = &hair->get_curve_radius()[0];
  for (int t = 0; t < num_motion_steps; ++t) {
-    const float3 *verts;
-    if (t == t_mid || attr_mP == NULL) {
-      verts = &hair->get_curve_keys()[0];
-    }
-    else {
-      int t_ = (t > t_mid) ? (t - 1) : t;
-      verts = &attr_mP->data_float3()[t_ * num_keys];
-    }
-
+    // As float4 and float3 are no longer interchangeable, the two types need to be
+    // handled separately. Motion attributes are float4s where the radius is stored in w,
+    // while the middle motion step comes from the curve keys, which are stored as float3s
+    // with the radius stored in a separate array.
    float4 *rtc_verts = (update) ? (float4 *)rtcGetGeometryBufferData(
                                       geom_id, RTC_BUFFER_TYPE_VERTEX, t) :
                                   (float4 *)rtcSetNewGeometryBuffer(geom_id,
@@ -381,18 +389,14 @@ void BVHEmbree::set_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair, c
    assert(rtc_verts);
    if (rtc_verts) {
      const size_t num_curves = hair->num_curves();
-      for (size_t j = 0; j < num_curves; ++j) {
-        Hair::Curve c = hair->get_curve(j);
-        int fk = c.first_key;
-        int k = 1;
-        for (; k < c.num_keys + 1; ++k, ++fk) {
-          rtc_verts[k] = float3_to_float4(verts[fk]);
-          rtc_verts[k].w = curve_radius[fk];
-        }
-        /* Duplicate Embree's Catmull-Rom spline CVs at the start and end of each curve. */
-        rtc_verts[0] = rtc_verts[1];
-        rtc_verts[k] = rtc_verts[k - 1];
-        rtc_verts += c.num_keys + 2;
+      if (t == t_mid || attr_mP == NULL) {
+        const float3 *verts = &hair->get_curve_keys()[0];
+        pack_motion_verts<float3>(num_curves, hair, verts, curve_radius, rtc_verts);
+      }
+      else {
+        int t_ = (t > t_mid) ? (t - 1) : t;
+        const float4 *verts = &attr_mP->data_float4()[t_ * num_keys];
+        pack_motion_verts<float4>(num_curves, hair, verts, curve_radius, rtc_verts);
      }
    }

@@ -402,6 +406,20 @@ void BVHEmbree::set_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair, c
  }
 }

+/**
+ * Pack the motion points into a float4 as [x y z radius]
+ */
+template<typename T>
+void pack_motion_points(size_t num_points, const T *verts, const float *radius, float4 *rtc_verts)
+{
+  for (size_t j = 0; j < num_points; ++j) {
+    rtc_verts[j].x = verts[j].x;
+    rtc_verts[j].y = verts[j].y;
+    rtc_verts[j].z = verts[j].z;
+    rtc_verts[j].w = radius[j];
+  }
+}
+
 void BVHEmbree::set_point_vertex_buffer(RTCGeometry geom_id,
                                        const PointCloud *pointcloud,
                                        const bool update)
@@ -421,15 +439,10 @@ void BVHEmbree::set_point_vertex_buffer(RTCGeometry geom_id,
  const int t_mid = (num_motion_steps - 1) / 2;
  const float *radius = pointcloud->get_radius().data();
  for (int t = 0; t < num_motion_steps; ++t) {
-    const float3 *verts;
-    if (t == t_mid || attr_mP == NULL) {
-      verts = pointcloud->get_points().data();
-    }
-    else {
-      int t_ = (t > t_mid) ? (t - 1) : t;
-      verts = &attr_mP->data_float3()[t_ * num_points];
-    }
-
+    // As float4 and float3 are no longer interchangeable, the two types need to be
+    // handled separately. Motion attributes are float4s where the radius is stored in w,
+    // while the middle motion step comes from the point cloud points, which are stored as
+    // float3s with the radius stored in a separate array.
    float4 *rtc_verts = (update) ? (float4 *)rtcGetGeometryBufferData(
                                       geom_id, RTC_BUFFER_TYPE_VERTEX, t) :
                                   (float4 *)rtcSetNewGeometryBuffer(geom_id,
@@ -441,9 +454,14 @@ void BVHEmbree::set_point_vertex_buffer(RTCGeometry geom_id,

    assert(rtc_verts);
    if (rtc_verts) {
-      for (size_t j = 0; j < num_points; ++j) {
-        rtc_verts[j] = float3_to_float4(verts[j]);
-        rtc_verts[j].w = radius[j];
+      if (t == t_mid || attr_mP == NULL) {
+        const float3 *verts = pointcloud->get_points().data();
+        pack_motion_points<float3>(num_points, verts, radius, rtc_verts);
+      }
+      else {
+        int t_ = (t > t_mid) ? (t - 1) : t;
+        const float4 *verts = &attr_mP->data_float4()[t_ * num_points];
+        pack_motion_points<float4>(num_points, verts, radius, rtc_verts);
      }
    }

--- a/intern/cycles/bvh/sort.cpp
+++ b/intern/cycles/bvh/sort.cpp
@@ -35,7 +35,7 @@ struct BVHReferenceCompare {

  /* Compare two references.
   *
-   * Returns value is similar to return value of strcmp().
+   * Returns value is similar to return value of `strcmp()`.
   */
  __forceinline int compare(const BVHReference &ra, const BVHReference &rb) const
  {
--- a/intern/cycles/cmake/external_libs.cmake
+++ b/intern/cycles/cmake/external_libs.cmake
@@ -42,12 +42,15 @@ endif()
 ###########################################################################

 if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
-  find_package(HIP)
-  set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
+  set(WITH_CYCLES_HIP_BINARIES OFF)
+  message(STATUS "HIP temporarily disabled due to compiler bugs")

-  if(HIP_FOUND)
-    message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
-  endif()
+  # find_package(HIP)
+  # set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
+
+  # if(HIP_FOUND)
+  #   message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
+  # endif()
 endif()

 if(NOT WITH_HIP_DYNLOAD)
--- a/intern/cycles/device/cuda/device_impl.cpp
+++ b/intern/cycles/device/cuda/device_impl.cpp
@@ -53,8 +53,12 @@ void CUDADevice::set_error(const string &error)
 }

 CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
-    : Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL)
+    : GPUDevice(info, stats, profiler)
 {
+  /* Verify that base class types can be used with specific backend types */
+  static_assert(sizeof(texMemObject) == sizeof(CUtexObject));
+  static_assert(sizeof(arrayMemObject) == sizeof(CUarray));
+
  first_error = true;

  cuDevId = info.num;
@@ -65,12 +69,6 @@ CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)

  need_texture_info = false;

-  device_texture_headroom = 0;
-  device_working_headroom = 0;
-  move_texture_to_host = false;
-  map_host_limit = 0;
-  map_host_used = 0;
-  can_map_host = 0;
  pitch_alignment = 0;

  /* Initialize CUDA. */
@@ -91,8 +89,9 @@ CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
  /* CU_CTX_MAP_HOST for mapping host memory when out of device memory.
   * CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render,
   * so we can predict which memory to map to host. */
-  cuda_assert(
-      cuDeviceGetAttribute(&can_map_host, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice));
+  int value;
+  cuda_assert(cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice));
+  can_map_host = value != 0;

  cuda_assert(cuDeviceGetAttribute(
      &pitch_alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, cuDevice));
@@ -499,311 +498,57 @@ void CUDADevice::reserve_local_memory(const uint kernel_features)
 #  endif
 }

-void CUDADevice::init_host_memory()
-{
-  /* Limit amount of host mapped memory, because allocating too much can
-   * cause system instability. Leave at least half or 4 GB of system
-   * memory free, whichever is smaller. */
-  size_t default_limit = 4 * 1024 * 1024 * 1024LL;
-  size_t system_ram = system_physical_ram();
-
-  if (system_ram > 0) {
-    if (system_ram / 2 > default_limit) {
-      map_host_limit = system_ram - default_limit;
-    }
-    else {
-      map_host_limit = system_ram / 2;
-    }
-  }
-  else {
-    VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
-    map_host_limit = 0;
-  }
-
-  /* Amount of device memory to keep is free after texture memory
-   * and working memory allocations respectively. We set the working
-   * memory limit headroom lower so that some space is left after all
-   * texture memory allocations. */
-  device_working_headroom = 32 * 1024 * 1024LL;   // 32MB
-  device_texture_headroom = 128 * 1024 * 1024LL;  // 128MB
-
-  VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
-            << " bytes. (" << string_human_readable_size(map_host_limit) << ")";
-}
-
-void CUDADevice::load_texture_info()
-{
-  if (need_texture_info) {
-    /* Unset flag before copying, so this does not loop indefinitely if the copy below calls
-     * into 'move_textures_to_host' (which calls 'load_texture_info' again). */
-    need_texture_info = false;
-    texture_info.copy_to_device();
-  }
-}
-
-void CUDADevice::move_textures_to_host(size_t size, bool for_texture)
-{
-  /* Break out of recursive call, which can happen when moving memory on a multi device. */
-  static bool any_device_moving_textures_to_host = false;
-  if (any_device_moving_textures_to_host) {
-    return;
-  }
-
-  /* Signal to reallocate textures in host memory only. */
-  move_texture_to_host = true;
-
-  while (size > 0) {
-    /* Find suitable memory allocation to move. */
-    device_memory *max_mem = NULL;
-    size_t max_size = 0;
-    bool max_is_image = false;
-
-    thread_scoped_lock lock(cuda_mem_map_mutex);
-    foreach (CUDAMemMap::value_type &pair, cuda_mem_map) {
-      device_memory &mem = *pair.first;
-      CUDAMem *cmem = &pair.second;
-
-      /* Can only move textures allocated on this device (and not those from peer devices).
-       * And need to ignore memory that is already on the host. */
-      if (!mem.is_resident(this) || cmem->use_mapped_host) {
-        continue;
-      }
-
-      bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
-                        (&mem != &texture_info);
-      bool is_image = is_texture && (mem.data_height > 1);
-
-      /* Can't move this type of memory. */
-      if (!is_texture || cmem->array) {
-        continue;
-      }
-
-      /* For other textures, only move image textures. */
-      if (for_texture && !is_image) {
-        continue;
-      }
-
-      /* Try to move largest allocation, prefer moving images. */
-      if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
-        max_is_image = is_image;
-        max_size = mem.device_size;
-        max_mem = &mem;
-      }
-    }
-    lock.unlock();
-
-    /* Move to host memory. This part is mutex protected since
-     * multiple CUDA devices could be moving the memory. The
-     * first one will do it, and the rest will adopt the pointer. */
-    if (max_mem) {
-      VLOG_WORK << "Move memory from device to host: " << max_mem->name;
-
-      static thread_mutex move_mutex;
-      thread_scoped_lock lock(move_mutex);
-
-      any_device_moving_textures_to_host = true;
-
-      /* Potentially need to call back into multi device, so pointer mapping
-       * and peer devices are updated. This is also necessary since the device
-       * pointer may just be a key here, so cannot be accessed and freed directly.
-       * Unfortunately it does mean that memory is reallocated on all other
-       * devices as well, which is potentially dangerous when still in use (since
-       * a thread rendering on another devices would only be caught in this mutex
-       * if it so happens to do an allocation at the same time as well. */
-      max_mem->device_copy_to();
-      size = (max_size >= size) ? 0 : size - max_size;
-
-      any_device_moving_textures_to_host = false;
-    }
-    else {
-      break;
-    }
-  }
-
-  /* Unset flag before texture info is reloaded, since it should stay in device memory. */
-  move_texture_to_host = false;
-
-  /* Update texture info array with new pointers. */
-  load_texture_info();
-}
-
-CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_padding)
+void CUDADevice::get_device_memory_info(size_t &total, size_t &free)
 {
  CUDAContextScope scope(this);

-  CUdeviceptr device_pointer = 0;
-  size_t size = mem.memory_size() + pitch_padding;
-
-  CUresult mem_alloc_result = CUDA_ERROR_OUT_OF_MEMORY;
-  const char *status = "";
-
-  /* First try allocating in device memory, respecting headroom. We make
-   * an exception for texture info. It is small and frequently accessed,
-   * so treat it as working memory.
-   *
-   * If there is not enough room for working memory, we will try to move
-   * textures to host memory, assuming the performance impact would have
-   * been worse for working memory. */
-  bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
-  bool is_image = is_texture && (mem.data_height > 1);
-
-  size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
-
-  size_t total = 0, free = 0;
  cuMemGetInfo(&free, &total);
-
-  /* Move textures to host memory if needed. */
-  if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
-    move_textures_to_host(size + headroom - free, is_texture);
-    cuMemGetInfo(&free, &total);
-  }
-
-  /* Allocate in device memory. */
-  if (!move_texture_to_host && (size + headroom) < free) {
-    mem_alloc_result = cuMemAlloc(&device_pointer, size);
-    if (mem_alloc_result == CUDA_SUCCESS) {
-      status = " in device memory";
-    }
-  }
-
-  /* Fall back to mapped host memory if needed and possible. */
-
-  void *shared_pointer = 0;
-
-  if (mem_alloc_result != CUDA_SUCCESS && can_map_host && mem.type != MEM_DEVICE_ONLY) {
-    if (mem.shared_pointer) {
-      /* Another device already allocated host memory. */
-      mem_alloc_result = CUDA_SUCCESS;
-      shared_pointer = mem.shared_pointer;
-    }
-    else if (map_host_used + size < map_host_limit) {
-      /* Allocate host memory ourselves. */
-      mem_alloc_result = cuMemHostAlloc(
-          &shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
-
-      assert((mem_alloc_result == CUDA_SUCCESS && shared_pointer != 0) ||
-             (mem_alloc_result != CUDA_SUCCESS && shared_pointer == 0));
-    }
-
-    if (mem_alloc_result == CUDA_SUCCESS) {
-      cuda_assert(cuMemHostGetDevicePointer_v2(&device_pointer, shared_pointer, 0));
-      map_host_used += size;
-      status = " in host memory";
-    }
-  }
-
-  if (mem_alloc_result != CUDA_SUCCESS) {
-    if (mem.type == MEM_DEVICE_ONLY) {
-      status = " failed, out of device memory";
-      set_error("System is out of GPU memory");
-    }
-    else {
-      status = " failed, out of device and host memory";
-      set_error("System is out of GPU and shared host memory");
-    }
-  }
-
-  if (mem.name) {
-    VLOG_WORK << "Buffer allocate: " << mem.name << ", "
-              << string_human_readable_number(mem.memory_size()) << " bytes. ("
-              << string_human_readable_size(mem.memory_size()) << ")" << status;
-  }
-
-  mem.device_pointer = (device_ptr)device_pointer;
-  mem.device_size = size;
-  stats.mem_alloc(size);
-
-  if (!mem.device_pointer) {
-    return NULL;
-  }
-
-  /* Insert into map of allocations. */
-  thread_scoped_lock lock(cuda_mem_map_mutex);
-  CUDAMem *cmem = &cuda_mem_map[&mem];
-  if (shared_pointer != 0) {
-    /* Replace host pointer with our host allocation. Only works if
-     * CUDA memory layout is the same and has no pitch padding. Also
-     * does not work if we move textures to host during a render,
-     * since other devices might be using the memory. */
-
-    if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
-        mem.host_pointer != shared_pointer) {
-      memcpy(shared_pointer, mem.host_pointer, size);
-
-      /* A Call to device_memory::host_free() should be preceded by
-       * a call to device_memory::device_free() for host memory
-       * allocated by a device to be handled properly. Two exceptions
-       * are here and a call in OptiXDevice::generic_alloc(), where
-       * the current host memory can be assumed to be allocated by
-       * device_memory::host_alloc(), not by a device */
-
-      mem.host_free();
-      mem.host_pointer = shared_pointer;
-    }
-    mem.shared_pointer = shared_pointer;
-    mem.shared_counter++;
-    cmem->use_mapped_host = true;
-  }
-  else {
-    cmem->use_mapped_host = false;
-  }
-
-  return cmem;
 }

-void CUDADevice::generic_copy_to(device_memory &mem)
+bool CUDADevice::alloc_device(void *&device_pointer, size_t size)
 {
-  if (!mem.host_pointer || !mem.device_pointer) {
-    return;
-  }
+  CUDAContextScope scope(this);

-  /* If use_mapped_host of mem is false, the current device only uses device memory allocated by
-   * cuMemAlloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from
-   * mem.host_pointer. */
-  thread_scoped_lock lock(cuda_mem_map_mutex);
-  if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
-    const CUDAContextScope scope(this);
-    cuda_assert(
-        cuMemcpyHtoD((CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size()));
-  }
+  CUresult mem_alloc_result = cuMemAlloc((CUdeviceptr *)&device_pointer, size);
+  return mem_alloc_result == CUDA_SUCCESS;
 }

-void CUDADevice::generic_free(device_memory &mem)
+void CUDADevice::free_device(void *device_pointer)
 {
-  if (mem.device_pointer) {
-    CUDAContextScope scope(this);
-    thread_scoped_lock lock(cuda_mem_map_mutex);
-    DCHECK(cuda_mem_map.find(&mem) != cuda_mem_map.end());
-    const CUDAMem &cmem = cuda_mem_map[&mem];
+  CUDAContextScope scope(this);

-    /* If cmem.use_mapped_host is true, reference counting is used
-     * to safely free a mapped host memory. */
+  cuda_assert(cuMemFree((CUdeviceptr)device_pointer));
+}

-    if (cmem.use_mapped_host) {
-      assert(mem.shared_pointer);
-      if (mem.shared_pointer) {
-        assert(mem.shared_counter > 0);
-        if (--mem.shared_counter == 0) {
-          if (mem.host_pointer == mem.shared_pointer) {
-            mem.host_pointer = 0;
-          }
-          cuMemFreeHost(mem.shared_pointer);
-          mem.shared_pointer = 0;
-        }
-      }
-      map_host_used -= mem.device_size;
-    }
-    else {
-      /* Free device memory. */
-      cuda_assert(cuMemFree(mem.device_pointer));
-    }
+bool CUDADevice::alloc_host(void *&shared_pointer, size_t size)
+{
+  CUDAContextScope scope(this);

-    stats.mem_free(mem.device_size);
-    mem.device_pointer = 0;
-    mem.device_size = 0;
+  CUresult mem_alloc_result = cuMemHostAlloc(
+      &shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
+  return mem_alloc_result == CUDA_SUCCESS;
+}

-    cuda_mem_map.erase(cuda_mem_map.find(&mem));
-  }
+void CUDADevice::free_host(void *shared_pointer)
+{
+  CUDAContextScope scope(this);
+
+  cuMemFreeHost(shared_pointer);
+}
+
+bool CUDADevice::transform_host_pointer(void *&device_pointer, void *&shared_pointer)
+{
+  CUDAContextScope scope(this);
+
+  cuda_assert(cuMemHostGetDevicePointer_v2((CUdeviceptr *)&device_pointer, shared_pointer, 0));
+  return true;
+}
+
+void CUDADevice::copy_host_to_device(void *device_pointer, void *host_pointer, size_t size)
+{
+  const CUDAContextScope scope(this);
+
+  cuda_assert(cuMemcpyHtoD((CUdeviceptr)device_pointer, host_pointer, size));
 }

 void CUDADevice::mem_alloc(device_memory &mem)
@@ -868,8 +613,8 @@ void CUDADevice::mem_zero(device_memory &mem)

  /* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory
   * regardless of mem.host_pointer and mem.shared_pointer. */
-  thread_scoped_lock lock(cuda_mem_map_mutex);
-  if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
+  thread_scoped_lock lock(device_mem_map_mutex);
+  if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
    const CUDAContextScope scope(this);
    cuda_assert(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size()));
  }
@@ -994,19 +739,19 @@ void CUDADevice::tex_alloc(device_texture &mem)
      return;
  }

-  CUDAMem *cmem = NULL;
+  Mem *cmem = NULL;
  CUarray array_3d = NULL;
  size_t src_pitch = mem.data_width * dsize * mem.data_elements;
  size_t dst_pitch = src_pitch;

  if (!mem.is_resident(this)) {
-    thread_scoped_lock lock(cuda_mem_map_mutex);
-    cmem = &cuda_mem_map[&mem];
+    thread_scoped_lock lock(device_mem_map_mutex);
+    cmem = &device_mem_map[&mem];
    cmem->texobject = 0;

    if (mem.data_depth > 1) {
      array_3d = (CUarray)mem.device_pointer;
-      cmem->array = array_3d;
+      cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
    }
    else if (mem.data_height > 0) {
      dst_pitch = align_up(src_pitch, pitch_alignment);
@@ -1050,10 +795,10 @@ void CUDADevice::tex_alloc(device_texture &mem)
    mem.device_size = size;
    stats.mem_alloc(size);

-    thread_scoped_lock lock(cuda_mem_map_mutex);
-    cmem = &cuda_mem_map[&mem];
+    thread_scoped_lock lock(device_mem_map_mutex);
+    cmem = &device_mem_map[&mem];
    cmem->texobject = 0;
-    cmem->array = array_3d;
+    cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
  }
  else if (mem.data_height > 0) {
    /* 2D texture, using pitch aligned linear memory. */
@@ -1137,8 +882,8 @@ void CUDADevice::tex_alloc(device_texture &mem)
    texDesc.filterMode = filter_mode;
    texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;

-    thread_scoped_lock lock(cuda_mem_map_mutex);
-    cmem = &cuda_mem_map[&mem];
+    thread_scoped_lock lock(device_mem_map_mutex);
+    cmem = &device_mem_map[&mem];

    cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));

@@ -1153,9 +898,9 @@ void CUDADevice::tex_free(device_texture &mem)
 {
  if (mem.device_pointer) {
    CUDAContextScope scope(this);
-    thread_scoped_lock lock(cuda_mem_map_mutex);
-    DCHECK(cuda_mem_map.find(&mem) != cuda_mem_map.end());
-    const CUDAMem &cmem = cuda_mem_map[&mem];
+    thread_scoped_lock lock(device_mem_map_mutex);
+    DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
+    const Mem &cmem = device_mem_map[&mem];

    if (cmem.texobject) {
      /* Free bindless texture. */
@@ -1164,16 +909,16 @@ void CUDADevice::tex_free(device_texture &mem)

    if (!mem.is_resident(this)) {
      /* Do not free memory here, since it was allocated on a different device. */
-      cuda_mem_map.erase(cuda_mem_map.find(&mem));
+      device_mem_map.erase(device_mem_map.find(&mem));
    }
    else if (cmem.array) {
      /* Free array. */
-      cuArrayDestroy(cmem.array);
+      cuArrayDestroy(reinterpret_cast<CUarray>(cmem.array));
      stats.mem_free(mem.device_size);
      mem.device_pointer = 0;
      mem.device_size = 0;

-      cuda_mem_map.erase(cuda_mem_map.find(&mem));
+      device_mem_map.erase(device_mem_map.find(&mem));
    }
    else {
      lock.unlock();
--- a/intern/cycles/device/cuda/device_impl.h
+++ b/intern/cycles/device/cuda/device_impl.h
@@ -21,7 +21,7 @@ CCL_NAMESPACE_BEGIN

 class DeviceQueue;

-class CUDADevice : public Device {
+class CUDADevice : public GPUDevice {

  friend class CUDAContextScope;

@@ -29,36 +29,11 @@ class CUDADevice : public Device {
  CUdevice cuDevice;
  CUcontext cuContext;
  CUmodule cuModule;
-  size_t device_texture_headroom;
-  size_t device_working_headroom;
-  bool move_texture_to_host;
-  size_t map_host_used;
-  size_t map_host_limit;
-  int can_map_host;
  int pitch_alignment;
  int cuDevId;
  int cuDevArchitecture;
  bool first_error;

-  struct CUDAMem {
-    CUDAMem() : texobject(0), array(0), use_mapped_host(false)
-    {
-    }
-
-    CUtexObject texobject;
-    CUarray array;
-
-    /* If true, a mapped host memory in shared_pointer is being used. */
-    bool use_mapped_host;
-  };
-  typedef map<device_memory *, CUDAMem> CUDAMemMap;
-  CUDAMemMap cuda_mem_map;
-  thread_mutex cuda_mem_map_mutex;
-
-  /* Bindless Textures */
-  device_vector<TextureInfo> texture_info;
-  bool need_texture_info;
-
  CUDADeviceKernels kernels;

  static bool have_precompiled_kernels();
@@ -88,17 +63,13 @@ class CUDADevice : public Device {

  void reserve_local_memory(const uint kernel_features);

-  void init_host_memory();
-
-  void load_texture_info();
-
-  void move_textures_to_host(size_t size, bool for_texture);
-
-  CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
-
-  void generic_copy_to(device_memory &mem);
-
-  void generic_free(device_memory &mem);
+  virtual void get_device_memory_info(size_t &total, size_t &free) override;
+  virtual bool alloc_device(void *&device_pointer, size_t size) override;
+  virtual void free_device(void *device_pointer) override;
+  virtual bool alloc_host(void *&shared_pointer, size_t size) override;
+  virtual void free_host(void *shared_pointer) override;
+  virtual bool transform_host_pointer(void *&device_pointer, void *&shared_pointer) override;
+  virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) override;

  void mem_alloc(device_memory &mem) override;

--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -452,6 +452,320 @@ void *Device::get_cpu_osl_memory()
  return nullptr;
 }

+GPUDevice::~GPUDevice() noexcept(false)
+{
+}
+
+bool GPUDevice::load_texture_info()
+{
+  if (need_texture_info) {
+    /* Unset flag before copying, so this does not loop indefinitely if the copy below calls
+     * into 'move_textures_to_host' (which calls 'load_texture_info' again). */
+    need_texture_info = false;
+    texture_info.copy_to_device();
+    return true;
+  }
+  else {
+    return false;
+  }
+}
+
+void GPUDevice::init_host_memory(size_t preferred_texture_headroom,
+                                 size_t preferred_working_headroom)
+{
+  /* Limit amount of host mapped memory, because allocating too much can
+   * cause system instability. Leave at least half or 4 GB of system
+   * memory free, whichever is smaller. */
+  size_t default_limit = 4 * 1024 * 1024 * 1024LL;
+  size_t system_ram = system_physical_ram();
+
+  if (system_ram > 0) {
+    if (system_ram / 2 > default_limit) {
+      map_host_limit = system_ram - default_limit;
+    }
+    else {
+      map_host_limit = system_ram / 2;
+    }
+  }
+  else {
+    VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
+    map_host_limit = 0;
+  }
+
+  /* Amount of device memory to keep free after texture memory
+   * and working memory allocations respectively. By default the working
+   * memory headroom is lower than the texture one, so that there is
+   * space left for working memory after texture allocations. */
+  device_working_headroom = preferred_working_headroom > 0 ? preferred_working_headroom :
+                                                             32 * 1024 * 1024LL;  // 32MB
+  device_texture_headroom = preferred_texture_headroom > 0 ? preferred_texture_headroom :
+                                                             128 * 1024 * 1024LL;  // 128MB
+
+  VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
+            << " bytes. (" << string_human_readable_size(map_host_limit) << ")";
+}
+
+void GPUDevice::move_textures_to_host(size_t size, bool for_texture)
+{
+  /* Break out of recursive call, which can happen when moving memory on a multi device. */
+  static bool any_device_moving_textures_to_host = false;
+  if (any_device_moving_textures_to_host) {
+    return;
+  }
+
+  /* Signal to reallocate textures in host memory only. */
+  move_texture_to_host = true;
+
+  while (size > 0) {
+    /* Find suitable memory allocation to move. */
+    device_memory *max_mem = NULL;
+    size_t max_size = 0;
+    bool max_is_image = false;
+
+    thread_scoped_lock lock(device_mem_map_mutex);
+    foreach (MemMap::value_type &pair, device_mem_map) {
+      device_memory &mem = *pair.first;
+      Mem *cmem = &pair.second;
+
+      /* Can only move textures allocated on this device (and not those from peer devices).
+       * And need to ignore memory that is already on the host. */
+      if (!mem.is_resident(this) || cmem->use_mapped_host) {
+        continue;
+      }
+
+      bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
+                        (&mem != &texture_info);
+      bool is_image = is_texture && (mem.data_height > 1);
+
+      /* Can't move this type of memory. */
+      if (!is_texture || cmem->array) {
+        continue;
+      }
+
+      /* When making room for other textures, only move image textures. */
+      if (for_texture && !is_image) {
+        continue;
+      }
+
+      /* Try to move largest allocation, prefer moving images. */
+      if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
+        max_is_image = is_image;
+        max_size = mem.device_size;
+        max_mem = &mem;
+      }
+    }
+    lock.unlock();
+
+    /* Move to host memory. This part is mutex protected since
+     * multiple backend devices could be moving the memory. The
+     * first one will do it, and the rest will adopt the pointer. */
+    if (max_mem) {
+      VLOG_WORK << "Move memory from device to host: " << max_mem->name;
+
+      static thread_mutex move_mutex;
+      thread_scoped_lock lock(move_mutex);
+
+      any_device_moving_textures_to_host = true;
+
+      /* Potentially need to call back into multi device, so pointer mapping
+       * and peer devices are updated. This is also necessary since the device
+       * pointer may just be a key here, so cannot be accessed and freed directly.
+       * Unfortunately it does mean that memory is reallocated on all other
+       * devices as well, which is potentially dangerous when still in use (since
+       * a thread rendering on another device would only be caught in this mutex
+       * if it so happens to do an allocation at the same time as well). */
+      max_mem->device_copy_to();
+      size = (max_size >= size) ? 0 : size - max_size;
+
+      any_device_moving_textures_to_host = false;
+    }
+    else {
+      break;
+    }
+  }
+
+  /* Unset flag before texture info is reloaded, since it should stay in device memory. */
+  move_texture_to_host = false;
+
+  /* Update texture info array with new pointers. */
+  load_texture_info();
+}
+
+/* Allocate `mem` in device memory or, failing that, mapped host memory. NULL on failure. */
+GPUDevice::Mem *GPUDevice::generic_alloc(device_memory &mem, size_t pitch_padding)
+{
+  void *device_pointer = 0;
+  size_t size = mem.memory_size() + pitch_padding;
+
+  bool mem_alloc_result = false;
+  const char *status = "";
+
+  /* First try allocating in device memory, respecting headroom. We make
+   * an exception for texture info. It is small and frequently accessed,
+   * so treat it as working memory.
+   *
+   * If there is not enough room for working memory, we will try to move
+   * textures to host memory, assuming the performance impact would have
+   * been worse for working memory. */
+  bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
+  bool is_image = is_texture && (mem.data_height > 1);
+
+  size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
+
+  size_t total = 0, free = 0;
+  get_device_memory_info(total, free);
+
+  /* Move textures to host memory if needed. */
+  if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
+    move_textures_to_host(size + headroom - free, is_texture);
+    get_device_memory_info(total, free);
+  }
+
+  /* Allocate in device memory. */
+  if (!move_texture_to_host && (size + headroom) < free) {
+    mem_alloc_result = alloc_device(device_pointer, size);
+    if (mem_alloc_result) {
+      device_mem_in_use += size;
+      status = " in device memory";
+    }
+  }
+
+  /* Fall back to mapped host memory if needed and possible. */
+  void *shared_pointer = 0;
+  if (!mem_alloc_result && can_map_host && mem.type != MEM_DEVICE_ONLY) {
+    if (mem.shared_pointer) {
+      /* Another device already allocated host memory. */
+      mem_alloc_result = true;
+      shared_pointer = mem.shared_pointer;
+    }
+    else if (map_host_used + size < map_host_limit) {
+      /* Allocate host memory ourselves. */
+      mem_alloc_result = alloc_host(shared_pointer, size);
+
+      assert((mem_alloc_result && shared_pointer != 0) ||
+             (!mem_alloc_result && shared_pointer == 0));
+    }
+
+    if (mem_alloc_result) {
+      /* Call outside of assert(), which compiles to nothing when NDEBUG is defined. */
+      const bool transformed = transform_host_pointer(device_pointer, shared_pointer);
+      assert(transformed);
+      (void)transformed;
+      map_host_used += size;
+      status = " in host memory";
+    }
+  }
+
+  if (!mem_alloc_result) {
+    if (mem.type == MEM_DEVICE_ONLY) {
+      status = " failed, out of device memory";
+      set_error("System is out of GPU memory");
+    }
+    else {
+      status = " failed, out of device and host memory";
+      set_error("System is out of GPU and shared host memory");
+    }
+  }
+
+  if (mem.name) {
+    VLOG_WORK << "Buffer allocate: " << mem.name << ", "
+              << string_human_readable_number(mem.memory_size()) << " bytes. ("
+              << string_human_readable_size(mem.memory_size()) << ")" << status;
+  }
+
+  mem.device_pointer = (device_ptr)device_pointer;
+  mem.device_size = size;
+  stats.mem_alloc(size);
+
+  if (!mem.device_pointer) {
+    return NULL;
+  }
+
+  /* Insert into map of allocations. */
+  thread_scoped_lock lock(device_mem_map_mutex);
+  Mem *cmem = &device_mem_map[&mem];
+  if (shared_pointer != 0) {
+    /* Replace host pointer with our host allocation. Only works if
+     * memory layout is the same and has no pitch padding. Also
+     * does not work if we move textures to host during a render,
+     * since other devices might be using the memory. */
+    if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
+        mem.host_pointer != shared_pointer) {
+      memcpy(shared_pointer, mem.host_pointer, size);
+
+      /* A Call to device_memory::host_free() should be preceded by
+       * a call to device_memory::device_free() for host memory
+       * allocated by a device to be handled properly. Two exceptions
+       * are here and a call in OptiXDevice::generic_alloc(), where
+       * the current host memory can be assumed to be allocated by
+       * device_memory::host_alloc(), not by a device */
+      mem.host_free();
+      mem.host_pointer = shared_pointer;
+    }
+    mem.shared_pointer = shared_pointer;
+    mem.shared_counter++;
+    cmem->use_mapped_host = true;
+  }
+  else {
+    cmem->use_mapped_host = false;
+  }
+
+  return cmem;
+}
+
+void GPUDevice::generic_free(device_memory &mem)
+{
+  if (mem.device_pointer) {
+    thread_scoped_lock lock(device_mem_map_mutex);
+    DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
+    const Mem &cmem = device_mem_map[&mem];
+
+    /* If cmem.use_mapped_host is true, reference counting is used
+     * to safely free a mapped host memory. */
+
+    if (cmem.use_mapped_host) {
+      assert(mem.shared_pointer);
+      if (mem.shared_pointer) {
+        assert(mem.shared_counter > 0);
+        if (--mem.shared_counter == 0) {
+          if (mem.host_pointer == mem.shared_pointer) {
+            mem.host_pointer = 0;
+          }
+          free_host(mem.shared_pointer);
+          mem.shared_pointer = 0;
+        }
+      }
+      map_host_used -= mem.device_size;
+    }
+    else {
+      /* Free device memory. */
+      free_device((void *)mem.device_pointer);
+      device_mem_in_use -= mem.device_size;
+    }
+
+    stats.mem_free(mem.device_size);
+    mem.device_pointer = 0;
+    mem.device_size = 0;
+
+    device_mem_map.erase(device_mem_map.find(&mem));
+  }
+}
+
+void GPUDevice::generic_copy_to(device_memory &mem)
+{
+  if (!mem.host_pointer || !mem.device_pointer) {
+    return;
+  }
+
+  /* If use_mapped_host of mem is false, the current device only uses device memory allocated by
+   * backend device allocation regardless of mem.host_pointer and mem.shared_pointer, and should
+   * copy data from mem.host_pointer. */
+  thread_scoped_lock lock(device_mem_map_mutex);
+  if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
+    copy_host_to_device((void *)mem.device_pointer, mem.host_pointer, mem.memory_size());
+  }
+}
+
 /* DeviceInfo */

 CCL_NAMESPACE_END
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -182,7 +182,7 @@ class Device {
  {
  }

-  /* Return true if device is ready for rendering, or report status if not. */
+  /* Report status and return true if device is ready for rendering. */
  virtual bool is_ready(string & /*status*/) const
  {
    return true;
@@ -309,6 +309,93 @@ class Device {
  static uint devices_initialized_mask;
 };

+/* A GPU device, with functionality that is common to the GPU backends. */
+class GPUDevice : public Device {
+ protected:
+  GPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_)
+      : Device(info_, stats_, profiler_),
+        texture_info(this, "texture_info", MEM_GLOBAL),
+        need_texture_info(false),
+        can_map_host(false),
+        map_host_used(0),
+        map_host_limit(0),
+        device_texture_headroom(0),
+        device_working_headroom(0),
+        device_mem_map(),
+        device_mem_map_mutex(),
+        move_texture_to_host(false),
+        device_mem_in_use(0)
+  {
+  }
+
+ public:
+  virtual ~GPUDevice() noexcept(false);
+
+  /* For GPUs that can use bindless textures in some way or another. */
+  device_vector<TextureInfo> texture_info;
+  bool need_texture_info;
+  /* Returns true if the texture info was copied to the device (meaning, some more
+   * re-initialization might be needed). */
+  virtual bool load_texture_info();
+
+ protected:
+  /* Memory allocation, only accessed through device_memory. */
+  friend class device_memory;
+
+  bool can_map_host;
+  size_t map_host_used;
+  size_t map_host_limit;
+  size_t device_texture_headroom;
+  size_t device_working_headroom;
+  typedef unsigned long long texMemObject;
+  typedef unsigned long long arrayMemObject;
+  struct Mem {
+    Mem() : texobject(0), array(0), use_mapped_host(false)
+    {
+    }
+
+    texMemObject texobject;
+    arrayMemObject array;
+
+    /* If true, a mapped host memory in shared_pointer is being used. */
+    bool use_mapped_host;
+  };
+  typedef map<device_memory *, Mem> MemMap;
+  MemMap device_mem_map;
+  thread_mutex device_mem_map_mutex;
+  bool move_texture_to_host;
+  /* Simple counter which will try to track amount of used device memory */
+  size_t device_mem_in_use;
+
+  virtual void init_host_memory(size_t preferred_texture_headroom = 0,
+                                size_t preferred_working_headroom = 0);
+  virtual void move_textures_to_host(size_t size, bool for_texture);
+
+  /* Allocation, deallocation and copy functions, with corresponding
+   * support of device/host allocations. */
+  virtual GPUDevice::Mem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
+  virtual void generic_free(device_memory &mem);
+  virtual void generic_copy_to(device_memory &mem);
+
+  /* total - amount of device memory, free - amount of available device memory */
+  virtual void get_device_memory_info(size_t &total, size_t &free) = 0;
+
+  virtual bool alloc_device(void *&device_pointer, size_t size) = 0;
+
+  virtual void free_device(void *device_pointer) = 0;
+
+  virtual bool alloc_host(void *&shared_pointer, size_t size) = 0;
+
+  virtual void free_host(void *shared_pointer) = 0;
+
+  /* This function should return the device pointer corresponding to the shared pointer,
+   * which is a host buffer allocated in `alloc_host`. The function should return `true`
+   * if such an address transformation is possible and `false` otherwise. */
+  virtual bool transform_host_pointer(void *&device_pointer, void *&shared_pointer) = 0;
+
+  virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) = 0;
+};
+
 CCL_NAMESPACE_END

 #endif /* __DEVICE_H__ */
--- a/intern/cycles/device/hip/device_impl.cpp
+++ b/intern/cycles/device/hip/device_impl.cpp
@@ -53,8 +53,12 @@ void HIPDevice::set_error(const string &error)
 }

 HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
-    : Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL)
+    : GPUDevice(info, stats, profiler)
 {
+  /* Verify that base class types can be used with specific backend types */
+  static_assert(sizeof(texMemObject) == sizeof(hipTextureObject_t));
+  static_assert(sizeof(arrayMemObject) == sizeof(hArray));
+
  first_error = true;

  hipDevId = info.num;
@@ -65,12 +69,6 @@ HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)

  need_texture_info = false;

-  device_texture_headroom = 0;
-  device_working_headroom = 0;
-  move_texture_to_host = false;
-  map_host_limit = 0;
-  map_host_used = 0;
-  can_map_host = 0;
  pitch_alignment = 0;

  /* Initialize HIP. */
@@ -91,7 +89,9 @@ HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
  /* hipDeviceMapHost for mapping host memory when out of device memory.
   * hipDeviceLmemResizeToMax for reserving local memory ahead of render,
   * so we can predict which memory to map to host. */
-  hip_assert(hipDeviceGetAttribute(&can_map_host, hipDeviceAttributeCanMapHostMemory, hipDevice));
+  int value;
+  hip_assert(hipDeviceGetAttribute(&value, hipDeviceAttributeCanMapHostMemory, hipDevice));
+  can_map_host = value != 0;

  hip_assert(
      hipDeviceGetAttribute(&pitch_alignment, hipDeviceAttributeTexturePitchAlignment, hipDevice));
@@ -460,305 +460,58 @@ void HIPDevice::reserve_local_memory(const uint kernel_features)
 #  endif
 }

-void HIPDevice::init_host_memory()
-{
-  /* Limit amount of host mapped memory, because allocating too much can
-   * cause system instability. Leave at least half or 4 GB of system
-   * memory free, whichever is smaller. */
-  size_t default_limit = 4 * 1024 * 1024 * 1024LL;
-  size_t system_ram = system_physical_ram();
-
-  if (system_ram > 0) {
-    if (system_ram / 2 > default_limit) {
-      map_host_limit = system_ram - default_limit;
-    }
-    else {
-      map_host_limit = system_ram / 2;
-    }
-  }
-  else {
-    VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
-    map_host_limit = 0;
-  }
-
-  /* Amount of device memory to keep is free after texture memory
-   * and working memory allocations respectively. We set the working
-   * memory limit headroom lower so that some space is left after all
-   * texture memory allocations. */
-  device_working_headroom = 32 * 1024 * 1024LL;   // 32MB
-  device_texture_headroom = 128 * 1024 * 1024LL;  // 128MB
-
-  VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
-            << " bytes. (" << string_human_readable_size(map_host_limit) << ")";
-}
-
-void HIPDevice::load_texture_info()
-{
-  if (need_texture_info) {
-    /* Unset flag before copying, so this does not loop indefinitely if the copy below calls
-     * into 'move_textures_to_host' (which calls 'load_texture_info' again). */
-    need_texture_info = false;
-    texture_info.copy_to_device();
-  }
-}
-
-void HIPDevice::move_textures_to_host(size_t size, bool for_texture)
-{
-  /* Break out of recursive call, which can happen when moving memory on a multi device. */
-  static bool any_device_moving_textures_to_host = false;
-  if (any_device_moving_textures_to_host) {
-    return;
-  }
-
-  /* Signal to reallocate textures in host memory only. */
-  move_texture_to_host = true;
-
-  while (size > 0) {
-    /* Find suitable memory allocation to move. */
-    device_memory *max_mem = NULL;
-    size_t max_size = 0;
-    bool max_is_image = false;
-
-    thread_scoped_lock lock(hip_mem_map_mutex);
-    foreach (HIPMemMap::value_type &pair, hip_mem_map) {
-      device_memory &mem = *pair.first;
-      HIPMem *cmem = &pair.second;
-
-      /* Can only move textures allocated on this device (and not those from peer devices).
-       * And need to ignore memory that is already on the host. */
-      if (!mem.is_resident(this) || cmem->use_mapped_host) {
-        continue;
-      }
-
-      bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
-                        (&mem != &texture_info);
-      bool is_image = is_texture && (mem.data_height > 1);
-
-      /* Can't move this type of memory. */
-      if (!is_texture || cmem->array) {
-        continue;
-      }
-
-      /* For other textures, only move image textures. */
-      if (for_texture && !is_image) {
-        continue;
-      }
-
-      /* Try to move largest allocation, prefer moving images. */
-      if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
-        max_is_image = is_image;
-        max_size = mem.device_size;
-        max_mem = &mem;
-      }
-    }
-    lock.unlock();
-
-    /* Move to host memory. This part is mutex protected since
-     * multiple HIP devices could be moving the memory. The
-     * first one will do it, and the rest will adopt the pointer. */
-    if (max_mem) {
-      VLOG_WORK << "Move memory from device to host: " << max_mem->name;
-
-      static thread_mutex move_mutex;
-      thread_scoped_lock lock(move_mutex);
-
-      any_device_moving_textures_to_host = true;
-
-      /* Potentially need to call back into multi device, so pointer mapping
-       * and peer devices are updated. This is also necessary since the device
-       * pointer may just be a key here, so cannot be accessed and freed directly.
-       * Unfortunately it does mean that memory is reallocated on all other
-       * devices as well, which is potentially dangerous when still in use (since
-       * a thread rendering on another devices would only be caught in this mutex
-       * if it so happens to do an allocation at the same time as well. */
-      max_mem->device_copy_to();
-      size = (max_size >= size) ? 0 : size - max_size;
-
-      any_device_moving_textures_to_host = false;
-    }
-    else {
-      break;
-    }
-  }
-
-  /* Unset flag before texture info is reloaded, since it should stay in device memory. */
-  move_texture_to_host = false;
-
-  /* Update texture info array with new pointers. */
-  load_texture_info();
-}
-
-HIPDevice::HIPMem *HIPDevice::generic_alloc(device_memory &mem, size_t pitch_padding)
+void HIPDevice::get_device_memory_info(size_t &total, size_t &free)
 {
  HIPContextScope scope(this);

-  hipDeviceptr_t device_pointer = 0;
-  size_t size = mem.memory_size() + pitch_padding;
-
-  hipError_t mem_alloc_result = hipErrorOutOfMemory;
-  const char *status = "";
-
-  /* First try allocating in device memory, respecting headroom. We make
-   * an exception for texture info. It is small and frequently accessed,
-   * so treat it as working memory.
-   *
-   * If there is not enough room for working memory, we will try to move
-   * textures to host memory, assuming the performance impact would have
-   * been worse for working memory. */
-  bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
-  bool is_image = is_texture && (mem.data_height > 1);
-
-  size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
-
-  size_t total = 0, free = 0;
  hipMemGetInfo(&free, &total);
-
-  /* Move textures to host memory if needed. */
-  if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
-    move_textures_to_host(size + headroom - free, is_texture);
-    hipMemGetInfo(&free, &total);
-  }
-
-  /* Allocate in device memory. */
-  if (!move_texture_to_host && (size + headroom) < free) {
-    mem_alloc_result = hipMalloc(&device_pointer, size);
-    if (mem_alloc_result == hipSuccess) {
-      status = " in device memory";
-    }
-  }
-
-  /* Fall back to mapped host memory if needed and possible. */
-
-  void *shared_pointer = 0;
-
-  if (mem_alloc_result != hipSuccess && can_map_host) {
-    if (mem.shared_pointer) {
-      /* Another device already allocated host memory. */
-      mem_alloc_result = hipSuccess;
-      shared_pointer = mem.shared_pointer;
-    }
-    else if (map_host_used + size < map_host_limit) {
-      /* Allocate host memory ourselves. */
-      mem_alloc_result = hipHostMalloc(
-          &shared_pointer, size, hipHostMallocMapped | hipHostMallocWriteCombined);
-
-      assert((mem_alloc_result == hipSuccess && shared_pointer != 0) ||
-             (mem_alloc_result != hipSuccess && shared_pointer == 0));
-    }
-
-    if (mem_alloc_result == hipSuccess) {
-      hip_assert(hipHostGetDevicePointer(&device_pointer, shared_pointer, 0));
-      map_host_used += size;
-      status = " in host memory";
-    }
-  }
-
-  if (mem_alloc_result != hipSuccess) {
-    status = " failed, out of device and host memory";
-    set_error("System is out of GPU and shared host memory");
-  }
-
-  if (mem.name) {
-    VLOG_WORK << "Buffer allocate: " << mem.name << ", "
-              << string_human_readable_number(mem.memory_size()) << " bytes. ("
-              << string_human_readable_size(mem.memory_size()) << ")" << status;
-  }
-
-  mem.device_pointer = (device_ptr)device_pointer;
-  mem.device_size = size;
-  stats.mem_alloc(size);
-
-  if (!mem.device_pointer) {
-    return NULL;
-  }
-
-  /* Insert into map of allocations. */
-  thread_scoped_lock lock(hip_mem_map_mutex);
-  HIPMem *cmem = &hip_mem_map[&mem];
-  if (shared_pointer != 0) {
-    /* Replace host pointer with our host allocation. Only works if
-     * HIP memory layout is the same and has no pitch padding. Also
-     * does not work if we move textures to host during a render,
-     * since other devices might be using the memory. */
-
-    if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
-        mem.host_pointer != shared_pointer) {
-      memcpy(shared_pointer, mem.host_pointer, size);
-
-      /* A Call to device_memory::host_free() should be preceded by
-       * a call to device_memory::device_free() for host memory
-       * allocated by a device to be handled properly. Two exceptions
-       * are here and a call in OptiXDevice::generic_alloc(), where
-       * the current host memory can be assumed to be allocated by
-       * device_memory::host_alloc(), not by a device */
-
-      mem.host_free();
-      mem.host_pointer = shared_pointer;
-    }
-    mem.shared_pointer = shared_pointer;
-    mem.shared_counter++;
-    cmem->use_mapped_host = true;
-  }
-  else {
-    cmem->use_mapped_host = false;
-  }
-
-  return cmem;
 }

-void HIPDevice::generic_copy_to(device_memory &mem)
+bool HIPDevice::alloc_device(void *&device_pointer, size_t size)
 {
-  if (!mem.host_pointer || !mem.device_pointer) {
-    return;
-  }
+  HIPContextScope scope(this);

-  /* If use_mapped_host of mem is false, the current device only uses device memory allocated by
-   * hipMalloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from
-   * mem.host_pointer. */
-  thread_scoped_lock lock(hip_mem_map_mutex);
-  if (!hip_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
-    const HIPContextScope scope(this);
-    hip_assert(
-        hipMemcpyHtoD((hipDeviceptr_t)mem.device_pointer, mem.host_pointer, mem.memory_size()));
-  }
+  hipError_t mem_alloc_result = hipMalloc((hipDeviceptr_t *)&device_pointer, size);
+  return mem_alloc_result == hipSuccess;
 }

-void HIPDevice::generic_free(device_memory &mem)
+void HIPDevice::free_device(void *device_pointer)
 {
-  if (mem.device_pointer) {
-    HIPContextScope scope(this);
-    thread_scoped_lock lock(hip_mem_map_mutex);
-    DCHECK(hip_mem_map.find(&mem) != hip_mem_map.end());
-    const HIPMem &cmem = hip_mem_map[&mem];
+  HIPContextScope scope(this);

-    /* If cmem.use_mapped_host is true, reference counting is used
-     * to safely free a mapped host memory. */
+  hip_assert(hipFree((hipDeviceptr_t)device_pointer));
+}

-    if (cmem.use_mapped_host) {
-      assert(mem.shared_pointer);
-      if (mem.shared_pointer) {
-        assert(mem.shared_counter > 0);
-        if (--mem.shared_counter == 0) {
-          if (mem.host_pointer == mem.shared_pointer) {
-            mem.host_pointer = 0;
-          }
-          hipHostFree(mem.shared_pointer);
-          mem.shared_pointer = 0;
-        }
-      }
-      map_host_used -= mem.device_size;
-    }
-    else {
-      /* Free device memory. */
-      hip_assert(hipFree(mem.device_pointer));
-    }
+bool HIPDevice::alloc_host(void *&shared_pointer, size_t size)
+{
+  HIPContextScope scope(this);

-    stats.mem_free(mem.device_size);
-    mem.device_pointer = 0;
-    mem.device_size = 0;
+  hipError_t mem_alloc_result = hipHostMalloc(
+      &shared_pointer, size, hipHostMallocMapped | hipHostMallocWriteCombined);

-    hip_mem_map.erase(hip_mem_map.find(&mem));
-  }
+  return mem_alloc_result == hipSuccess;
+}
+
+void HIPDevice::free_host(void *shared_pointer)
+{
+  HIPContextScope scope(this);
+
+  hipHostFree(shared_pointer);
+}
+
+bool HIPDevice::transform_host_pointer(void *&device_pointer, void *&shared_pointer)
+{
+  HIPContextScope scope(this);
+
+  hip_assert(hipHostGetDevicePointer((hipDeviceptr_t *)&device_pointer, shared_pointer, 0));
+  return true;
+}
+
+void HIPDevice::copy_host_to_device(void *device_pointer, void *host_pointer, size_t size)
+{
+  const HIPContextScope scope(this);
+
+  hip_assert(hipMemcpyHtoD((hipDeviceptr_t)device_pointer, host_pointer, size));
 }

 void HIPDevice::mem_alloc(device_memory &mem)
@@ -823,8 +576,8 @@ void HIPDevice::mem_zero(device_memory &mem)

  /* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory
   * regardless of mem.host_pointer and mem.shared_pointer. */
-  thread_scoped_lock lock(hip_mem_map_mutex);
-  if (!hip_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
+  thread_scoped_lock lock(device_mem_map_mutex);
+  if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
    const HIPContextScope scope(this);
    hip_assert(hipMemsetD8((hipDeviceptr_t)mem.device_pointer, 0, mem.memory_size()));
  }
@@ -951,19 +704,19 @@ void HIPDevice::tex_alloc(device_texture &mem)
      return;
  }

-  HIPMem *cmem = NULL;
+  Mem *cmem = NULL;
  hArray array_3d = NULL;
  size_t src_pitch = mem.data_width * dsize * mem.data_elements;
  size_t dst_pitch = src_pitch;

  if (!mem.is_resident(this)) {
-    thread_scoped_lock lock(hip_mem_map_mutex);
-    cmem = &hip_mem_map[&mem];
+    thread_scoped_lock lock(device_mem_map_mutex);
+    cmem = &device_mem_map[&mem];
    cmem->texobject = 0;

    if (mem.data_depth > 1) {
      array_3d = (hArray)mem.device_pointer;
-      cmem->array = array_3d;
+      cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
    }
    else if (mem.data_height > 0) {
      dst_pitch = align_up(src_pitch, pitch_alignment);
@@ -1007,10 +760,10 @@ void HIPDevice::tex_alloc(device_texture &mem)
    mem.device_size = size;
    stats.mem_alloc(size);

-    thread_scoped_lock lock(hip_mem_map_mutex);
-    cmem = &hip_mem_map[&mem];
+    thread_scoped_lock lock(device_mem_map_mutex);
+    cmem = &device_mem_map[&mem];
    cmem->texobject = 0;
-    cmem->array = array_3d;
+    cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
  }
  else if (mem.data_height > 0) {
    /* 2D texture, using pitch aligned linear memory. */
@@ -1095,8 +848,8 @@ void HIPDevice::tex_alloc(device_texture &mem)
    texDesc.filterMode = filter_mode;
    texDesc.flags = HIP_TRSF_NORMALIZED_COORDINATES;

-    thread_scoped_lock lock(hip_mem_map_mutex);
-    cmem = &hip_mem_map[&mem];
+    thread_scoped_lock lock(device_mem_map_mutex);
+    cmem = &device_mem_map[&mem];

    hip_assert(hipTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));

@@ -1111,9 +864,9 @@ void HIPDevice::tex_free(device_texture &mem)
 {
  if (mem.device_pointer) {
    HIPContextScope scope(this);
-    thread_scoped_lock lock(hip_mem_map_mutex);
-    DCHECK(hip_mem_map.find(&mem) != hip_mem_map.end());
-    const HIPMem &cmem = hip_mem_map[&mem];
+    thread_scoped_lock lock(device_mem_map_mutex);
+    DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
+    const Mem &cmem = device_mem_map[&mem];

    if (cmem.texobject) {
      /* Free bindless texture. */
@@ -1122,16 +875,16 @@ void HIPDevice::tex_free(device_texture &mem)

    if (!mem.is_resident(this)) {
      /* Do not free memory here, since it was allocated on a different device. */
-      hip_mem_map.erase(hip_mem_map.find(&mem));
+      device_mem_map.erase(device_mem_map.find(&mem));
    }
    else if (cmem.array) {
      /* Free array. */
-      hipArrayDestroy(cmem.array);
+      hipArrayDestroy(reinterpret_cast<hArray>(cmem.array));
      stats.mem_free(mem.device_size);
      mem.device_pointer = 0;
      mem.device_size = 0;

-      hip_mem_map.erase(hip_mem_map.find(&mem));
+      device_mem_map.erase(device_mem_map.find(&mem));
    }
    else {
      lock.unlock();
@@ -1153,7 +906,7 @@ bool HIPDevice::should_use_graphics_interop()
   * possible, but from the empiric measurements it can be considerably slower than using naive
   * pixels copy. */

-  /* Disable graphics interop for now, because of driver bug in 21.40. See T92972 */
+  /* Disable graphics interop for now, because of driver bug in 21.40. See #92972 */
 #  if 0
  HIPContextScope scope(this);

--- a/intern/cycles/device/hip/device_impl.h
+++ b/intern/cycles/device/hip/device_impl.h
@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN

 class DeviceQueue;

-class HIPDevice : public Device {
+class HIPDevice : public GPUDevice {

  friend class HIPContextScope;

@@ -26,36 +26,11 @@ class HIPDevice : public Device {
  hipDevice_t hipDevice;
  hipCtx_t hipContext;
  hipModule_t hipModule;
-  size_t device_texture_headroom;
-  size_t device_working_headroom;
-  bool move_texture_to_host;
-  size_t map_host_used;
-  size_t map_host_limit;
-  int can_map_host;
  int pitch_alignment;
  int hipDevId;
  int hipDevArchitecture;
  bool first_error;

-  struct HIPMem {
-    HIPMem() : texobject(0), array(0), use_mapped_host(false)
-    {
-    }
-
-    hipTextureObject_t texobject;
-    hArray array;
-
-    /* If true, a mapped host memory in shared_pointer is being used. */
-    bool use_mapped_host;
-  };
-  typedef map<device_memory *, HIPMem> HIPMemMap;
-  HIPMemMap hip_mem_map;
-  thread_mutex hip_mem_map_mutex;
-
-  /* Bindless Textures */
-  device_vector<TextureInfo> texture_info;
-  bool need_texture_info;
-
  HIPDeviceKernels kernels;

  static bool have_precompiled_kernels();
@@ -81,17 +56,13 @@ class HIPDevice : public Device {
  virtual bool load_kernels(const uint kernel_features) override;
  void reserve_local_memory(const uint kernel_features);

-  void init_host_memory();
-
-  void load_texture_info();
-
-  void move_textures_to_host(size_t size, bool for_texture);
-
-  HIPMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
-
-  void generic_copy_to(device_memory &mem);
-
-  void generic_free(device_memory &mem);
+  virtual void get_device_memory_info(size_t &total, size_t &free) override;
+  virtual bool alloc_device(void *&device_pointer, size_t size) override;
+  virtual void free_device(void *device_pointer) override;
+  virtual bool alloc_host(void *&shared_pointer, size_t size) override;
+  virtual void free_host(void *shared_pointer) override;
+  virtual bool transform_host_pointer(void *&device_pointer, void *&shared_pointer) override;
+  virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) override;

  void mem_alloc(device_memory &mem) override;

--- a/intern/cycles/device/hip/util.h
+++ b/intern/cycles/device/hip/util.h
@@ -51,7 +51,7 @@ static inline bool hipSupportsDevice(const int hipDevId)
  hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
  hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);

-  return (major >= 10);
+  return (major >= 9);
 }

 CCL_NAMESPACE_END
--- a/intern/cycles/device/memory.h
+++ b/intern/cycles/device/memory.h
@@ -108,9 +108,10 @@ template<> struct device_type_traits<uint2> {
 };

 template<> struct device_type_traits<uint3> {
-  static const DataType data_type = TYPE_UINT;
-  static const size_t num_elements = 3;
-  static_assert(sizeof(uint3) == num_elements * datatype_size(data_type));
+  /* uint3 has a different size depending on the device, so it can't be used for interchanging
+   * memory between CPU and GPU.
+   *
+   * Leave body empty to trigger a compile error if used. */
 };

 template<> struct device_type_traits<uint4> {
@@ -132,9 +133,10 @@ template<> struct device_type_traits<int2> {
 };

 template<> struct device_type_traits<int3> {
-  static const DataType data_type = TYPE_INT;
-  static const size_t num_elements = 4;
-  static_assert(sizeof(int3) == num_elements * datatype_size(data_type));
+  /* int3 has a different size depending on the device, so it can't be used for interchanging
+   * memory between CPU and GPU.
+   *
+   * Leave body empty to trigger a compile error if used. */
 };

 template<> struct device_type_traits<int4> {
@@ -247,6 +249,8 @@ class device_memory {
  bool is_resident(Device *sub_device) const;

 protected:
+  friend class Device;
+  friend class GPUDevice;
  friend class CUDADevice;
  friend class OptiXDevice;
  friend class HIPDevice;
--- a/intern/cycles/device/metal/device.mm
+++ b/intern/cycles/device/metal/device.mm
@@ -55,9 +55,8 @@ void device_metal_info(vector<DeviceInfo> &devices)
    info.denoisers = DENOISER_NONE;
    info.id = id;

-    if (MetalInfo::get_device_vendor(device) == METAL_GPU_AMD) {
-      info.has_light_tree = false;
-    }
+    info.has_nanovdb = MetalInfo::get_device_vendor(device) == METAL_GPU_APPLE;
+    info.has_light_tree = MetalInfo::get_device_vendor(device) != METAL_GPU_AMD;

    devices.push_back(info);
    device_index++;
--- a/intern/cycles/device/metal/device_impl.h
+++ b/intern/cycles/device/metal/device_impl.h
@@ -29,7 +29,8 @@ class MetalDevice : public Device {
  id<MTLArgumentEncoder> mtlAncillaryArgEncoder =
      nil; /* encoder used for fetching device pointers from MTLBuffers */
  string source[PSO_NUM];
-  string source_md5[PSO_NUM];
+  string kernels_md5[PSO_NUM];
+  string global_defines_md5[PSO_NUM];

  bool capture_enabled = false;

@@ -67,9 +68,12 @@ class MetalDevice : public Device {
  std::recursive_mutex metal_mem_map_mutex;

  /* Bindless Textures */
+  bool is_texture(const TextureInfo &tex);
  device_vector<TextureInfo> texture_info;
  bool need_texture_info;
  id<MTLArgumentEncoder> mtlTextureArgEncoder = nil;
+  id<MTLArgumentEncoder> mtlBufferArgEncoder = nil;
+  id<MTLBuffer> buffer_bindings_1d = nil;
  id<MTLBuffer> texture_bindings_2d = nil;
  id<MTLBuffer> texture_bindings_3d = nil;
  std::vector<id<MTLTexture>> texture_slot_map;
@@ -112,6 +116,10 @@ class MetalDevice : public Device {

  bool use_local_atomic_sort() const;

+  string preprocess_source(MetalPipelineType pso_type,
+                           const uint kernel_features,
+                           string *source = nullptr);
+
  bool make_source_and_check_if_compile_needed(MetalPipelineType pso_type);

  void make_source(MetalPipelineType pso_type, const uint kernel_features);
--- a/intern/cycles/device/metal/device_impl.mm
+++ b/intern/cycles/device/metal/device_impl.mm
@@ -91,11 +91,6 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
    }
  }

-  texture_bindings_2d = [mtlDevice newBufferWithLength:4096 options:default_storage_mode];
-  texture_bindings_3d = [mtlDevice newBufferWithLength:4096 options:default_storage_mode];
-
-  stats.mem_alloc(texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize);
-
  switch (device_vendor) {
    default:
      break;
@@ -105,6 +100,7 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
    }
    case METAL_GPU_AMD: {
      max_threads_per_threadgroup = 128;
+      use_metalrt = info.use_metalrt;
      break;
    }
    case METAL_GPU_APPLE: {
@@ -155,6 +151,16 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
  arg_desc_texture.dataType = MTLDataTypeTexture;
  arg_desc_texture.access = MTLArgumentAccessReadOnly;
  mtlTextureArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_texture ]];
+  MTLArgumentDescriptor *arg_desc_buffer = [[MTLArgumentDescriptor alloc] init];
+  arg_desc_buffer.dataType = MTLDataTypePointer;
+  arg_desc_buffer.access = MTLArgumentAccessReadOnly;
+  mtlBufferArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_buffer ]];
+
+  buffer_bindings_1d = [mtlDevice newBufferWithLength:8192 options:default_storage_mode];
+  texture_bindings_2d = [mtlDevice newBufferWithLength:8192 options:default_storage_mode];
+  texture_bindings_3d = [mtlDevice newBufferWithLength:8192 options:default_storage_mode];
+  stats.mem_alloc(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize +
+                  texture_bindings_3d.allocatedSize);

  /* command queue for non-tracing work on the GPU */
  mtlGeneralCommandQueue = [mtlDevice newCommandQueue];
@@ -179,6 +185,8 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
    arg_desc_tex.dataType = MTLDataTypePointer;
    arg_desc_tex.access = MTLArgumentAccessReadOnly;

+    arg_desc_tex.index = index++;
+    [ancillary_desc addObject:[arg_desc_tex copy]]; /* metal_buf_1d */
    arg_desc_tex.index = index++;
    [ancillary_desc addObject:[arg_desc_tex copy]]; /* metal_tex_2d */
    arg_desc_tex.index = index++;
@@ -224,11 +232,15 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
    mtlAncillaryArgEncoder = [mtlDevice newArgumentEncoderWithArguments:ancillary_desc];

    // preparing the blas arg encoder
-    MTLArgumentDescriptor *arg_desc_blas = [[MTLArgumentDescriptor alloc] init];
-    arg_desc_blas.dataType = MTLDataTypeInstanceAccelerationStructure;
-    arg_desc_blas.access = MTLArgumentAccessReadOnly;
-    mtlBlasArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_blas ]];
-    [arg_desc_blas release];
+    if (@available(macos 11.0, *)) {
+      if (use_metalrt) {
+        MTLArgumentDescriptor *arg_desc_blas = [[MTLArgumentDescriptor alloc] init];
+        arg_desc_blas.dataType = MTLDataTypeInstanceAccelerationStructure;
+        arg_desc_blas.access = MTLArgumentAccessReadOnly;
+        mtlBlasArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_blas ]];
+        [arg_desc_blas release];
+      }
+    }

    for (int i = 0; i < ancillary_desc.count; i++) {
      [ancillary_desc[i] release];
@@ -248,22 +260,26 @@ MetalDevice::~MetalDevice()
   * existing_devices_mutex). */
  thread_scoped_lock lock(existing_devices_mutex);

-  for (auto &tex : texture_slot_map) {
-    if (tex) {
-      [tex release];
-      tex = nil;
+  int num_resources = texture_info.size();
+  for (int res = 0; res < num_resources; res++) {
+    if (is_texture(texture_info[res])) {
+      [texture_slot_map[res] release];
+      texture_slot_map[res] = nil;
    }
  }
+
  flush_delayed_free_list();

  if (texture_bindings_2d) {
-    stats.mem_free(texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize);
-
+    stats.mem_free(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize +
+                   texture_bindings_3d.allocatedSize);
+    [buffer_bindings_1d release];
    [texture_bindings_2d release];
    [texture_bindings_3d release];
  }
  [mtlTextureArgEncoder release];
  [mtlBufferKernelParamsEncoder release];
+  [mtlBufferArgEncoder release];
  [mtlASArgEncoder release];
  [mtlAncillaryArgEncoder release];
  [mtlGeneralCommandQueue release];
@@ -294,7 +310,9 @@ bool MetalDevice::use_local_atomic_sort() const
  return DebugFlags().metal.use_local_atomic_sort;
 }

-void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_features)
+string MetalDevice::preprocess_source(MetalPipelineType pso_type,
+                                      const uint kernel_features,
+                                      string *source)
 {
  string global_defines;
  if (use_adaptive_compilation()) {
@@ -327,6 +345,9 @@ void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_feat
      break;
    case METAL_GPU_APPLE:
      global_defines += "#define __KERNEL_METAL_APPLE__\n";
+#  ifdef WITH_NANOVDB
+      global_defines += "#define WITH_NANOVDB\n";
+#  endif
      break;
  }

@@ -334,6 +355,61 @@ void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_feat
  NSOperatingSystemVersion macos_ver = [processInfo operatingSystemVersion];
  global_defines += "#define __KERNEL_METAL_MACOS__ " + to_string(macos_ver.majorVersion) + "\n";

+  /* Replace specific KernelData "dot" dereferences with a Metal function_constant identifier of
+   * the same character length. Only the injected #defines are hashed here; the constant values
+   * themselves are folded into the PSO hash in make_source_and_check_if_compile_needed().
+   */
+  if (pso_type != PSO_GENERIC) {
+    if (source) {
+      const double starttime = time_dt();
+
+#  define KERNEL_STRUCT_BEGIN(name, parent) \
+    string_replace_same_length(*source, "kernel_data." #parent ".", "kernel_data_" #parent "_");
+
+      bool next_member_is_specialized = true;
+
+#  define KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE next_member_is_specialized = false;
+
+#  define KERNEL_STRUCT_MEMBER(parent, _type, name) \
+    if (!next_member_is_specialized) { \
+      string_replace( \
+          *source, "kernel_data_" #parent "_" #name, "kernel_data." #parent ".__unused_" #name); \
+      next_member_is_specialized = true; \
+    }
+
+#  include "kernel/data_template.h"
+
+#  undef KERNEL_STRUCT_MEMBER
+#  undef KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE
+#  undef KERNEL_STRUCT_BEGIN
+
+      metal_printf("KernelData patching took %.1f ms\n", (time_dt() - starttime) * 1000.0);
+    }
+
+    /* Opt in to all available specializations. This can be made more granular for the
+     * PSO_SPECIALIZED_INTERSECT case in order to minimize the number of specialization requests,
+     * but the overhead should be negligible as these are very quick to (re)build and aren't
+     * serialized to disk via MTLBinaryArchives.
+     */
+    global_defines += "#define __KERNEL_USE_DATA_CONSTANTS__\n";
+  }
+
+#  if 0
+  metal_printf("================\n%s================\n",
+               global_defines.c_str());
+#  endif
+
+  if (source) {
+    *source = global_defines + *source;
+  }
+
+  MD5Hash md5;
+  md5.append(global_defines);
+  return md5.get_hex();
+}
+
+void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_features)
+{
  string &source = this->source[pso_type];
  source = "\n#include \"kernel/device/metal/kernel.metal\"\n";
  source = path_source_replace_includes(source, path_get("source"));
@@ -342,62 +418,7 @@ void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_feat
   * With Metal function constants we can generate a single variant of the kernel source which can
   * be repeatedly respecialized.
   */
-  string baked_constants;
-
-  /* Replace specific KernelData "dot" dereferences with a Metal function_constant identifier of
-   * the same character length. Build a string of all active constant values which is then hashed
-   * in order to identify the PSO.
-   */
-  if (pso_type != PSO_GENERIC) {
-    const double starttime = time_dt();
-
-#  define KERNEL_STRUCT_BEGIN(name, parent) \
-    string_replace_same_length(source, "kernel_data." #parent ".", "kernel_data_" #parent "_");
-
-    bool next_member_is_specialized = true;
-
-#  define KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE next_member_is_specialized = false;
-
-    /* Add constants to md5 so that 'get_best_pipeline' is able to return a suitable match. */
-#  define KERNEL_STRUCT_MEMBER(parent, _type, name) \
-    if (next_member_is_specialized) { \
-      baked_constants += string(#parent "." #name "=") + \
-                         to_string(_type(launch_params.data.parent.name)) + "\n"; \
-    } \
-    else { \
-      string_replace( \
-          source, "kernel_data_" #parent "_" #name, "kernel_data." #parent ".__unused_" #name); \
-      next_member_is_specialized = true; \
-    }
-
-#  include "kernel/data_template.h"
-
-    /* Opt in to all of available specializations. This can be made more granular for the
-     * PSO_SPECIALIZED_INTERSECT case in order to minimize the number of specialization requests,
-     * but the overhead should be negligible as these are very quick to (re)build and aren't
-     * serialized to disk via MTLBinaryArchives.
-     */
-    global_defines += "#define __KERNEL_USE_DATA_CONSTANTS__\n";
-
-    metal_printf("KernelData patching took %.1f ms\n", (time_dt() - starttime) * 1000.0);
-  }
-
-  source = global_defines + source;
-#  if 0
-  metal_printf("================\n%s================\n\%s================\n",
-               global_defines.c_str(),
-               baked_constants.c_str());
-#  endif
-
-  /* Generate an MD5 from the source and include any baked constants. This is used when caching
-   * PSOs. */
-  MD5Hash md5;
-  md5.append(baked_constants);
-  md5.append(source);
-  if (use_metalrt) {
-    md5.append(std::to_string(kernel_features & METALRT_FEATURE_MASK));
-  }
-  source_md5[pso_type] = md5.get_hex();
+  global_defines_md5[pso_type] = preprocess_source(pso_type, kernel_features, &source);
 }

 bool MetalDevice::load_kernels(const uint _kernel_features)
@@ -431,9 +452,49 @@ bool MetalDevice::load_kernels(const uint _kernel_features)

 bool MetalDevice::make_source_and_check_if_compile_needed(MetalPipelineType pso_type)
 {
-  if (this->source[pso_type].empty()) {
+  string defines_md5 = preprocess_source(pso_type, kernel_features);
+
+  /* Rebuild the source string if the injected block of #defines has changed. */
+  if (global_defines_md5[pso_type] != defines_md5) {
    make_source(pso_type, kernel_features);
  }
+
+  string constant_values;
+  if (pso_type != PSO_GENERIC) {
+    bool next_member_is_specialized = true;
+
+#  define KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE next_member_is_specialized = false;
+
+    /* Add specialization constants to md5 so that 'get_best_pipeline' is able to return a suitable
+     * match. */
+#  define KERNEL_STRUCT_MEMBER(parent, _type, name) \
+    if (next_member_is_specialized) { \
+      constant_values += string(#parent "." #name "=") + \
+                         to_string(_type(launch_params.data.parent.name)) + "\n"; \
+    } \
+    else { \
+      next_member_is_specialized = true; \
+    }
+
+#  include "kernel/data_template.h"
+
+#  undef KERNEL_STRUCT_MEMBER
+#  undef KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE
+
+#  if 0
+    metal_printf("================\n%s================\n",
+                constant_values.c_str());
+#  endif
+  }
+
+  MD5Hash md5;
+  md5.append(constant_values);
+  md5.append(source[pso_type]);
+  if (use_metalrt) {
+    md5.append(string_printf("metalrt_features=%d", kernel_features & METALRT_FEATURE_MASK));
+  }
+  kernels_md5[pso_type] = md5.get_hex();
+
  return MetalDeviceKernels::should_load_kernels(this, pso_type);
 }

@@ -541,6 +602,11 @@ void MetalDevice::compile_and_load(int device_id, MetalPipelineType pso_type)
  }
 }

+bool MetalDevice::is_texture(const TextureInfo &tex)
+{
+  return (tex.depth > 0 || tex.height > 0);
+}
+
 void MetalDevice::load_texture_info()
 {
  if (need_texture_info) {
@@ -552,21 +618,20 @@ void MetalDevice::load_texture_info()

    for (int tex = 0; tex < num_textures; tex++) {
      uint64_t offset = tex * sizeof(void *);
-
-      id<MTLTexture> metal_texture = texture_slot_map[tex];
-      if (!metal_texture) {
-        [mtlTextureArgEncoder setArgumentBuffer:texture_bindings_2d offset:offset];
-        [mtlTextureArgEncoder setTexture:nil atIndex:0];
-        [mtlTextureArgEncoder setArgumentBuffer:texture_bindings_3d offset:offset];
-        [mtlTextureArgEncoder setTexture:nil atIndex:0];
-      }
-      else {
+      if (is_texture(texture_info[tex]) && texture_slot_map[tex]) {
+        id<MTLTexture> metal_texture = texture_slot_map[tex];
        MTLTextureType type = metal_texture.textureType;
        [mtlTextureArgEncoder setArgumentBuffer:texture_bindings_2d offset:offset];
        [mtlTextureArgEncoder setTexture:type == MTLTextureType2D ? metal_texture : nil atIndex:0];
        [mtlTextureArgEncoder setArgumentBuffer:texture_bindings_3d offset:offset];
        [mtlTextureArgEncoder setTexture:type == MTLTextureType3D ? metal_texture : nil atIndex:0];
      }
+      else {
+        [mtlTextureArgEncoder setArgumentBuffer:texture_bindings_2d offset:offset];
+        [mtlTextureArgEncoder setTexture:nil atIndex:0];
+        [mtlTextureArgEncoder setArgumentBuffer:texture_bindings_3d offset:offset];
+        [mtlTextureArgEncoder setTexture:nil atIndex:0];
+      }
    }
    if (default_storage_mode == MTLResourceStorageModeManaged) {
      [texture_bindings_2d didModifyRange:NSMakeRange(0, num_textures * sizeof(void *))];
@@ -585,7 +650,7 @@ void MetalDevice::erase_allocation(device_memory &mem)
  if (it != metal_mem_map.end()) {
    MetalMem *mmem = it->second.get();

-    /* blank out reference to MetalMem* in the launch params (fixes crash T94736) */
+    /* blank out reference to MetalMem* in the launch params (fixes crash #94736) */
    if (mmem->pointer_index >= 0) {
      device_ptr *pointers = (device_ptr *)&launch_params;
      pointers[mmem->pointer_index] = 0;
@@ -739,7 +804,6 @@ void MetalDevice::generic_free(device_memory &mem)
      mem.shared_pointer = 0;

      /* Free device memory. */
-      delayed_free_list.push_back(mmem.mtlBuffer);
      mmem.mtlBuffer = nil;
    }

@@ -861,6 +925,11 @@ void MetalDevice::cancel()

 bool MetalDevice::is_ready(string &status) const
 {
+  if (!error_msg.empty()) {
+    /* Avoid hanging if we had an error. */
+    return true;
+  }
+
  int num_loaded = MetalDeviceKernels::get_loaded_kernel_count(this, PSO_GENERIC);
  if (num_loaded < DEVICE_KERNEL_NUM) {
    status = string_printf("%d / %d render kernels loaded (may take a few minutes the first time)",
@@ -868,6 +937,17 @@ bool MetalDevice::is_ready(string &status) const
                           DEVICE_KERNEL_NUM);
    return false;
  }
+
+  if (int num_requests = MetalDeviceKernels::num_incomplete_specialization_requests()) {
+    status = string_printf("%d kernels to optimize", num_requests);
+  }
+  else if (kernel_specialization_level == PSO_SPECIALIZED_INTERSECT) {
+    status = "Using optimized intersection kernels";
+  }
+  else if (kernel_specialization_level == PSO_SPECIALIZED_SHADE) {
+    status = "Using optimized kernels";
+  }
+
  metal_printf("MetalDevice::is_ready(...) --> true\n");
  return true;
 }
@@ -904,7 +984,7 @@ void MetalDevice::optimize_for_scene(Scene *scene)
  }

  if (specialize_in_background) {
-    if (!MetalDeviceKernels::any_specialization_happening_now()) {
+    if (MetalDeviceKernels::num_incomplete_specialization_requests() == 0) {
      dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0),
                     specialize_kernels_fn);
    }
@@ -974,7 +1054,7 @@ void MetalDevice::global_free(device_memory &mem)

 void MetalDevice::tex_alloc_as_buffer(device_texture &mem)
 {
-  generic_alloc(mem);
+  MetalDevice::MetalMem *mmem = generic_alloc(mem);
  generic_copy_to(mem);

  /* Resize once */
@@ -983,27 +1063,32 @@ void MetalDevice::tex_alloc_as_buffer(device_texture &mem)
    /* Allocate some slots in advance, to reduce amount
     * of re-allocations. */
    texture_info.resize(round_up(slot + 1, 128));
+    texture_slot_map.resize(round_up(slot + 1, 128));
  }

-  mem.info.data = (uint64_t)mem.device_pointer;
-
-  /* Set Mapping and tag that we need to (re-)upload to device */
  texture_info[slot] = mem.info;
+  uint64_t offset = slot * sizeof(void *);
+  [mtlBufferArgEncoder setArgumentBuffer:buffer_bindings_1d offset:offset];
+  [mtlBufferArgEncoder setBuffer:mmem->mtlBuffer offset:0 atIndex:0];
+  texture_info[slot].data = *(uint64_t *)((uint64_t)buffer_bindings_1d.contents + offset);
+  texture_slot_map[slot] = nil;
  need_texture_info = true;
 }

 void MetalDevice::tex_alloc(device_texture &mem)
 {
  /* Check that dimensions fit within maximum allowable size.
+   * Textures without a height (1D) are allocated as buffers and skip this check.
   * See: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf */
-  if (mem.data_width > 16384 || mem.data_height > 16384) {
-    set_error(string_printf(
-        "Texture exceeds maximum allowed size of 16384 x 16384 (requested: %zu x %zu)",
-        mem.data_width,
-        mem.data_height));
-    return;
+  if (mem.data_height > 0) {
+    if (mem.data_width > 16384 || mem.data_height > 16384) {
+      set_error(string_printf(
+          "Texture exceeds maximum allowed size of 16384 x 16384 (requested: %zu x %zu)",
+          mem.data_width,
+          mem.data_height));
+      return;
+    }
  }
-
  MTLStorageMode storage_mode = MTLStorageModeManaged;
  if (@available(macos 10.15, *)) {
    if ([mtlDevice hasUnifiedMemory] &&
@@ -1143,8 +1228,9 @@ void MetalDevice::tex_alloc(device_texture &mem)
                  bytesPerRow:src_pitch];
  }
  else {
-    assert(0);
    /* 1D texture, using linear memory. */
+    tex_alloc_as_buffer(mem);
+    return;
  }

  mem.device_pointer = (device_ptr)mtlTexture;
@@ -1168,17 +1254,22 @@ void MetalDevice::tex_alloc(device_texture &mem)
    ssize_t min_buffer_length = sizeof(void *) * texture_info.size();
    if (!texture_bindings_2d || (texture_bindings_2d.length < min_buffer_length)) {
      if (texture_bindings_2d) {
+        delayed_free_list.push_back(buffer_bindings_1d);
        delayed_free_list.push_back(texture_bindings_2d);
        delayed_free_list.push_back(texture_bindings_3d);

-        stats.mem_free(texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize);
+        stats.mem_free(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize +
+                       texture_bindings_3d.allocatedSize);
      }
+      buffer_bindings_1d = [mtlDevice newBufferWithLength:min_buffer_length
+                                                  options:default_storage_mode];
      texture_bindings_2d = [mtlDevice newBufferWithLength:min_buffer_length
                                                   options:default_storage_mode];
      texture_bindings_3d = [mtlDevice newBufferWithLength:min_buffer_length
                                                   options:default_storage_mode];

-      stats.mem_alloc(texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize);
+      stats.mem_alloc(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize +
+                      texture_bindings_3d.allocatedSize);
    }
  }

@@ -1205,12 +1296,18 @@ void MetalDevice::tex_alloc(device_texture &mem)

 void MetalDevice::tex_free(device_texture &mem)
 {
+  if (mem.data_depth == 0 && mem.data_height == 0) {
+    generic_free(mem);
+    return;
+  }
+
  if (metal_mem_map.count(&mem)) {
    std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex);
    MetalMem &mmem = *metal_mem_map.at(&mem);

    assert(texture_slot_map[mem.slot] == mmem.mtlTexture);
-    texture_slot_map[mem.slot] = nil;
+    if (texture_slot_map[mem.slot] == mmem.mtlTexture)
+      texture_slot_map[mem.slot] = nil;

    if (mmem.mtlTexture) {
      /* Free bindless texture. */
--- a/intern/cycles/device/metal/kernel.h
+++ b/intern/cycles/device/metal/kernel.h
@@ -63,8 +63,7 @@ enum MetalPipelineType {
 };

 #  define METALRT_FEATURE_MASK \
-    (KERNEL_FEATURE_HAIR | KERNEL_FEATURE_HAIR_THICK | KERNEL_FEATURE_POINTCLOUD | \
-     KERNEL_FEATURE_OBJECT_MOTION)
+    (KERNEL_FEATURE_HAIR | KERNEL_FEATURE_HAIR_THICK | KERNEL_FEATURE_POINTCLOUD)

 const char *kernel_type_as_string(MetalPipelineType pso_type);

@@ -76,12 +75,12 @@ struct MetalKernelPipeline {

  id<MTLLibrary> mtlLibrary = nil;
  MetalPipelineType pso_type;
-  string source_md5;
+  string kernels_md5;
  size_t usage_count = 0;

  KernelData kernel_data_;
  bool use_metalrt;
-  uint32_t metalrt_features = 0;
+  uint32_t kernel_features = 0;

  int threads_per_threadgroup;

@@ -104,7 +103,7 @@ struct MetalKernelPipeline {
 /* Cache of Metal kernels for each DeviceKernel. */
 namespace MetalDeviceKernels {

-bool any_specialization_happening_now();
+int num_incomplete_specialization_requests();
 int get_loaded_kernel_count(MetalDevice const *device, MetalPipelineType pso_type);
 bool should_load_kernels(MetalDevice const *device, MetalPipelineType pso_type);
 bool load(MetalDevice *device, MetalPipelineType pso_type);
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -292,7 +292,7 @@ bool ShaderCache::should_load_kernel(DeviceKernel device_kernel,
    /* check whether the kernel has already been requested / cached */
    thread_scoped_lock lock(cache_mutex);
    for (auto &pipeline : pipelines[device_kernel]) {
-      if (pipeline->source_md5 == device->source_md5[pso_type]) {
+      if (pipeline->kernels_md5 == device->kernels_md5[pso_type]) {
        return false;
      }
    }
@@ -332,7 +332,7 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
  memcpy(&pipeline->kernel_data_, &device->launch_params.data, sizeof(pipeline->kernel_data_));
  pipeline->pso_type = pso_type;
  pipeline->mtlDevice = mtlDevice;
-  pipeline->source_md5 = device->source_md5[pso_type];
+  pipeline->kernels_md5 = device->kernels_md5[pso_type];
  pipeline->mtlLibrary = device->mtlLibrary[pso_type];
  pipeline->device_kernel = device_kernel;
  pipeline->threads_per_threadgroup = device->max_threads_per_threadgroup;
@@ -344,9 +344,7 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,

  /* metalrt options */
  pipeline->use_metalrt = device->use_metalrt;
-  pipeline->metalrt_features = device->use_metalrt ?
-                                   (device->kernel_features & METALRT_FEATURE_MASK) :
-                                   0;
+  pipeline->kernel_features = device->kernel_features;

  {
    thread_scoped_lock lock(cache_mutex);
@@ -357,65 +355,36 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,

 MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const MetalDevice *device)
 {
-  /* metalrt options */
-  bool use_metalrt = device->use_metalrt;
-  bool device_metalrt_hair = use_metalrt && device->kernel_features & KERNEL_FEATURE_HAIR;
-  bool device_metalrt_hair_thick = use_metalrt &&
-                                   device->kernel_features & KERNEL_FEATURE_HAIR_THICK;
-  bool device_metalrt_pointcloud = use_metalrt &&
-                                   device->kernel_features & KERNEL_FEATURE_POINTCLOUD;
-  bool device_metalrt_motion = use_metalrt &&
-                               device->kernel_features & KERNEL_FEATURE_OBJECT_MOTION;
-
-  MetalKernelPipeline *best_pipeline = nullptr;
-  while (!best_pipeline) {
+  while (running) {
+    /* Search all loaded pipelines with matching kernels_md5 checksums. */
+    MetalKernelPipeline *best_match = nullptr;
    {
      thread_scoped_lock lock(cache_mutex);
-      for (auto &pipeline : pipelines[kernel]) {
-        if (!pipeline->loaded) {
-          /* still loading - ignore */
-          continue;
-        }
-
-        bool pipeline_metalrt_hair = pipeline->metalrt_features & KERNEL_FEATURE_HAIR;
-        bool pipeline_metalrt_hair_thick = pipeline->metalrt_features & KERNEL_FEATURE_HAIR_THICK;
-        bool pipeline_metalrt_pointcloud = pipeline->metalrt_features & KERNEL_FEATURE_POINTCLOUD;
-        bool pipeline_metalrt_motion = use_metalrt &&
-                                       pipeline->metalrt_features & KERNEL_FEATURE_OBJECT_MOTION;
-
-        if (pipeline->use_metalrt != use_metalrt || pipeline_metalrt_hair != device_metalrt_hair ||
-            pipeline_metalrt_hair_thick != device_metalrt_hair_thick ||
-            pipeline_metalrt_pointcloud != device_metalrt_pointcloud ||
-            pipeline_metalrt_motion != device_metalrt_motion) {
-          /* wrong combination of metalrt options */
-          continue;
-        }
-
-        if (pipeline->pso_type != PSO_GENERIC) {
-          if (pipeline->source_md5 == device->source_md5[PSO_SPECIALIZED_INTERSECT] ||
-              pipeline->source_md5 == device->source_md5[PSO_SPECIALIZED_SHADE]) {
-            best_pipeline = pipeline.get();
+      for (auto &candidate : pipelines[kernel]) {
+        if (candidate->loaded &&
+            candidate->kernels_md5 == device->kernels_md5[candidate->pso_type]) {
+          /* Replace existing match if candidate is more specialized. */
+          if (!best_match || candidate->pso_type > best_match->pso_type) {
+            best_match = candidate.get();
          }
        }
-        else if (!best_pipeline) {
-          best_pipeline = pipeline.get();
-        }
      }
    }

-    if (!best_pipeline) {
-      std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    if (best_match) {
+      if (best_match->usage_count == 0 && best_match->pso_type != PSO_GENERIC) {
+        metal_printf("Swapping in %s version of %s\n",
+                     kernel_type_as_string(best_match->pso_type),
+                     device_kernel_as_string(kernel));
+      }
+      best_match->usage_count += 1;
+      return best_match;
    }
-  }

-  if (best_pipeline->usage_count == 0 && best_pipeline->pso_type != PSO_GENERIC) {
-    metal_printf("Swapping in %s version of %s\n",
-                 kernel_type_as_string(best_pipeline->pso_type),
-                 device_kernel_as_string(kernel));
+    /* Sleep, then retry until a matching kernel is loaded or we're shutting down. */
+    std::this_thread::sleep_for(std::chrono::milliseconds(100));
  }
-  best_pipeline->usage_count += 1;
-
-  return best_pipeline;
+  return nullptr;
 }

 bool MetalKernelPipeline::should_use_binary_archive() const
@@ -428,11 +397,12 @@ bool MetalKernelPipeline::should_use_binary_archive() const
        return false;
      }
    }
-
-    /* Workaround for Intel GPU having issue using Binary Archives */
-    MetalGPUVendor gpu_vendor = MetalInfo::get_device_vendor(mtlDevice);
-    if (gpu_vendor == METAL_GPU_INTEL) {
-      return false;
+    else {
+      /* Workaround for issues using Binary Archives on non-Apple Silicon systems. */
+      MetalGPUVendor gpu_vendor = MetalInfo::get_device_vendor(mtlDevice);
+      if (gpu_vendor != METAL_GPU_APPLE) {
+        return false;
+      }
    }

    if (pso_type == PSO_GENERIC) {
@@ -440,8 +410,10 @@ bool MetalKernelPipeline::should_use_binary_archive() const
      return true;
    }

-    if (device_kernel >= DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND &&
-        device_kernel <= DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW) {
+    if ((device_kernel >= DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND &&
+         device_kernel <= DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW) ||
+        (device_kernel >= DEVICE_KERNEL_SHADER_EVAL_DISPLACE &&
+         device_kernel <= DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY)) {
      /* Archive all shade kernels - they take a long time to compile. */
      return true;
    }
@@ -567,18 +539,14 @@ void MetalKernelPipeline::compile()
  NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
  NSArray *linked_functions = nil;

-  bool metalrt_hair = use_metalrt && (metalrt_features & KERNEL_FEATURE_HAIR);
-  bool metalrt_hair_thick = use_metalrt && (metalrt_features & KERNEL_FEATURE_HAIR_THICK);
-  bool metalrt_pointcloud = use_metalrt && (metalrt_features & KERNEL_FEATURE_POINTCLOUD);
-
  if (use_metalrt) {
    id<MTLFunction> curve_intersect_default = nil;
    id<MTLFunction> curve_intersect_shadow = nil;
    id<MTLFunction> point_intersect_default = nil;
    id<MTLFunction> point_intersect_shadow = nil;
-    if (metalrt_hair) {
+    if (kernel_features & KERNEL_FEATURE_HAIR) {
      /* Add curve intersection programs. */
-      if (metalrt_hair_thick) {
+      if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
        /* Slower programs for thick hair since that also slows down ribbons.
         * Ideally this should not be needed. */
        curve_intersect_default = rt_intersection_function[METALRT_FUNC_CURVE_ALL];
@@ -589,7 +557,7 @@ void MetalKernelPipeline::compile()
        curve_intersect_shadow = rt_intersection_function[METALRT_FUNC_CURVE_RIBBON_SHADOW];
      }
    }
-    if (metalrt_pointcloud) {
+    if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
      point_intersect_default = rt_intersection_function[METALRT_FUNC_POINT];
      point_intersect_shadow = rt_intersection_function[METALRT_FUNC_POINT_SHADOW];
    }
@@ -674,20 +642,11 @@ void MetalKernelPipeline::compile()
    NSProcessInfo *processInfo = [NSProcessInfo processInfo];
    string osVersion = [[processInfo operatingSystemVersionString] UTF8String];
    MD5Hash local_md5;
-    local_md5.append(source_md5);
+    local_md5.append(kernels_md5);
    local_md5.append(osVersion);
    local_md5.append((uint8_t *)&this->threads_per_threadgroup,
                     sizeof(this->threads_per_threadgroup));

-    string options;
-    if (use_metalrt && kernel_has_intersection(device_kernel)) {
-      /* incorporate any MetalRT specializations into the archive name */
-      options += string_printf(".hair_%d.hair_thick_%d.pointcloud_%d",
-                               metalrt_hair ? 1 : 0,
-                               metalrt_hair_thick ? 1 : 0,
-                               metalrt_pointcloud ? 1 : 0);
-    }
-
    /* Replace non-alphanumerical characters with underscores. */
    string device_name = [mtlDevice.name UTF8String];
    for (char &c : device_name) {
@@ -699,7 +658,7 @@ void MetalKernelPipeline::compile()
    metalbin_name = device_name;
    metalbin_name = path_join(metalbin_name, device_kernel_as_string(device_kernel));
    metalbin_name = path_join(metalbin_name, kernel_type_as_string(pso_type));
-    metalbin_name = path_join(metalbin_name, local_md5.get_hex() + options + ".bin");
+    metalbin_name = path_join(metalbin_name, local_md5.get_hex() + ".bin");

    metalbin_path = path_cache_get(path_join("kernels", metalbin_name));
    path_create_directories(metalbin_path);
@@ -857,16 +816,15 @@ void MetalDeviceKernels::wait_for_all()
  }
 }

-bool MetalDeviceKernels::any_specialization_happening_now()
+int MetalDeviceKernels::num_incomplete_specialization_requests()
 {
  /* Return the total number of incomplete specialization requests across all ShaderCaches
   * (typically there will be only 1). */
+  int total = 0;
  for (int i = 0; i < g_shaderCacheCount; i++) {
-    if (g_shaderCache[i].second->incomplete_specialization_requests > 0) {
-      return true;
-    }
+    total += g_shaderCache[i].second->incomplete_specialization_requests;
  }
-  return false;
+  return total;
 }

 int MetalDeviceKernels::get_loaded_kernel_count(MetalDevice const *device,
--- a/intern/cycles/device/metal/queue.mm
+++ b/intern/cycles/device/metal/queue.mm
@@ -477,17 +477,21 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
  [metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->texture_bindings_3d
                                            offset:0
                                           atIndex:1];
+  [metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->buffer_bindings_1d
+                                            offset:0
+                                           atIndex:2];
+
  if (@available(macos 12.0, *)) {
    if (metal_device_->use_metalrt) {
      if (metal_device_->bvhMetalRT) {
        id<MTLAccelerationStructure> accel_struct = metal_device_->bvhMetalRT->accel_struct;
-        [metal_device_->mtlAncillaryArgEncoder setAccelerationStructure:accel_struct atIndex:2];
+        [metal_device_->mtlAncillaryArgEncoder setAccelerationStructure:accel_struct atIndex:3];
        [metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->blas_buffer
                                                  offset:0
-                                                 atIndex:7];
+                                                 atIndex:8];
        [metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->blas_lookup_buffer
                                                  offset:0
-                                                 atIndex:8];
+                                                 atIndex:9];
      }

      for (int table = 0; table < METALRT_TABLE_NUM; table++) {
@@ -497,13 +501,13 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
                                                              atIndex:1];
          [metal_device_->mtlAncillaryArgEncoder
              setIntersectionFunctionTable:metal_kernel_pso->intersection_func_table[table]
-                                   atIndex:3 + table];
+                                   atIndex:4 + table];
          [mtlComputeCommandEncoder useResource:metal_kernel_pso->intersection_func_table[table]
                                          usage:MTLResourceUsageRead];
        }
        else {
          [metal_device_->mtlAncillaryArgEncoder setIntersectionFunctionTable:nil
-                                                                      atIndex:3 + table];
+                                                                      atIndex:4 + table];
        }
      }
    }
@@ -874,6 +878,7 @@ void MetalDeviceQueue::prepare_resources(DeviceKernel kernel)
  /* ancillaries */
  [mtlComputeEncoder_ useResource:metal_device_->texture_bindings_2d usage:MTLResourceUsageRead];
  [mtlComputeEncoder_ useResource:metal_device_->texture_bindings_3d usage:MTLResourceUsageRead];
+  [mtlComputeEncoder_ useResource:metal_device_->buffer_bindings_1d usage:MTLResourceUsageRead];
 }

 id<MTLComputeCommandEncoder> MetalDeviceQueue::get_compute_encoder(DeviceKernel kernel)
--- a/intern/cycles/device/metal/util.mm
+++ b/intern/cycles/device/metal/util.mm
@@ -103,7 +103,7 @@ vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
  }

  /* If the system has both an AMD GPU (discrete) and an Intel one (integrated), prefer the AMD
-   * one. This can be overriden with CYCLES_METAL_FORCE_INTEL. */
+   * one. This can be overridden with CYCLES_METAL_FORCE_INTEL. */
  bool has_usable_amd_gpu = false;
  if (@available(macos 12.3, *)) {
    for (id<MTLDevice> device in MTLCopyAllDevices()) {
--- a/intern/cycles/integrator/render_scheduler.cpp
+++ b/intern/cycles/integrator/render_scheduler.cpp
@@ -886,7 +886,7 @@ int RenderScheduler::get_num_samples_during_navigation(int resolution_divider) c
 {
  /* Special trick for fast navigation: schedule multiple samples during fast navigation
   * (which will prefer to use lower resolution to keep up with refresh rate). This gives more
-   * usable visual feedback for artists. There are a couple of tricks though. */
+   * usable visual feedback for artists. */

  if (is_denoise_active_during_update()) {
    /* When denoising is used during navigation prefer using a higher resolution with fewer samples
@@ -896,25 +896,12 @@ int RenderScheduler::get_num_samples_during_navigation(int resolution_divider) c
    return 1;
  }

-  if (resolution_divider <= pixel_size_) {
-    /* When resolution divider is at or below pixel size, schedule one sample. This doesn't effect
-     * the sample count at this resolution division, but instead assists in the calculation of
-     * the resolution divider. */
-    return 1;
-  }
-
-  if (resolution_divider == pixel_size_ * 2) {
-    /* When resolution divider is the previous step to the final resolution, schedule two samples.
-     * This is so that rendering on lower resolution does not exceed time that it takes to render
-     * first sample at the full resolution. */
-    return 2;
-  }
-
-  /* Always render 4 samples, even if scene is configured for less.
-   * The idea here is to have enough information on the screen. Resolution divider of 2 allows us
-   * to have 4 time extra samples, so overall worst case timing is the same as the final resolution
-   * at one sample. */
-  return 4;
+  /* Schedule samples equal to the resolution divider up to a maximum of 4.
+   * The idea is to have enough information on the screen by increasing the sample count as the
+   * resolution is decreased. */
+  /* NOTE: Changing this formula also affects the calculation in
+   * `RenderScheduler::calculate_resolution_divider_for_time()`, which calls this function. */
+  return min(max(1, resolution_divider / pixel_size_), 4);
 }

 bool RenderScheduler::work_need_adaptive_filter() const
@@ -1100,9 +1087,10 @@ void RenderScheduler::update_start_resolution_divider()
  /* TODO(sergey): Need to add hysteresis to avoid resolution divider bouncing around when actual
   * render time is somewhere on a boundary between two resolutions. */

-  /* Never increase resolution to higher than the pixel size (which is possible if the scene is
-   * simple and compute device is fast). */
-  start_resolution_divider_ = max(resolution_divider_for_update, pixel_size_);
+  /* Don't let resolution drop below the desired one. It's better to be slow than provide an
+   * unreadable viewport render. */
+  start_resolution_divider_ = min(resolution_divider_for_update,
+                                  default_start_resolution_divider_);

  VLOG_WORK << "Calculated resolution divider is " << start_resolution_divider_;
 }
@@ -1187,24 +1175,24 @@ void RenderScheduler::check_time_limit_reached()

 int RenderScheduler::calculate_resolution_divider_for_time(double desired_time, double actual_time)
 {
-  /* TODO(sergey): There should a non-iterative analytical formula here. */
+  const double ratio_between_times = actual_time / desired_time;

-  int resolution_divider = 1;
+  /* We can pass `ratio_between_times` to `get_num_samples_during_navigation()` to get our
+   * navigation samples because the equation for calculating the resolution divider is as follows:
+   * `actual_time / desired_time = sqr(resolution_divider) / sample_count`.
+   * While `resolution_divider` is less than or equal to 4, `resolution_divider = sample_count`
+   * (This relationship is determined in `get_num_samples_during_navigation()`). With some
+   * substitution we end up with `actual_time / desired_time = resolution_divider` while the
+   * resolution divider is less than or equal to 4. Once the resolution divider increases above 4,
+   * the relationship of `actual_time / desired_time = resolution_divider` is no longer true,
+   * however the sample count retrieved from `get_num_samples_during_navigation()` is still
+   * accurate if we continue using this assumption. It should be noted that the interaction between
+   * `pixel_size`, sample count, and resolution divider are automatically accounted for and that's
+   * why `pixel_size` isn't included in any of the equations. */
+  const int navigation_samples = get_num_samples_during_navigation(
+      ceil_to_int(ratio_between_times));

-  /* This algorithm iterates through resolution dividers until a divider is found that achieves
-   * the desired render time. A limit of default_start_resolution_divider_ is put in place as the
-   * maximum resolution divider to avoid an unreadable viewport due to a low resolution.
-   * pre_resolution_division_samples and post_resolution_division_samples are used in this
-   * calculation to better predict the performance impact of changing resolution divisions as
-   * the sample count can also change between resolution divisions. */
-  while (actual_time > desired_time && resolution_divider < default_start_resolution_divider_) {
-    int pre_resolution_division_samples = get_num_samples_during_navigation(resolution_divider);
-    resolution_divider = resolution_divider * 2;
-    int post_resolution_division_samples = get_num_samples_during_navigation(resolution_divider);
-    actual_time /= 4.0 * pre_resolution_division_samples / post_resolution_division_samples;
-  }
-
-  return resolution_divider;
+  return ceil_to_int(sqrt(navigation_samples * ratio_between_times));
 }

 int calculate_resolution_divider_for_resolution(int width, int height, int resolution)
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -113,8 +113,6 @@ set(SRC_KERNEL_CLOSURE_HEADERS
  closure/bsdf_microfacet_multi_impl.h
  closure/bsdf_oren_nayar.h
  closure/bsdf_phong_ramp.h
-  closure/bsdf_reflection.h
-  closure/bsdf_refraction.h
  closure/bsdf_toon.h
  closure/bsdf_transparent.h
  closure/bsdf_util.h
@@ -126,6 +124,7 @@ set(SRC_KERNEL_CLOSURE_HEADERS
  closure/bsdf_principled_diffuse.h
  closure/bsdf_principled_sheen.h
  closure/bsdf_hair_principled.h
+  closure/bsdf_hair_microfacet.h
 )

 set(SRC_KERNEL_SVM_HEADERS
@@ -412,11 +411,12 @@ if(WITH_CYCLES_CUDA_BINARIES)
  # warn for other versions
  if((CUDA_VERSION STREQUAL "101") OR
     (CUDA_VERSION STREQUAL "102") OR
-     (CUDA_VERSION_MAJOR STREQUAL "11"))
+     (CUDA_VERSION_MAJOR STREQUAL "11") OR
+     (CUDA_VERSION_MAJOR STREQUAL "12"))
  else()
    message(WARNING
      "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, "
-      "build may succeed but only CUDA 11, 10.2 and 10.1 have been tested")
+      "build may succeed but only CUDA 12, 11, 10.2 and 10.1 have been tested")
  endif()

  # build for each arch
@@ -514,6 +514,16 @@ if(WITH_CYCLES_CUDA_BINARIES)
      else()
        message(STATUS "CUDA binaries for ${arch} require CUDA 10 or earlier, skipped.")
      endif()
+    elseif(${arch} MATCHES ".*_3.")
+      if(DEFINED CUDA11_NVCC_EXECUTABLE)
+        set(cuda_nvcc_executable ${CUDA11_NVCC_EXECUTABLE})
+        set(cuda_toolkit_root_dir ${CUDA11_TOOLKIT_ROOT_DIR})
+      elseif("${CUDA_VERSION}" LESS 120) # Support for sm_35, sm_37 was removed in CUDA 12
+        set(cuda_nvcc_executable ${CUDA_NVCC_EXECUTABLE})
+        set(cuda_toolkit_root_dir ${CUDA_TOOLKIT_ROOT_DIR})
+      else()
+        message(STATUS "CUDA binaries for ${arch} require CUDA 11 or earlier, skipped.")
+      endif()
    elseif(${arch} MATCHES ".*_7." AND "${CUDA_VERSION}" LESS 100)
      message(STATUS "CUDA binaries for ${arch} require CUDA 10.0+, skipped.")
    elseif(${arch} MATCHES ".*_8.")
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -11,13 +11,12 @@
 #include "kernel/closure/bsdf_diffuse_ramp.h"
 #include "kernel/closure/bsdf_microfacet.h"
 #include "kernel/closure/bsdf_microfacet_multi.h"
-#include "kernel/closure/bsdf_reflection.h"
-#include "kernel/closure/bsdf_refraction.h"
 #include "kernel/closure/bsdf_transparent.h"
 #include "kernel/closure/bsdf_ashikhmin_shirley.h"
 #include "kernel/closure/bsdf_toon.h"
 #include "kernel/closure/bsdf_hair.h"
 #include "kernel/closure/bsdf_hair_principled.h"
+#include "kernel/closure/bsdf_hair_microfacet.h"
 #include "kernel/closure/bsdf_principled_diffuse.h"
 #include "kernel/closure/bsdf_principled_sheen.h"
 #include "kernel/closure/bssrdf.h"
@@ -110,8 +109,8 @@ ccl_device_inline bool bsdf_is_transmission(ccl_private const ShaderClosure *sc,
 ccl_device_inline int bsdf_sample(KernelGlobals kg,
                                  ccl_private ShaderData *sd,
                                  ccl_private const ShaderClosure *sc,
-                                  float randu,
-                                  float randv,
+                                  const int path_flag,
+                                  const float3 rand,
                                  ccl_private Spectrum *eval,
                                  ccl_private float3 *wo,
                                  ccl_private float *pdf,
@@ -125,110 +124,160 @@ ccl_device_inline int bsdf_sample(KernelGlobals kg,

  switch (sc->type) {
    case CLOSURE_BSDF_DIFFUSE_ID:
-      label = bsdf_diffuse_sample(sc, Ng, sd->wi, randu, randv, eval, wo, pdf);
+      label = bsdf_diffuse_sample(sc, Ng, sd->wi, rand.x, rand.y, eval, wo, pdf);
      *sampled_roughness = one_float2();
      *eta = 1.0f;
      break;
 #if defined(__SVM__) || defined(__OSL__)
    case CLOSURE_BSDF_OREN_NAYAR_ID:
-      label = bsdf_oren_nayar_sample(sc, Ng, sd->wi, randu, randv, eval, wo, pdf);
+      label = bsdf_oren_nayar_sample(sc, Ng, sd->wi, rand.x, rand.y, eval, wo, pdf);
      *sampled_roughness = one_float2();
      *eta = 1.0f;
      break;
 #  ifdef __OSL__
    case CLOSURE_BSDF_PHONG_RAMP_ID:
      label = bsdf_phong_ramp_sample(
-          sc, Ng, sd->wi, randu, randv, eval, wo, pdf, sampled_roughness);
+          sc, Ng, sd->wi, rand.x, rand.y, eval, wo, pdf, sampled_roughness);
      *eta = 1.0f;
      break;
    case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
-      label = bsdf_diffuse_ramp_sample(sc, Ng, sd->wi, randu, randv, eval, wo, pdf);
+      label = bsdf_diffuse_ramp_sample(sc, Ng, sd->wi, rand.x, rand.y, eval, wo, pdf);
      *sampled_roughness = one_float2();
      *eta = 1.0f;
      break;
 #  endif
    case CLOSURE_BSDF_TRANSLUCENT_ID:
-      label = bsdf_translucent_sample(sc, Ng, sd->wi, randu, randv, eval, wo, pdf);
+      label = bsdf_translucent_sample(sc, Ng, sd->wi, rand.x, rand.y, eval, wo, pdf);
      *sampled_roughness = one_float2();
      *eta = 1.0f;
      break;
-    case CLOSURE_BSDF_REFLECTION_ID:
-      label = bsdf_reflection_sample(sc, Ng, sd->wi, randu, randv, eval, wo, pdf, eta);
-      *sampled_roughness = zero_float2();
-      break;
-    case CLOSURE_BSDF_REFRACTION_ID:
-      label = bsdf_refraction_sample(sc, Ng, sd->wi, randu, randv, eval, wo, pdf, eta);
-      *sampled_roughness = zero_float2();
-      break;
    case CLOSURE_BSDF_TRANSPARENT_ID:
-      label = bsdf_transparent_sample(sc, Ng, sd->wi, randu, randv, eval, wo, pdf);
+      label = bsdf_transparent_sample(sc, Ng, sd->wi, rand.x, rand.y, eval, wo, pdf);
      *sampled_roughness = zero_float2();
      *eta = 1.0f;
      break;
+    case CLOSURE_BSDF_REFLECTION_ID:
+    case CLOSURE_BSDF_REFRACTION_ID:
+    case CLOSURE_BSDF_SHARP_GLASS_ID:
+      label = bsdf_microfacet_sharp_sample(sc,
+                                           path_flag,
+                                           Ng,
+                                           sd->wi,
+                                           rand.x,
+                                           rand.y,
+                                           rand.z,
+                                           eval,
+                                           wo,
+                                           pdf,
+                                           sampled_roughness,
+                                           eta);
+      break;
    case CLOSURE_BSDF_MICROFACET_GGX_ID:
-    case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
    case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
    case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
-      label = bsdf_microfacet_ggx_sample(
-          sc, Ng, sd->wi, randu, randv, eval, wo, pdf, sampled_roughness, eta);
+    case CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID:
+      label = bsdf_microfacet_ggx_sample(sc,
+                                         path_flag,
+                                         Ng,
+                                         sd->wi,
+                                         rand.x,
+                                         rand.y,
+                                         rand.z,
+                                         eval,
+                                         wo,
+                                         pdf,
+                                         sampled_roughness,
+                                         eta);
      break;
    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
-    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
-      label = bsdf_microfacet_multi_ggx_sample(
-          kg, sc, Ng, sd->wi, randu, randv, eval, wo, pdf, &sd->lcg_state, sampled_roughness, eta);
+      label = bsdf_microfacet_multi_ggx_sample(kg,
+                                               sc,
+                                               Ng,
+                                               sd->wi,
+                                               rand.x,
+                                               rand.y,
+                                               eval,
+                                               wo,
+                                               pdf,
+                                               &sd->lcg_state,
+                                               sampled_roughness,
+                                               eta);
      break;
    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
-    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
-      label = bsdf_microfacet_multi_ggx_glass_sample(
-          kg, sc, Ng, sd->wi, randu, randv, eval, wo, pdf, &sd->lcg_state, sampled_roughness, eta);
+      label = bsdf_microfacet_multi_ggx_glass_sample(kg,
+                                                     sc,
+                                                     Ng,
+                                                     sd->wi,
+                                                     rand.x,
+                                                     rand.y,
+                                                     eval,
+                                                     wo,
+                                                     pdf,
+                                                     &sd->lcg_state,
+                                                     sampled_roughness,
+                                                     eta);
      break;
    case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
    case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
-      label = bsdf_microfacet_beckmann_sample(
-          sc, Ng, sd->wi, randu, randv, eval, wo, pdf, sampled_roughness, eta);
+    case CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID:
+      label = bsdf_microfacet_beckmann_sample(sc,
+                                              path_flag,
+                                              Ng,
+                                              sd->wi,
+                                              rand.x,
+                                              rand.y,
+                                              rand.z,
+                                              eval,
+                                              wo,
+                                              pdf,
+                                              sampled_roughness,
+                                              eta);
      break;
    case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
      label = bsdf_ashikhmin_shirley_sample(
-          sc, Ng, sd->wi, randu, randv, eval, wo, pdf, sampled_roughness);
+          sc, Ng, sd->wi, rand.x, rand.y, eval, wo, pdf, sampled_roughness);
      *eta = 1.0f;
      break;
    case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
-      label = bsdf_ashikhmin_velvet_sample(sc, Ng, sd->wi, randu, randv, eval, wo, pdf);
+      label = bsdf_ashikhmin_velvet_sample(sc, Ng, sd->wi, rand.x, rand.y, eval, wo, pdf);
      *sampled_roughness = one_float2();
      *eta = 1.0f;
      break;
    case CLOSURE_BSDF_DIFFUSE_TOON_ID:
-      label = bsdf_diffuse_toon_sample(sc, Ng, sd->wi, randu, randv, eval, wo, pdf);
+      label = bsdf_diffuse_toon_sample(sc, Ng, sd->wi, rand.x, rand.y, eval, wo, pdf);
      *sampled_roughness = one_float2();
      *eta = 1.0f;
      break;
    case CLOSURE_BSDF_GLOSSY_TOON_ID:
-      label = bsdf_glossy_toon_sample(sc, Ng, sd->wi, randu, randv, eval, wo, pdf);
+      label = bsdf_glossy_toon_sample(sc, Ng, sd->wi, rand.x, rand.y, eval, wo, pdf);
      // double check if this is valid
      *sampled_roughness = one_float2();
      *eta = 1.0f;
      break;
    case CLOSURE_BSDF_HAIR_REFLECTION_ID:
      label = bsdf_hair_reflection_sample(
-          sc, Ng, sd->wi, randu, randv, eval, wo, pdf, sampled_roughness);
+          sc, Ng, sd->wi, rand.x, rand.y, eval, wo, pdf, sampled_roughness);
      *eta = 1.0f;
      break;
    case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
      label = bsdf_hair_transmission_sample(
-          sc, Ng, sd->wi, randu, randv, eval, wo, pdf, sampled_roughness);
+          sc, Ng, sd->wi, rand.x, rand.y, eval, wo, pdf, sampled_roughness);
      *eta = 1.0f;
      break;
    case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
      label = bsdf_principled_hair_sample(
-          kg, sc, sd, randu, randv, eval, wo, pdf, sampled_roughness, eta);
+          kg, sc, sd, rand.x, rand.y, rand.z, eval, wo, pdf, sampled_roughness, eta);
+      break;
+    case CLOSURE_BSDF_HAIR_MICROFACET_ID:
+      label = bsdf_microfacet_hair_sample(kg, sc, sd, rand, eval, wo, pdf, sampled_roughness, eta);
      break;
    case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
-      label = bsdf_principled_diffuse_sample(sc, Ng, sd->wi, randu, randv, eval, wo, pdf);
+      label = bsdf_principled_diffuse_sample(sc, Ng, sd->wi, rand.x, rand.y, eval, wo, pdf);
      *sampled_roughness = one_float2();
      *eta = 1.0f;
      break;
    case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
-      label = bsdf_principled_sheen_sample(sc, Ng, sd->wi, randu, randv, eval, wo, pdf);
+      label = bsdf_principled_sheen_sample(sc, Ng, sd->wi, rand.x, rand.y, eval, wo, pdf);
      *sampled_roughness = one_float2();
      *eta = 1.0f;
      break;
@@ -277,7 +326,6 @@ ccl_device_inline void bsdf_roughness_eta(const KernelGlobals kg,
                                          ccl_private float *eta)
 {
 #ifdef __SVM__
-  bool refractive = false;
  float alpha = 1.0f;
 #endif
  switch (sc->type) {
@@ -305,54 +353,32 @@ ccl_device_inline void bsdf_roughness_eta(const KernelGlobals kg,
      *roughness = one_float2();
      *eta = 1.0f;
      break;
-    case CLOSURE_BSDF_REFLECTION_ID: {
-      ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
-      *roughness = zero_float2();
-      *eta = bsdf->ior;
-      break;
-    }
-    case CLOSURE_BSDF_REFRACTION_ID: {
-      ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
-      *roughness = zero_float2();
-      // do we need to inverse eta??
-      *eta = bsdf->ior;
-      break;
-    }
    case CLOSURE_BSDF_TRANSPARENT_ID:
      *roughness = zero_float2();
      *eta = 1.0f;
      break;
+    case CLOSURE_BSDF_REFLECTION_ID:
+    case CLOSURE_BSDF_REFRACTION_ID:
+    case CLOSURE_BSDF_SHARP_GLASS_ID:
    case CLOSURE_BSDF_MICROFACET_GGX_ID:
-    case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
    case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
-    case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: {
+    case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
+    case CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID:
+    case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
+    case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
+    case CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID: {
      ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
      *roughness = make_float2(bsdf->alpha_x, bsdf->alpha_y);
-      refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
-      *eta = refractive ? 1.0f / bsdf->ior : bsdf->ior;
+      *eta = CLOSURE_IS_REFRACTIVE(bsdf->type) ? 1.0f / bsdf->ior : bsdf->ior;
      break;
    }
    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
-    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID: {
+    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: {
      ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
      *roughness = make_float2(bsdf->alpha_x, bsdf->alpha_y);
      *eta = bsdf->ior;
      break;
    }
-    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
-    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID: {
-      ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
-      *roughness = make_float2(bsdf->alpha_x, bsdf->alpha_y);
-      *eta = bsdf->ior;
-      break;
-    }
-    case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
-    case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: {
-      ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
-      *roughness = make_float2(bsdf->alpha_x, bsdf->alpha_y);
-      refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
-      *eta = refractive ? 1.0f / bsdf->ior : bsdf->ior;
-    } break;
    case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: {
      ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
      *roughness = make_float2(bsdf->alpha_x, bsdf->alpha_y);
@@ -387,6 +413,11 @@ ccl_device_inline void bsdf_roughness_eta(const KernelGlobals kg,
      *roughness = make_float2(alpha, alpha);
      *eta = ((ccl_private PrincipledHairBSDF *)sc)->eta;
      break;
+    case CLOSURE_BSDF_HAIR_MICROFACET_ID:
+      alpha = ((ccl_private MicrofacetHairBSDF *)sc)->roughness;
+      *roughness = make_float2(alpha, alpha);
+      *eta = ((ccl_private MicrofacetHairBSDF *)sc)->eta;
+      break;
    case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
      *roughness = one_float2();
      *eta = 1.0f;
@@ -432,38 +463,26 @@ ccl_device_inline int bsdf_label(const KernelGlobals kg,
    case CLOSURE_BSDF_TRANSLUCENT_ID:
      label = LABEL_TRANSMIT | LABEL_DIFFUSE;
      break;
-    case CLOSURE_BSDF_REFLECTION_ID:
-      label = LABEL_REFLECT | LABEL_SINGULAR;
-      break;
-    case CLOSURE_BSDF_REFRACTION_ID:
-      label = LABEL_TRANSMIT | LABEL_SINGULAR;
-      break;
    case CLOSURE_BSDF_TRANSPARENT_ID:
      label = LABEL_TRANSMIT | LABEL_TRANSPARENT;
      break;
+    case CLOSURE_BSDF_REFLECTION_ID:
+    case CLOSURE_BSDF_REFRACTION_ID:
+    case CLOSURE_BSDF_SHARP_GLASS_ID:
    case CLOSURE_BSDF_MICROFACET_GGX_ID:
-    case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
    case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
-    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
-    case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: {
-      ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
-      label = (bsdf->alpha_x * bsdf->alpha_y <= 1e-7f) ? LABEL_REFLECT | LABEL_SINGULAR :
-                                                         LABEL_REFLECT | LABEL_GLOSSY;
-      break;
-    }
+    case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
    case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
-    case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: {
+    case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
+    case CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID:
+    case CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID:
+    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: {
      ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
-      label = (bsdf->alpha_x * bsdf->alpha_y <= 1e-7f) ? LABEL_TRANSMIT | LABEL_SINGULAR :
-                                                         LABEL_TRANSMIT | LABEL_GLOSSY;
+      label = ((bsdf_is_transmission(sc, wo)) ? LABEL_TRANSMIT : LABEL_REFLECT) |
+              ((bsdf->alpha_x * bsdf->alpha_y <= 1e-7f) ? LABEL_SINGULAR : LABEL_GLOSSY);
      break;
    }
-    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
-    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
-      label = (bsdf_is_transmission(sc, wo)) ? LABEL_TRANSMIT | LABEL_GLOSSY :
-                                               LABEL_REFLECT | LABEL_GLOSSY;
-      break;
    case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
      label = LABEL_REFLECT | LABEL_GLOSSY;
      break;
@@ -483,6 +502,7 @@ ccl_device_inline int bsdf_label(const KernelGlobals kg,
      label = LABEL_TRANSMIT | LABEL_GLOSSY;
      break;
    case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
+    case CLOSURE_BSDF_HAIR_MICROFACET_ID:
      if (bsdf_is_transmission(sc, wo))
        label = LABEL_TRANSMIT | LABEL_GLOSSY;
      else
@@ -546,31 +566,29 @@ ccl_device_inline
    case CLOSURE_BSDF_TRANSLUCENT_ID:
      eval = bsdf_translucent_eval(sc, sd->wi, wo, pdf);
      break;
-    case CLOSURE_BSDF_REFLECTION_ID:
-      eval = bsdf_reflection_eval(sc, sd->wi, wo, pdf);
-      break;
-    case CLOSURE_BSDF_REFRACTION_ID:
-      eval = bsdf_refraction_eval(sc, sd->wi, wo, pdf);
-      break;
    case CLOSURE_BSDF_TRANSPARENT_ID:
      eval = bsdf_transparent_eval(sc, sd->wi, wo, pdf);
      break;
+    case CLOSURE_BSDF_REFLECTION_ID:
+    case CLOSURE_BSDF_REFRACTION_ID:
+    case CLOSURE_BSDF_SHARP_GLASS_ID:
+      eval = bsdf_microfacet_sharp_eval(sc, sd->N, sd->wi, wo, pdf);
+      break;
    case CLOSURE_BSDF_MICROFACET_GGX_ID:
-    case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
    case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
    case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
+    case CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID:
      eval = bsdf_microfacet_ggx_eval(sc, sd->N, sd->wi, wo, pdf);
      break;
    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
-    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
      eval = bsdf_microfacet_multi_ggx_eval(sc, sd->N, sd->wi, wo, pdf, &sd->lcg_state);
      break;
    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
-    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
      eval = bsdf_microfacet_multi_ggx_glass_eval(sc, sd->wi, wo, pdf, &sd->lcg_state);
      break;
    case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
    case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
+    case CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID:
      eval = bsdf_microfacet_beckmann_eval(sc, sd->N, sd->wi, wo, pdf);
      break;
    case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
@@ -588,6 +606,9 @@ ccl_device_inline
    case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
      eval = bsdf_principled_hair_eval(kg, sd, sc, wo, pdf);
      break;
+    case CLOSURE_BSDF_HAIR_MICROFACET_ID:
+      eval = bsdf_microfacet_hair_eval(kg, sd, sc, wo, pdf);
+      break;
    case CLOSURE_BSDF_HAIR_REFLECTION_ID:
      eval = bsdf_hair_reflection_eval(sc, sd->wi, wo, pdf);
      break;
@@ -634,19 +655,18 @@ ccl_device void bsdf_blur(KernelGlobals kg, ccl_private ShaderClosure *sc, float
 #if defined(__SVM__) || defined(__OSL__)
  switch (sc->type) {
    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
-    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
-    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
      bsdf_microfacet_multi_ggx_blur(sc, roughness);
      break;
    case CLOSURE_BSDF_MICROFACET_GGX_ID:
-    case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
    case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
    case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
+    case CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID:
      bsdf_microfacet_ggx_blur(sc, roughness);
      break;
    case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
    case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
+    case CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID:
      bsdf_microfacet_beckmann_blur(sc, roughness);
      break;
    case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
@@ -655,6 +675,9 @@ ccl_device void bsdf_blur(KernelGlobals kg, ccl_private ShaderClosure *sc, float
    case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
      bsdf_principled_hair_blur(sc, roughness);
      break;
+    case CLOSURE_BSDF_HAIR_MICROFACET_ID:
+      bsdf_microfacet_hair_blur(sc, roughness);
+      break;
    default:
      break;
  }
@@ -675,21 +698,15 @@ ccl_device_inline Spectrum bsdf_albedo(ccl_private const ShaderData *sd,
   * TODO(lukas): Consider calling this function to determine the sample_weight? Would be a bit of
   * extra overhead though. */
 #if defined(__SVM__) || defined(__OSL__)
-  switch (sc->type) {
-    case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
-    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
-    case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
-    case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
-      albedo *= microfacet_fresnel((ccl_private const MicrofacetBsdf *)sc, sd->wi, sc->N);
-      break;
-    case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
-      albedo *= ((ccl_private const PrincipledSheenBsdf *)sc)->avg_value;
-      break;
-    case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
-      albedo *= bsdf_principled_hair_albedo(sc);
-      break;
-    default:
-      break;
+  if (CLOSURE_IS_BSDF_MICROFACET(sc->type)) {
+    albedo *= microfacet_fresnel((ccl_private const MicrofacetBsdf *)sc, sd->wi, sc->N, false);
+  }
+  else if (sc->type == CLOSURE_BSDF_PRINCIPLED_SHEEN_ID) {
+    albedo *= ((ccl_private const PrincipledSheenBsdf *)sc)->avg_value;
+  }
+  else if (sc->type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID ||
+           sc->type == CLOSURE_BSDF_HAIR_MICROFACET_ID) {
+    albedo *= bsdf_hair_albedo(sd, sc);
  }
 #endif
  return albedo;
--- a/intern/cycles/kernel/closure/bsdf_hair_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_hair_microfacet.h
@@ -0,0 +1,887 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2018-2022 Blender Foundation */
+
+/* This code implements the paper [A Microfacet-based Hair Scattering
+ * Model](https://onlinelibrary.wiley.com/doi/full/10.1111/cgf.14588) by Weizhen Huang, Matthias B.
+ * Hullin and Johannes Hanika. */
+
+#pragma once
+
+#ifndef __KERNEL_GPU__
+#  include <fenv.h>
+#endif
+
+#include "kernel/util/color.h"
+
+CCL_NAMESPACE_BEGIN
+
+typedef struct MicrofacetHairExtra {
+  /* Per-lobe weights; a lobe whose weight is <= 0 is skipped. TODO: is this necessary? */
+  float R;
+  float TT;
+  float TRT;
+
+  /* Geometry data: xyz is the local X axis, w is the offset h across the fiber width. */
+  float4 geom;
+} MicrofacetHairExtra;
+
+typedef struct MicrofacetHairBSDF {
+  SHADER_CLOSURE_BASE;
+
+  /* Absorption coefficient. */
+  Spectrum sigma;
+  /* Microfacet distribution roughness, clamped to [0.001, 1] by the setup function. */
+  float roughness;
+  /* Cuticle tilt angle, in radians. */
+  float tilt;
+  /* Index of refraction. */
+  float eta;
+
+  /* Microfacet distribution: GGX or Beckmann. */
+  int distribution_type;
+
+  /* The ratio of the minor axis to the major axis of the elliptical cross-section. */
+  float aspect_ratio;
+
+  /* Extra closure storing the lobe weights and the geometry data. */
+  ccl_private MicrofacetHairExtra *extra;
+} MicrofacetHairBSDF;
+
+static_assert(sizeof(ShaderClosure) >= sizeof(MicrofacetHairBSDF),
+              "MicrofacetHairBSDF is too large!");
+static_assert(sizeof(ShaderClosure) >= sizeof(MicrofacetHairExtra),
+              "MicrofacetHairExtra is too large!");
+
+#ifdef __HAIR__
+/* Set up the hair closure: clamp roughness and store the local frame data in `extra->geom`. */
+ccl_device int bsdf_microfacet_hair_setup(ccl_private ShaderData *sd,
+                                          ccl_private MicrofacetHairBSDF *bsdf)
+{
+  bsdf->type = CLOSURE_BSDF_HAIR_MICROFACET_ID;
+
+  bsdf->roughness = clamp(bsdf->roughness, 0.001f, 1.0f);
+
+  /* Compute local frame. The Y axis is aligned with the curve tangent; the X axis is perpendicular
+   * to the ray direction for circular cross-sections, or aligned with the major axis for
+   * elliptical cross-sections. */
+  const float3 Y = safe_normalize(sd->dPdu);
+  const float3 X = safe_normalize(cross(Y, sd->wi));
+
+  /* h -1..0..1 means the ray goes from grazing the hair, to hitting it at the center, to grazing
+   * the other edge. This is the cosine of the angle between sd->N and X. */
+  const float h = (sd->type & PRIMITIVE_CURVE_RIBBON) ? -sd->v : -dot(X, sd->N);
+
+  kernel_assert(fabsf(h) < 1.0f + 1e-4f);
+  kernel_assert(isfinite_safe(X));
+  kernel_assert(isfinite_safe(h));
+
+  if (bsdf->aspect_ratio != 1.0f) {
+    if (bsdf->aspect_ratio > 1.0f) {
+      bsdf->aspect_ratio = 1.0f / bsdf->aspect_ratio;
+
+      /* Switch major and minor axis, so that the inverted aspect ratio stays <= 1. */
+      const float3 minor_axis = safe_normalize(cross(
+          sd->dPdu, make_float3(bsdf->extra->geom.x, bsdf->extra->geom.y, bsdf->extra->geom.z)));
+      const float3 major_axis = safe_normalize(cross(minor_axis, sd->dPdu));
+
+      bsdf->extra->geom = make_float4(major_axis.x, major_axis.y, major_axis.z, h);
+    }
+    else {
+      bsdf->extra->geom.w = h;
+    }
+  }
+  else {
+    /* Align local frame with the ray direction so that `phi_i == 0`. */
+    bsdf->extra->geom = make_float4(X.x, X.y, X.z, h);
+  }
+
+  return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG | SD_BSDF_HAS_TRANSMISSION;
+}
+
+#endif /* __HAIR__ */
+
+/* -------------------------------------------------------------------- */
+/** \name Hair coordinate system utils.
+ * \{ */
+
+/* Returns sin(theta) of the given direction. */
+ccl_device_inline float sin_theta(const float3 w)
+{
+  return w.y;
+}
+
+/* Returns cos(theta) of the given direction. */
+ccl_device_inline float cos_theta(const float3 w)
+{
+  return safe_sqrtf(sqr(w.x) + sqr(w.z));
+}
+
+/* Returns tan(theta) of the given direction. */
+ccl_device_inline float tan_theta(const float3 w)
+{
+  return sin_theta(w) / cos_theta(w);
+}
+
+/* Returns sin(phi) of the given direction. */
+ccl_device float sin_phi(const float3 w)
+{
+  return w.x / cos_theta(w);
+}
+
+ccl_device float2 sincos_phi(const float3 w)
+{
+  float c = cos_theta(w);
+  return make_float2(w.x / c, w.z / c); /* x: sin(phi), y: cos(phi). */
+}
+
+/* Extract the theta coordinate from the given direction.
+ * -pi/2 <= theta <= pi/2, because `cos_theta()` is never negative. */
+ccl_device_inline float dir_theta(const float3 w)
+{
+  return atan2f(sin_theta(w), cos_theta(w));
+}
+
+/* Extract the phi coordinate from the given direction, assuming phi(wi) = 0.
+ * -pi < phi < pi */
+ccl_device_inline float dir_phi(const float3 w)
+{
+  return atan2f(w.x, w.z);
+}
+
+/* Extract theta and phi coordinates from the given direction, assuming phi(wi) = 0.
+ * -pi/2 < theta < pi/2, -pi < phi < pi */
+ccl_device_inline float2 dir_sph(const float3 w)
+{
+  return make_float2(dir_theta(w), dir_phi(w));
+}
+
+/* Convert gamma to phi (`to_gamma` is the inverse). For notation see Figure 5 in the paper. */
+ccl_device_inline float to_phi(float gamma, float b)
+{
+  if (b == 1.0f) {
+    return gamma;
+  }
+  float sin_gamma, cos_gamma;
+  fast_sincosf(gamma, &sin_gamma, &cos_gamma);
+  return atan2f(b * sin_gamma, cos_gamma);
+}
+
+ccl_device_inline float to_gamma(float phi, float b)
+{
+  if (b == 1.0f) {
+    return phi;
+  }
+  float sin_phi, cos_phi;
+  fast_sincosf(phi, &sin_phi, &cos_phi);
+  return atan2f(sin_phi, b * cos_phi);
+}
+
+/* Compute the coordinate on the ellipse, given gamma and the aspect ratio between the minor axis
+ * and the major axis. */
+ccl_device_inline float2 to_point(float gamma, float b)
+{
+  float sin_gamma, cos_gamma;
+  fast_sincosf(gamma, &sin_gamma, &cos_gamma);
+  return make_float2(sin_gamma, b * cos_gamma);
+}
+
+/* Compute the vector direction given by theta and gamma. */
+ccl_device_inline float3 sphg_dir(float theta, float gamma, float b)
+{
+  float sin_theta, cos_theta, sin_gamma, cos_gamma, sin_phi, cos_phi;
+
+  fast_sincosf(theta, &sin_theta, &cos_theta);
+  fast_sincosf(gamma, &sin_gamma, &cos_gamma);
+
+  if (b == 1.0f) {
+    sin_phi = sin_gamma;
+    cos_phi = cos_gamma;
+  }
+  else {
+    float tan_gamma = sin_gamma / cos_gamma;
+    float tan_phi = b * tan_gamma;
+    cos_phi = signf(cos_gamma) / sqrtf(sqr(tan_phi) + 1.0f);
+    sin_phi = cos_phi * tan_phi;
+  }
+  return make_float3(sin_phi * cos_theta, sin_theta, cos_phi * cos_theta);
+}
+
+ccl_device_inline float arc_length(float e2, float gamma)
+{
+  return e2 == 0 ? 1.0f : sqrtf(1.0f - e2 * sqr(sinf(gamma)));
+}
+
+ccl_device_inline float projected_radius(float e2, float phi)
+{
+  return e2 == 0 ? 1.0f : sqrtf(1.0f - e2 * sqr(sinf(phi)));
+}
+
+/** \} */
+
+/* Sample microfacets from a tilted mesonormal. */
+template<MicrofacetType m_type>
+ccl_device_inline float3 sample_wh(
+    KernelGlobals kg, const float roughness, const float3 wi, const float3 wm, const float2 rand)
+{
+  /* Coordinate transformation for microfacet sampling. */
+  float3 s, t;
+  const float3 n = wm;
+  make_orthonormals(n, &s, &t);
+
+  const float3 wi_wm = make_float3(dot(wi, s), dot(wi, t), dot(wi, n));
+
+  const float3 wh_wm =
+      (m_type == MicrofacetType::GGX) ?
+          microfacet_ggx_sample_vndf(wi_wm, roughness, roughness, rand.x, rand.y) :
+          microfacet_beckmann_sample_vndf(wi_wm, roughness, roughness, rand.x, rand.y);
+
+  const float3 wh = wh_wm.x * s + wh_wm.y * t + wh_wm.z * n;
+  return wh;
+}
+
+/* Check micronormal/mesonormal direct visibility from direction v. */
+ccl_device_inline bool microfacet_visible(const float3 v, const float3 m, const float3 h)
+{
+  return (dot(v, h) > 0.0f && dot(v, m) > 0.0f);
+}
+
+/* Check micronormal/mesonormal direct visibility from both directions wi and wo. */
+ccl_device_inline bool microfacet_visible(const float3 wi,
+                                          const float3 wo,
+                                          const float3 m,
+                                          const float3 h)
+{
+  return microfacet_visible(wi, m, h) && microfacet_visible(wo, m, h);
+}
+
+/* Compute fresnel reflection. Also return the dot product of the refracted ray and the normal as
+ * `cos_theta_t`, as it is used when computing the direction of the refracted ray.
+ * `cos_theta_t` is written on every path, so callers never read an uninitialized value: it is
+ * zero when nothing is transmitted and negative (relative to the surface normal) otherwise. */
+ccl_device float fresnel(float cos_theta_i, float eta, ccl_private float *cos_theta_t)
+{
+  kernel_assert(!isnan_safe(cos_theta_i));
+  cos_theta_i = fabsf(cos_theta_i);
+
+  if (eta == 1.0f) {
+    /* Index-matched interface: no reflection, the ray continues undeviated. */
+    *cos_theta_t = -cos_theta_i;
+    return 0.0f;
+  }
+
+  /* Using Snell's law, calculate the squared cosine of the angle between the surface normal and
+   * the transmitted ray. */
+  const float cos_theta_t_sqr = 1.0f - (1.0f - cos_theta_i * cos_theta_i) / (eta * eta);
+
+  if (cos_theta_i == 0.0f || cos_theta_t_sqr <= 0) {
+    /* Grazing incidence or total internal reflection: everything is reflected. */
+    *cos_theta_t = 0.0f;
+    return 1.0f;
+  }
+  *cos_theta_t = safe_sqrtf(cos_theta_t_sqr);
+
+  /* Amplitudes of reflected waves. */
+  const float a_s = (cos_theta_i - eta * (*cos_theta_t)) / (cos_theta_i + eta * (*cos_theta_t));
+  const float a_p = (*cos_theta_t - eta * cos_theta_i) / (*cos_theta_t + eta * cos_theta_i);
+
+  /* Adjust the sign of the transmitted direction to be relative to the surface normal. */
+  *cos_theta_t = -(*cos_theta_t);
+
+  return 0.5f * (sqr(a_s) + sqr(a_p));
+}
+
+/* Refract the incident ray, given the cosine of the refraction angle and the inverse IOR. */
+ccl_device_inline float3 refract_angle(const float3 incident,
+                                       const float3 normal,
+                                       const float cos_theta_t,
+                                       const float inv_eta)
+{
+  return inv_eta * incident - (inv_eta * dot(normal, incident) + cos_theta_t) * normal;
+}
+
+template<MicrofacetType m_type>
+ccl_device float3 bsdf_microfacet_hair_eval_r(ccl_private const ShaderClosure *sc,
+                                              const float3 wi,
+                                              const float3 wo)
+{
+  ccl_private MicrofacetHairBSDF *bsdf = (ccl_private MicrofacetHairBSDF *)sc;
+  const float tilt = -bsdf->tilt;
+  const float roughness = bsdf->roughness;
+  const float roughness2 = sqr(roughness);
+  const float eta = bsdf->eta;
+
+  if (bsdf->extra->R <= 0.0f) {
+    return zero_float3();
+  }
+
+  /* Get elliptical cross section characteristic. Assuming major axis is 1. */
+  const float b = bsdf->aspect_ratio;
+  const float e2 = 1.0f - sqr(b); /* Squared Eccentricity. */
+  const bool is_circular = (b == 1.0f);
+
+  const float phi_i = is_circular ? 0.0f : dir_phi(wi);
+  const float phi_o = dir_phi(wo);
+  const float3 wh = normalize(wi + wo);
+
+  /* Range of phi_m with dot(wi, wmi) > 0; a NaN bound means that no such mesonormal exists. */
+  const float tan_tilt = tanf(tilt);
+  float phi_m_max1 = acosf(fmaxf(-tan_tilt * tan_theta(wi), 0.0f)) + phi_i;
+  if (isnan_safe(phi_m_max1)) {
+    return zero_float3();
+  }
+  float phi_m_min1 = -phi_m_max1 + 2.0f * phi_i;
+
+  /* Range of phi_m with dot(wo, wmi) > 0; a NaN bound means that no such mesonormal exists. */
+  float phi_m_max2 = acosf(fmaxf(-tan_tilt * tan_theta(wo), 0.0f)) + phi_o;
+  if (isnan_safe(phi_m_max2)) {
+    return zero_float3();
+  }
+  float phi_m_min2 = -phi_m_max2 + 2.0f * phi_o;
+
+  if (!is_circular) {
+    /* Try to wrap range. */
+    if ((phi_m_max2 - phi_m_min1) > M_2PI_F) {
+      phi_m_min2 -= M_2PI_F;
+      phi_m_max2 -= M_2PI_F;
+    }
+    if ((phi_m_max1 - phi_m_min2) > M_2PI_F) {
+      phi_m_min1 -= M_2PI_F;
+      phi_m_max1 -= M_2PI_F;
+    }
+  }
+
+  const float phi_m_min = fmaxf(phi_m_min1, phi_m_min2) + 1e-3f;
+  const float phi_m_max = fminf(phi_m_max1, phi_m_max2) - 1e-3f;
+  if (phi_m_min > phi_m_max) {
+    return zero_float3();
+  }
+
+  const float gamma_m_min = to_gamma(phi_m_min, b);
+  float gamma_m_max = to_gamma(phi_m_max, b);
+  if (gamma_m_max < gamma_m_min) {
+    gamma_m_max += M_2PI_F;
+  }
+
+  /* Maximal sample resolution. */
+  float res = roughness * 0.7f;
+  /* Number of intervals should be even. */
+  const size_t intervals = 2 * (size_t)ceilf((gamma_m_max - gamma_m_min) / res * 0.5f);
+
+  /* Modified resolution based on numbers of intervals. */
+  res = (gamma_m_max - gamma_m_min) / float(intervals);
+
+  /* Composite Simpson's 1/3 rule; the halved weights are rescaled by `2/3 * res` below. */
+  float integral = 0.0f;
+  for (size_t i = 0; i <= intervals; i++) {
+    const float gamma_m = gamma_m_min + i * res;
+    const float3 wm = sphg_dir(tilt, gamma_m, b);
+
+    if (microfacet_visible(wi, wo, make_float3(wm.x, 0.0f, wm.z), wh)) {
+      const float weight = (i == 0 || i == intervals) ? 0.5f : (i % 2 + 1);
+      /* NOTE: using separable masking and shadowing as one factor cancels out in `sample()`. */
+      const float G = bsdf_G<m_type>(roughness2, dot(wm, wi)) *
+                      bsdf_G<m_type>(roughness2, dot(wm, wo));
+      integral += weight * bsdf_D<m_type>(roughness2, dot(wm, wh)) * G * arc_length(e2, gamma_m);
+    }
+  }
+
+  integral *= (2.0f / 3.0f * res);
+
+  const float F = fresnel_dielectric_cos(dot(wi, wh), eta);
+
+  return make_float3(bsdf->extra->R * 0.125f * F * integral / projected_radius(e2, phi_i));
+}
+
+template<MicrofacetType m_type>
+ccl_device float3 bsdf_microfacet_hair_eval_tt_trt(KernelGlobals kg,
+                                                   ccl_private const ShaderClosure *sc,
+                                                   const float3 wi,
+                                                   const float3 wo,
+                                                   uint rng_quadrature)
+{
+  ccl_private MicrofacetHairBSDF *bsdf = (ccl_private MicrofacetHairBSDF *)sc;
+  const float tilt = -bsdf->tilt;
+  const float roughness = bsdf->roughness;
+  const float roughness2 = sqr(roughness);
+  const float eta = bsdf->eta;
+
+  if (bsdf->extra->TT <= 0.0f && bsdf->extra->TRT <= 0.0f) {
+    return zero_float3();
+  }
+
+  /* Get elliptical cross section characteristic. Assuming major axis is 1. */
+  const float b = bsdf->aspect_ratio;
+  const float e2 = 1.0f - sqr(b); /* Squared Eccentricity. */
+  const bool is_circular = (b == 1.0f);
+
+  const float phi_i = is_circular ? 0.0f : dir_phi(wi);
+
+  const float tan_tilt = tanf(tilt);
+  const float phi_m_max = acosf(fmaxf(-tan_tilt * tan_theta(wi), 0.0f)) + phi_i;
+  if (isnan_safe(phi_m_max)) {
+    /* Early detection of dot(wi, wmi) < 0. */
+    return zero_float3();
+  }
+  const float phi_m_min = -phi_m_max + 2.0f * phi_i;
+
+  if (tan_tilt * tan_theta(wo) < -1.0f) {
+    /* Early detection of dot(wo, wmo) < 0. */
+    return zero_float3();
+  }
+
+  const float3 mu_a = bsdf->sigma;
+  const float inv_eta = 1.0f / eta;
+
+  const float gamma_m_min = to_gamma(phi_m_min, b) + 1e-3f;
+  float gamma_m_max = to_gamma(phi_m_max, b) - 1e-3f;
+  if (gamma_m_max < gamma_m_min) {
+    gamma_m_max += M_2PI_F;
+  }
+
+  float res = roughness * 0.8f; /* Maximal sample resolution. */
+  const size_t intervals = 2 * (size_t)ceilf((gamma_m_max - gamma_m_min) / res * 0.5f);
+  res = (gamma_m_max - gamma_m_min) / intervals;
+
+  float3 S_tt = zero_float3();
+  float3 S_trt = zero_float3();
+  for (size_t i = 0; i <= intervals; i++) {
+
+    const float gamma_mi = gamma_m_min + i * res;
+
+    const float3 wmi = sphg_dir(tilt, gamma_mi, b);
+    const float3 wmi_ = sphg_dir(0.0f, gamma_mi, b);
+
+    /* Sample the micronormal wh1 at the first interface. */
+    const float2 sample1 = make_float2(lcg_step_float(&rng_quadrature),
+                                       lcg_step_float(&rng_quadrature));
+
+    const float3 wh1 = sample_wh<m_type>(kg, roughness, wi, wmi, sample1);
+    const float cos_hi1 = dot(wi, wh1);
+    if (!(cos_hi1 > 0)) {
+      continue;
+    }
+
+    float cos_theta_t1;
+    const float T1 = 1.0f - fresnel(cos_hi1, eta, &cos_theta_t1);
+
+    /* Refraction at the first interface. */
+    const float3 wt = -refract_angle(wi, wh1, cos_theta_t1, inv_eta);
+    const float phi_t = dir_phi(wt);
+    const float gamma_mt = 2.0f * to_phi(phi_t, b) - gamma_mi;
+    const float3 wmt = sphg_dir(-tilt, gamma_mt, b);
+    const float3 wmt_ = sphg_dir(0.0f, gamma_mt, b);
+
+    const float cos_mi1 = dot(wi, wmi);
+    const float cos_mo1 = dot(-wt, wmi);
+    const float cos_mi2 = dot(-wt, wmt);
+    const float G1o = bsdf_G<m_type>(roughness2, cos_mo1);
+    if (!microfacet_visible(wi, -wt, wmi, wh1) || !microfacet_visible(wi, -wt, wmi_, wh1)) {
+      continue;
+    }
+
+    const float weight = (i == 0 || i == intervals) ? 0.5f : (i % 2 + 1);
+
+    const float3 A_t = exp(mu_a / cos_theta(wt) *
+                           (is_circular ?
+                                2.0f * cosf(gamma_mi - phi_t) :
+                                -len(to_point(gamma_mi, b) - to_point(gamma_mt + M_PI_F, b))));
+
+    /* TT */
+    if (bsdf->extra->TT > 0.0f) {
+      if (dot(wo, wt) >= inv_eta - 1e-5f) { /* Total internal reflection otherwise. */
+        float3 wh2 = -wt + inv_eta * wo;
+        const float rcp_norm_wh2 = 1.0f / len(wh2);
+        wh2 *= rcp_norm_wh2;
+        const float cos_mh2 = dot(wmt, wh2);
+        if (cos_mh2 >= 0.0f) { /* Microfacet visibility from macronormal. */
+          const float cos_hi2 = dot(-wt, wh2);
+          const float cos_ho2 = dot(-wo, wh2);
+          const float cos_mo2 = dot(-wo, wmt);
+
+          const float T2 = 1.0f - fresnel_dielectric_cos(cos_hi2, inv_eta);
+          const float D2 = bsdf_D<m_type>(roughness2, cos_mh2);
+          const float G2 = bsdf_G<m_type>(roughness2, cos_mi2) *
+                           bsdf_G<m_type>(roughness2, cos_mo2);
+
+          const float3 result = weight * T1 * T2 * D2 * G1o * G2 * A_t / cos_mo1 * cos_mi1 *
+                                cos_hi2 * cos_ho2 * sqr(rcp_norm_wh2);
+
+          if (isfinite_safe(result)) {
+            S_tt += bsdf->extra->TT * result * arc_length(e2, gamma_mt);
+          }
+        }
+      }
+    }
+
+    /* TRT */
+    if (bsdf->extra->TRT > 0.0f) {
+      /* Sample the micronormal wh2 for the internal reflection. */
+      const float2 sample2 = make_float2(lcg_step_float(&rng_quadrature),
+                                         lcg_step_float(&rng_quadrature));
+      const float3 wh2 = sample_wh<m_type>(kg, roughness, -wt, wmt, sample2);
+      const float cos_hi2 = dot(-wt, wh2);
+      if (!(cos_hi2 > 0)) {
+        continue;
+      }
+      const float R2 = fresnel_dielectric_cos(cos_hi2, inv_eta);
+
+      const float3 wtr = -reflect(wt, wh2);
+      if (dot(-wtr, wo) < inv_eta - 1e-5f) {
+        /* Total internal reflection. */
+        continue;
+      }
+
+      if (!microfacet_visible(-wt, -wtr, wmt, wh2) || !microfacet_visible(-wt, -wtr, wmt_, wh2)) {
+        continue;
+      }
+
+      const float phi_tr = dir_phi(wtr);
+      const float gamma_mtr = gamma_mi - 2.0f * (to_phi(phi_t, b) - to_phi(phi_tr, b)) + M_PI_F;
+      const float3 wmtr = sphg_dir(-tilt, gamma_mtr, b);
+      const float3 wmtr_ = sphg_dir(0.0f, gamma_mtr, b);
+
+      float3 wh3 = wtr + inv_eta * wo;
+      const float rcp_norm_wh3 = 1.0f / len(wh3);
+      wh3 *= rcp_norm_wh3;
+      const float cos_mh3 = dot(wmtr, wh3);
+      if (cos_mh3 < 0.0f || !microfacet_visible(wtr, -wo, wmtr, wh3) ||
+          !microfacet_visible(wtr, -wo, wmtr_, wh3)) {
+        continue;
+      }
+
+      const float cos_hi3 = dot(wh3, wtr);
+      const float cos_ho3 = dot(wh3, -wo);
+
+      const float T3 = 1.0f - fresnel_dielectric_cos(cos_hi3, inv_eta);
+      const float D3 = bsdf_D<m_type>(roughness2, cos_mh3);
+
+      const float3 A_tr = exp(mu_a / cos_theta(wtr) *
+                              (is_circular ?
+                                   2.0f * cosf(phi_tr - gamma_mt) :
+                                   -len(to_point(gamma_mtr, b) - to_point(gamma_mt, b))));
+
+      const float cos_mo2 = dot(wmt, -wtr);
+      const float G2o = bsdf_G<m_type>(roughness2, cos_mo2);
+      const float G3 = bsdf_G<m_type>(roughness2, dot(wmtr, wtr)) *
+                       bsdf_G<m_type>(roughness2, dot(wmtr, -wo));
+
+      const float3 result = weight * T1 * R2 * T3 * D3 * G1o * G2o * G3 * A_t * A_tr /
+                            (cos_mo1 * cos_mo2) * cos_mi1 * cos_mi2 * cos_hi3 * cos_ho3 *
+                            sqr(rcp_norm_wh3);
+
+      if (isfinite_safe(result)) {
+        S_trt += bsdf->extra->TRT * result * arc_length(e2, gamma_mtr);
+      }
+    }
+  }
+
+  return (S_tt + S_trt) / 3.0f * res * sqr(inv_eta) * projected_radius(e2, phi_i);
+}
+
+template<MicrofacetType m_type>
+ccl_device int bsdf_microfacet_hair_sample(const KernelGlobals kg,
+                                           ccl_private const ShaderClosure *sc,
+                                           ccl_private ShaderData *sd,
+                                           float3 rand,
+                                           ccl_private Spectrum *eval,
+                                           ccl_private float3 *wo,
+                                           ccl_private float *pdf,
+                                           ccl_private float2 *sampled_roughness,
+                                           ccl_private float *eta)
+{
+  ccl_private MicrofacetHairBSDF *bsdf = (ccl_private MicrofacetHairBSDF *)sc;
+
+  *sampled_roughness = make_float2(bsdf->roughness, bsdf->roughness);
+  *eta = bsdf->eta;
+  const float inv_eta = 1.0f / *eta;
+
+  if (bsdf->extra->R <= 0.0f && bsdf->extra->TT <= 0.0f && bsdf->extra->TRT <= 0.0f) {
+    /* Early out for inactive lobe. */
+    *pdf = 0.0f;
+    return LABEL_NONE;
+  }
+
+  /* Get local coordinate system:
+   * . X major axis.
+   * . Y along the fiber tangent.
+   * . Z minor axis. */
+  const float3 X = float4_to_float3(bsdf->extra->geom);
+  const float3 Z = safe_normalize(cross(X, sd->dPdu));
+  const float3 Y = safe_normalize(cross(Z, X));
+
+  /* Transform wi from global coordinate system to local. */
+  const float3 wi = make_float3(dot(sd->wi, X), dot(sd->wi, Y), dot(sd->wi, Z));
+
+  /* Get elliptical cross section characteristic. Assuming major axis is 1. */
+  const float b = bsdf->aspect_ratio;
+  const float e2 = 1.0f - sqr(b); /* Squared Eccentricity. */
+  const bool is_circular = (b == 1.0f);
+
+  /* Macronormal. */
+  const float2 sincos_phi_i = sincos_phi(wi);
+  const float sin_phi_i = sincos_phi_i.x;
+  const float cos_phi_i = sincos_phi_i.y;
+  const float d_i = projected_radius(e2, sin_phi_i);
+
+  /* Treat as transparent material if intersection lies outside of the projected radius. */
+  if (fabsf(bsdf->extra->geom.w) > d_i) {
+    *wo = -sd->wi;
+    *pdf = 1;
+    *eval = one_spectrum();
+    return LABEL_TRANSMIT | LABEL_TRANSPARENT;
+  }
+
+  const float tilt = -bsdf->tilt;
+  const float roughness = bsdf->roughness;
+  const float roughness2 = sqr(roughness);
+
+  /* Generate samples. */
+  float sample_lobe = rand.x;
+  const float sample_h = rand.y;
+  const float2 sample_h1 = make_float2(rand.z, lcg_step_float(&sd->lcg_state));
+  const float2 sample_h2 = make_float2(lcg_step_float(&sd->lcg_state),
+                                       lcg_step_float(&sd->lcg_state));
+  const float2 sample_h3 = make_float2(lcg_step_float(&sd->lcg_state),
+                                       lcg_step_float(&sd->lcg_state));
+
+  const float h = sample_h * 2.0f - 1.0f;
+  const float gamma_mi = is_circular ?
+                             asinf(h) :
+                             atan2f(cos_phi_i, -b * sin_phi_i) -
+                                 acosf(h * d_i / sqrtf(sqr(cos_phi_i) + sqr(b * sin_phi_i)));
+
+  const float3 wmi_ = sphg_dir(0, gamma_mi, b); /* Macronormal. */
+
+  /* Mesonormal. */
+  float st, ct;
+  fast_sincosf(tilt, &st, &ct);
+  const float3 wmi = make_float3(wmi_.x * ct, st, wmi_.z * ct);
+
+  if (dot(wmi, wi) < 0.0f || dot(wmi_, wi) < 0.0f) {
+    /* Macro/mesonormal invisible. */
+    *pdf = 0.0f;
+    return LABEL_NONE;
+  }
+
+  /* Sample R lobe. */
+  const float3 wh1 = sample_wh<m_type>(kg, roughness, wi, wmi, sample_h1);
+  const float3 wr = -reflect(wi, wh1);
+
+  /* Ensure that this is a valid sample. */
+  if (!(dot(wr, wh1) > 0.0f) || !(dot(wr, wmi) > 0.0f) || !microfacet_visible(wi, wr, wmi_, wh1)) {
+    *pdf = 0.0f;
+    return LABEL_NONE;
+  }
+
+  float cos_theta_t1;
+  const float R1 = fresnel(dot(wi, wh1), *eta, &cos_theta_t1);
+  const float3 R = make_float3(bsdf->extra->R * R1);
+
+  /* Sample TT lobe. */
+  const float3 wt = -refract_angle(wi, wh1, cos_theta_t1, inv_eta);
+  const float phi_t = dir_phi(wt);
+
+  const float gamma_mt = 2.0f * to_phi(phi_t, b) - gamma_mi;
+  const float3 wmt = sphg_dir(-tilt, gamma_mt, b);
+  const float3 wmt_ = sphg_dir(0.0f, gamma_mt, b);
+
+  const float3 wh2 = sample_wh<m_type>(kg, roughness, -wt, wmt, sample_h2);
+
+  const float3 wtr = -reflect(wt, wh2);
+
+  float3 wh3;
+  float3 wtt, wtrt;
+  float3 wmtr, wmtr_;
+  float3 TT = zero_float3();
+  float3 TRT = zero_float3();
+
+  if (dot(wh2, -wt) > 0.0f && dot(wmt, -wt) > 0.0f && microfacet_visible(-wt, -wtr, wmt_, wh2)) {
+    const float3 mu_a = bsdf->sigma;
+    const float3 A_t = exp(mu_a / cos_theta(wt) *
+                           (is_circular ?
+                                2.0f * cosf(phi_t - gamma_mi) :
+                                -len(to_point(gamma_mi, b) - to_point(gamma_mt + M_PI_F, b))));
+
+    float cos_theta_t2;
+    const float R2 = fresnel(dot(-wt, wh2), inv_eta, &cos_theta_t2);
+    const float3 T1 = make_float3(1.0f - R1);
+    const float3 T2 = make_float3(1.0f - R2);
+
+    wtt = -refract_angle(-wt, wh2, cos_theta_t2, *eta);
+
+    if (dot(wmt, -wtt) > 0.0f && cos_theta_t2 != 0.0f) {
+      TT = bsdf->extra->TT * T1 * A_t * T2;
+    }
+
+    /* Sample TRT lobe. */
+    const float phi_tr = dir_phi(wtr);
+    const float gamma_mtr = gamma_mi - 2.0f * (to_phi(phi_t, b) - to_phi(phi_tr, b)) + M_PI_F;
+    wmtr = sphg_dir(-tilt, gamma_mtr, b);
+
+    wh3 = sample_wh<m_type>(kg, roughness, wtr, wmtr, sample_h3);
+
+    float cos_theta_t3;
+    const float R3 = fresnel(dot(wtr, wh3), inv_eta, &cos_theta_t3);
+
+    wtrt = -refract_angle(wtr, wh3, cos_theta_t3, *eta);
+
+    if (cos_theta_t3 != 0.0f && dot(wtr, wh3) > 0.0f && dot(wmtr, wtr) > 0.0f &&
+        dot(wmtr, -wtrt) > 0.0f &&
+        microfacet_visible(wtr, -wtrt, make_float3(wmtr.x, 0.0f, wmtr.z), wh3)) {
+      const float3 T3 = make_float3(1.0f - R3);
+
+      const float3 A_tr = exp(mu_a / cos_theta(wtr) *
+                              (is_circular ?
+                                   2.0f * cos(phi_tr - gamma_mt) :
+                                   -len(to_point(gamma_mt, b) - to_point(gamma_mtr, b))));
+
+      TRT = bsdf->extra->TRT * T1 * R2 * T3 * A_t * A_tr;
+    }
+  }
+
+  /* Select lobe based on energy. */
+  const float r = average(R);
+  const float tt = average(TT);
+  const float trt = average(TRT);
+  const float total_energy = r + tt + trt;
+
+  if (total_energy == 0.0f) {
+    *pdf = 0.0f;
+    return LABEL_NONE;
+  }
+
+  float3 local_O;
+  float visibility = 0.0f;
+  int label = LABEL_GLOSSY;
+
+  sample_lobe *= total_energy;
+  if (sample_lobe < r) {
+    local_O = wr;
+    *eval = rgb_to_spectrum(R / r * total_energy);
+
+    if (microfacet_visible(wi, wr, wmi_, wh1)) {
+      visibility = bsdf_G<m_type>(roughness2, dot(wmi, wr));
+    }
+
+    label |= LABEL_REFLECT;
+  }
+  else if (sample_lobe < (r + tt)) {
+    local_O = wtt;
+    *eval = rgb_to_spectrum(TT / tt * total_energy);
+
+    if (microfacet_visible(wi, -wt, wmi_, wh1) && microfacet_visible(-wt, -wtt, wmt_, wh2)) {
+      visibility = bsdf_G<m_type>(roughness2, dot(wmi, -wt)) *
+                   bsdf_G<m_type>(roughness2, dot(wmt, -wtt));
+    }
+
+    label |= LABEL_TRANSMIT;
+  }
+  else { /* if (sample_lobe >= (r + tt)) */
+    local_O = wtrt;
+    *eval = rgb_to_spectrum(TRT / trt * total_energy);
+
+    if (microfacet_visible(wi, -wt, wmi_, wh1)) {
+      visibility = bsdf_G<m_type>(roughness2, dot(wmi, -wt)) *
+                   bsdf_G<m_type>(roughness2, dot(wmt, -wtr)) *
+                   bsdf_G<m_type>(roughness2, dot(wmtr, -wtrt));
+    }
+
+    label |= LABEL_TRANSMIT;
+  }
+
+  *eval *= visibility;
+  *wo = local_O.x * X + local_O.y * Y + local_O.z * Z;
+
+  /* Ensure the same pdf is returned for BSDF and emitter sampling. The importance sampling pdf is
+   * already factored in the value so this value is only used for MIS. */
+  *pdf = 1.0f;
+
+  return label;
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Main eval and sample entry points: dispatch to the Beckmann or GGX model by `distribution_type`.
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/* Evaluate the hair BSDF for directions `sd->wi` and `wo`: the R lobe plus the TT/TRT lobes,
+ * divided by cos_theta(local wi). `pdf` is 0 when the hit is outside the projected radius and
+ * the constant 1 otherwise. `sc` is only read, so the cast keeps it const. */
+ccl_device Spectrum bsdf_microfacet_hair_eval(KernelGlobals kg,
+                                              ccl_private const ShaderData *sd,
+                                              ccl_private const ShaderClosure *sc,
+                                              const float3 wo,
+                                              ccl_private float *pdf)
+{
+  ccl_private const MicrofacetHairBSDF *bsdf = (ccl_private const MicrofacetHairBSDF *)sc;
+
+  /* Local coordinate system: X is the major axis, Y runs along the fiber tangent and Z is the
+   * minor axis. X comes from the closure, Z and Y are built from it and `sd->dPdu`. */
+  const float3 X = float4_to_float3(bsdf->extra->geom);
+  const float3 Z = safe_normalize(cross(X, sd->dPdu));
+  const float3 Y = safe_normalize(cross(Z, X));
+
+  /* Transform wi/wo from global coordinate system to local. */
+  const float3 local_I = make_float3(dot(sd->wi, X), dot(sd->wi, Y), dot(sd->wi, Z));
+  const float3 local_O = make_float3(dot(wo, X), dot(wo, Y), dot(wo, Z));
+
+  /* Treat as transparent material if intersection lies outside of the projected radius. */
+  const float e2 = 1.0f - sqr(bsdf->aspect_ratio);
+  if (fabsf(bsdf->extra->geom.w) > projected_radius(e2, dir_phi(local_I))) {
+    *pdf = 0.0f;
+    return zero_spectrum();
+  }
+
+  /* Evaluate with the distribution stored on the closure; anything but Beckmann uses GGX. */
+  float3 R;
+  if (bsdf->distribution_type == NODE_MICROFACET_HAIR_BECKMANN) {
+    R = bsdf_microfacet_hair_eval_r<MicrofacetType::BECKMANN>(sc, local_I, local_O) +
+        bsdf_microfacet_hair_eval_tt_trt<MicrofacetType::BECKMANN>(
+            kg, sc, local_I, local_O, sd->lcg_state);
+  }
+  else {
+    R = bsdf_microfacet_hair_eval_r<MicrofacetType::GGX>(sc, local_I, local_O) +
+        bsdf_microfacet_hair_eval_tt_trt<MicrofacetType::GGX>(
+            kg, sc, local_I, local_O, sd->lcg_state);
+  }
+
+  /* TODO: better estimation of the pdf */
+  *pdf = 1.0f;
+  return rgb_to_spectrum(R / cos_theta(local_I));
+}
+
+/* Sample entry point: forwards every argument to the Beckmann or (otherwise) GGX instantiation. */
+ccl_device int bsdf_microfacet_hair_sample(KernelGlobals kg,
+                                           ccl_private const ShaderClosure *sc,
+                                           ccl_private ShaderData *sd,
+                                           float3 rand,
+                                           ccl_private Spectrum *eval,
+                                           ccl_private float3 *wo,
+                                           ccl_private float *pdf,
+                                           ccl_private float2 *sampled_roughness,
+                                           ccl_private float *eta)
+{
+  ccl_private const MicrofacetHairBSDF *bsdf = (ccl_private const MicrofacetHairBSDF *)sc;
+  if (bsdf->distribution_type == NODE_MICROFACET_HAIR_BECKMANN) {
+    return bsdf_microfacet_hair_sample<MicrofacetType::BECKMANN>(
+        kg, sc, sd, rand, eval, wo, pdf, sampled_roughness, eta);
+  }
+  return bsdf_microfacet_hair_sample<MicrofacetType::GGX>(
+      kg, sc, sd, rand, eval, wo, pdf, sampled_roughness, eta);
+}
+
+/* Filter Glossy support: raise the closure roughness so it is never below `roughness`. */
+ccl_device void bsdf_microfacet_hair_blur(ccl_private ShaderClosure *sc, float roughness)
+{
+  ccl_private MicrofacetHairBSDF *hair = (ccl_private MicrofacetHairBSDF *)sc;
+
+  hair->roughness = fmaxf(roughness, hair->roughness);
+}
+
+/* Hair albedo computed from the closure's `sigma` and `roughness`; `sc` is only read. */
+ccl_device float3 bsdf_microfacet_hair_albedo(ccl_private const ShaderClosure *sc)
+{
+  ccl_private const MicrofacetHairBSDF *bsdf = (ccl_private const MicrofacetHairBSDF *)sc;
+  return exp(-sqrt(bsdf->sigma) * bsdf_hair_albedo_roughness_scale(bsdf->roughness));
+}
+
+CCL_NAMESPACE_END
--- a/intern/cycles/kernel/closure/bsdf_hair_principled.h
+++ b/intern/cycles/kernel/closure/bsdf_hair_principled.h
@@ -338,6 +338,7 @@ ccl_device int bsdf_principled_hair_sample(KernelGlobals kg,
                                           ccl_private ShaderData *sd,
                                           float randu,
                                           float randv,
+                                           float randw,
                                           ccl_private Spectrum *eval,
                                           ccl_private float3 *wo,
                                           ccl_private float *pdf,
@@ -357,11 +358,6 @@ ccl_device int bsdf_principled_hair_sample(KernelGlobals kg,

  const float3 local_O = make_float3(dot(sd->wi, X), dot(sd->wi, Y), dot(sd->wi, Z));

-  float2 u[2];
-  u[0] = make_float2(randu, randv);
-  u[1].x = lcg_step_float(&sd->lcg_state);
-  u[1].y = lcg_step_float(&sd->lcg_state);
-
  const float sin_theta_o = local_O.x;
  const float cos_theta_o = cos_from_sin(sin_theta_o);
  const float phi_o = atan2f(local_O.z, local_O.y);
@@ -385,11 +381,12 @@ ccl_device int bsdf_principled_hair_sample(KernelGlobals kg,

  int p = 0;
  for (; p < 3; p++) {
-    if (u[0].x < Ap_energy[p]) {
+    if (randw < Ap_energy[p]) {
      break;
    }
-    u[0].x -= Ap_energy[p];
+    randw -= Ap_energy[p];
  }
+  randw /= Ap_energy[p];

  float v = bsdf->v;
  if (p == 1) {
@@ -399,10 +396,9 @@ ccl_device int bsdf_principled_hair_sample(KernelGlobals kg,
    v *= 4.0f;
  }

-  u[1].x = max(u[1].x, 1e-5f);
-  const float fac = 1.0f + v * logf(u[1].x + (1.0f - u[1].x) * expf(-2.0f / v));
-  float sin_theta_i = -fac * sin_theta_o +
-                      cos_from_sin(fac) * cosf(M_2PI_F * u[1].y) * cos_theta_o;
+  randw = max(randw, 1e-5f);
+  const float fac = 1.0f + v * logf(randw + (1.0f - randw) * expf(-2.0f / v));
+  float sin_theta_i = -fac * sin_theta_o + cos_from_sin(fac) * cosf(M_2PI_F * randv) * cos_theta_o;
  float cos_theta_i = cos_from_sin(sin_theta_i);

  float angles[6];
@@ -414,10 +410,10 @@ ccl_device int bsdf_principled_hair_sample(KernelGlobals kg,

  float phi;
  if (p < 3) {
-    phi = delta_phi(p, gamma_o, gamma_t) + sample_trimmed_logistic(u[0].y, bsdf->s);
+    phi = delta_phi(p, gamma_o, gamma_t) + sample_trimmed_logistic(randu, bsdf->s);
  }
  else {
-    phi = M_2PI_F * u[0].y;
+    phi = M_2PI_F * randu;
  }
  const float phi_i = phi_o + phi;

@@ -469,31 +465,36 @@ ccl_device void bsdf_principled_hair_blur(ccl_private ShaderClosure *sc, float r
  bsdf->m0_roughness = fmaxf(roughness, bsdf->m0_roughness);
 }

-/* Hair Albedo */
-
-ccl_device_inline float bsdf_principled_hair_albedo_roughness_scale(
-    const float azimuthal_roughness)
+/* Hair Albedo. Also used by `bsdf_hair_microfacet.h` */
+ccl_device_inline float bsdf_hair_albedo_roughness_scale(const float azimuthal_roughness)
 {
  const float x = azimuthal_roughness;
  return (((((0.245f * x) + 5.574f) * x - 10.73f) * x + 2.532f) * x - 0.215f) * x + 5.969f;
 }

-ccl_device Spectrum bsdf_principled_hair_albedo(ccl_private const ShaderClosure *sc)
+ccl_device Spectrum bsdf_hair_albedo(ccl_private const ShaderData *sd,
+                                     ccl_private const ShaderClosure *sc)
 {
  ccl_private PrincipledHairBSDF *bsdf = (ccl_private PrincipledHairBSDF *)sc;
-  return exp(-sqrt(bsdf->sigma) * bsdf_principled_hair_albedo_roughness_scale(bsdf->v));
+
+  const float cos_theta_o = cos_from_sin(dot(sd->wi, safe_normalize(sd->dPdu)));
+  const float cos_gamma_o = cos_from_sin(bsdf->extra->geom.w);
+  const float f = fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta);
+
+  const float roughness_scale = bsdf_hair_albedo_roughness_scale(bsdf->v);
+  /* TODO(lukas): Adding the Fresnel term here as a workaround until the proper refactor. */
+  return exp(-sqrt(bsdf->sigma) * roughness_scale) + make_spectrum(f);
 }

-ccl_device_inline Spectrum
-bsdf_principled_hair_sigma_from_reflectance(const Spectrum color, const float azimuthal_roughness)
+ccl_device_inline Spectrum bsdf_hair_sigma_from_reflectance(const Spectrum color,
+                                                            const float azimuthal_roughness)
 {
-  const Spectrum sigma = log(color) /
-                         bsdf_principled_hair_albedo_roughness_scale(azimuthal_roughness);
+  const Spectrum sigma = log(color) / bsdf_hair_albedo_roughness_scale(azimuthal_roughness);
  return sigma * sigma;
 }

-ccl_device_inline Spectrum bsdf_principled_hair_sigma_from_concentration(const float eumelanin,
-                                                                         const float pheomelanin)
+ccl_device_inline Spectrum bsdf_hair_sigma_from_concentration(const float eumelanin,
+                                                              const float pheomelanin)
 {
  const float3 eumelanin_color = make_float3(0.506f, 0.841f, 1.653f);
  const float3 pheomelanin_color = make_float3(0.343f, 0.733f, 1.924f);
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -17,19 +17,52 @@
 CCL_NAMESPACE_BEGIN

 enum MicrofacetType {
+  GGX = 0,
  BECKMANN,
-  GGX,
+  SHARP,
 };

-typedef struct MicrofacetExtra {
-  Spectrum color, cspec0;
-} MicrofacetExtra;
+enum MicrofacetFresnel {
+  NONE = 0, /* no Fresnel term: microfacet_fresnel() returns one_spectrum() */
+  DIELECTRIC, /* fresnel_dielectric_cos() with bsdf->ior; 1 - F for refraction */
+  DIELECTRIC_TINT, /* used by the OSL MaterialX closures */
+  CONDUCTOR, /* fresnel_conductor() with n and k; reflection only */
+  GENERALIZED_SCHLICK, /* mix(f0, f90, (1 - cosI)^exponent), scaled by the tints */
+  CONSTANT, /* only needed by MultiGGX */
+  PRINCIPLED_V1, /* interpolate_fresnel_color() with cspec0; reflection only */
+};
+
+typedef struct FresnelPrincipledV1 {
+  Spectrum color; /* only needed by MultiGGX */
+  Spectrum cspec0; /* passed to interpolate_fresnel_color(); saturated by the setup function */
+} FresnelPrincipledV1;
+
+typedef struct FresnelConstant {
+  Spectrum color; /* returned unchanged by microfacet_fresnel() */
+} FresnelConstant;
+
+typedef struct FresnelDielectricTint {
+  Spectrum reflection_tint; /* multiplies F when evaluating reflection */
+  Spectrum transmission_tint; /* multiplies (1 - F) when evaluating refraction */
+} FresnelDielectricTint;
+
+typedef struct FresnelConductor {
+  Spectrum n, k; /* forwarded to fresnel_conductor(); NOTE(review): presumably complex IOR */
+} FresnelConductor;
+
+typedef struct FresnelGeneralizedSchlick {
+  Spectrum reflection_tint; /* multiplies F for reflection; returned alone on TIR */
+  Spectrum transmission_tint; /* multiplies (1 - F) for refraction */
+  Spectrum f0, f90; /* F = mix(f0, f90, s) */
+  float exponent; /* s = (1 - cosI)^exponent */
+} FresnelGeneralizedSchlick;

 typedef struct MicrofacetBsdf {
  SHADER_CLOSURE_BASE;

  float alpha_x, alpha_y, ior;
-  ccl_private MicrofacetExtra *extra;
+  int fresnel_type;
+  ccl_private void *fresnel;
  float3 T;
 } MicrofacetBsdf;

@@ -183,14 +216,57 @@ ccl_device_forceinline float3 microfacet_ggx_sample_vndf(const float3 wi,
 * Else it is simply white
 */
 ccl_device_forceinline Spectrum microfacet_fresnel(ccl_private const MicrofacetBsdf *bsdf,
-                                                   float3 wi,
-                                                   float3 H)
+                                                   const float3 wi,
+                                                   const float3 H,
+                                                   const bool refraction)
 {
-  if (CLOSURE_IS_BSDF_MICROFACET_FRESNEL(bsdf->type)) {
-    return interpolate_fresnel_color(wi, H, bsdf->ior, bsdf->extra->cspec0);
+  if (bsdf->fresnel_type == MicrofacetFresnel::PRINCIPLED_V1) {
+    kernel_assert(!refraction);
+    ccl_private FresnelPrincipledV1 *fresnel = (ccl_private FresnelPrincipledV1 *)bsdf->fresnel;
+    return interpolate_fresnel_color(wi, H, bsdf->ior, fresnel->cspec0);
  }
-  else if (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) {
-    return make_spectrum(fresnel_dielectric_cos(dot(wi, H), bsdf->ior));
+  else if (bsdf->fresnel_type == MicrofacetFresnel::DIELECTRIC) {
+    const float F = fresnel_dielectric_cos(dot(wi, H), bsdf->ior);
+    return make_spectrum(refraction ? 1.0f - F : F);
+  }
+  else if (bsdf->fresnel_type == MicrofacetFresnel::DIELECTRIC_TINT) {
+    ccl_private FresnelDielectricTint *fresnel = (ccl_private FresnelDielectricTint *)
+                                                     bsdf->fresnel;
+    const float F = fresnel_dielectric_cos(dot(wi, H), bsdf->ior);
+    return refraction ? (1.0f - F) * fresnel->transmission_tint : F * fresnel->reflection_tint;
+  }
+  else if (bsdf->fresnel_type == MicrofacetFresnel::CONDUCTOR) {
+    kernel_assert(!refraction);
+    ccl_private FresnelConductor *fresnel = (ccl_private FresnelConductor *)bsdf->fresnel;
+    return fresnel_conductor(dot(wi, H), fresnel->n, fresnel->k);
+  }
+  else if (bsdf->fresnel_type == MicrofacetFresnel::GENERALIZED_SCHLICK) {
+    ccl_private FresnelGeneralizedSchlick *fresnel = (ccl_private FresnelGeneralizedSchlick *)
+                                                         bsdf->fresnel;
+    float cosI = dot(wi, H);
+    if (bsdf->ior < 1.0f) {
+      /* When going from a higher to a lower IOR, we must use the transmitted angle. */
+      float sinT2 = (1.0f - sqr(cosI)) / sqr(bsdf->ior);
+      if (sinT2 >= 1.0f) {
+        /* Total internal reflection */
+        return refraction ? zero_spectrum() : fresnel->reflection_tint;
+      }
+      cosI = safe_sqrtf(1.0f - sinT2);
+    }
+    /* TODO(lukas): Is a special case for exponent==5 worth it? */
+    const float s = powf(1.0f - cosI, fresnel->exponent);
+    const Spectrum F = mix(fresnel->f0, fresnel->f90, s);
+    if (refraction) {
+      return (one_spectrum() - F) * fresnel->transmission_tint;
+    }
+    else {
+      return F * fresnel->reflection_tint;
+    }
+  }
+  else if (bsdf->fresnel_type == MicrofacetFresnel::CONSTANT) {
+    kernel_assert(!refraction);
+    ccl_private FresnelConstant *fresnel = (ccl_private FresnelConstant *)bsdf->fresnel;
+    return fresnel->color;
  }
  else {
    return one_spectrum();
@@ -200,7 +276,7 @@ ccl_device_forceinline Spectrum microfacet_fresnel(ccl_private const MicrofacetB
 ccl_device_forceinline void bsdf_microfacet_adjust_weight(ccl_private const ShaderData *sd,
                                                          ccl_private MicrofacetBsdf *bsdf)
 {
-  bsdf->sample_weight *= average(microfacet_fresnel(bsdf, sd->wi, bsdf->N));
+  bsdf->sample_weight *= average(microfacet_fresnel(bsdf, sd->wi, bsdf->N, false));
 }

 /* Generalized Trowbridge-Reitz for clearcoat. */
@@ -227,8 +303,8 @@ ccl_device_inline float bsdf_lambda_from_sqr_alpha_tan_n(float sqr_alpha_tan_n)
    return 0.5f * (sqrtf(1.0f + sqr_alpha_tan_n) - 1.0f);
  }
  else {
-    /* m_type == MicrofacetType::BECKMANN
-     * Approximation from below Equation 69. */
+    kernel_assert(m_type == MicrofacetType::BECKMANN);
+    /* Approximation from below Equation 69. */
    if (sqr_alpha_tan_n < 0.39f) {
      /* Equivalent to a >= 1.6f, but also handles sqr_alpha_tan_n == 0.0f cleanly. */
      return 0.0f;
@@ -251,6 +327,12 @@ ccl_device_inline float bsdf_aniso_lambda(float alpha_x, float alpha_y, float3 V
  return bsdf_lambda_from_sqr_alpha_tan_n<m_type>(sqr_alpha_tan_n);
 }

+/* Shadowing-masking term for a single direction: 1 / (1 + lambda(alpha2, cos_N)). */
+template<MicrofacetType m_type> ccl_device_inline float bsdf_G(float alpha2, float cos_N)
+{
+  return 1.0f / (bsdf_lambda<m_type>(alpha2, cos_N) + 1.0f);
+}
+
 /* Combined shadowing-masking term. */
 template<MicrofacetType m_type>
 ccl_device_inline float bsdf_G(float alpha2, float cos_NI, float cos_NO)
@@ -267,7 +349,7 @@ template<MicrofacetType m_type> ccl_device_inline float bsdf_D(float alpha2, flo
    return expf((1.0f - 1.0f / cos_NH2) / alpha2) / (M_PI_F * alpha2 * sqr(cos_NH2));
  }
  else {
-    /* m_type == MicrofacetType::GGX */
+    kernel_assert(m_type == MicrofacetType::GGX);
    return alpha2 / (M_PI_F * sqr(1.0f + (alpha2 - 1.0f) * cos_NH2));
  }
 }
@@ -284,7 +366,7 @@ ccl_device_inline float bsdf_aniso_D(float alpha_x, float alpha_y, float3 H)
    return expf(-(sqr(H.x) + sqr(H.y)) / cos_NH2) / (M_PI_F * alpha2 * sqr(cos_NH2));
  }
  else {
-    /* m_type == MicrofacetType::GGX */
+    kernel_assert(m_type == MicrofacetType::GGX);
    return M_1_PI_F / (alpha2 * sqr(len_squared(H)));
  }
 }
@@ -296,9 +378,14 @@ ccl_device Spectrum bsdf_microfacet_eval(ccl_private const ShaderClosure *sc,
                                         const float3 wo,
                                         ccl_private float *pdf)
 {
+  if (m_type == MicrofacetType::SHARP) {
+    *pdf = 0.0f;
+    return zero_spectrum();
+  }
+
  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
-  const bool m_refractive = (bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID) ||
-                            (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID);
+  const bool m_refractive = CLOSURE_IS_REFRACTIVE(bsdf->type);
+  const bool m_glass = CLOSURE_IS_GLASS(bsdf->type);

  const float3 N = bsdf->N;
  const float cos_NI = dot(N, wi);
@@ -308,14 +395,23 @@ ccl_device Spectrum bsdf_microfacet_eval(ccl_private const ShaderClosure *sc,
  const float alpha_x = bsdf->alpha_x;
  const float alpha_y = bsdf->alpha_y;

-  if ((cos_NI <= 0) || ((cos_NgO < 0.0f) != m_refractive) || ((cos_NO < 0.0f) != m_refractive) ||
-      (alpha_x * alpha_y <= 1e-7f)) {
+  const bool is_refraction = (cos_NO < 0.0f);
+
+  /* Check whether the pair of directions is valid for evaluation:
+   * - Incoming direction has to be in the upper hemisphere (Cycles convention)
+   * - Specular cases can't be evaluated, only sampled.
+   * - The outgoing direction has to be the in the same hemisphere w.r.t. both normals.
+   * - Purely reflective closures can't have refraction.
+   * - Purely refractive closures can't have reflection.
+   */
+  if ((cos_NI <= 0) || (alpha_x * alpha_y <= 1e-7f) || ((cos_NgO < 0.0f) != is_refraction) ||
+      (is_refraction && !m_refractive) || (!is_refraction && m_refractive && !m_glass)) {
    *pdf = 0.0f;
    return zero_spectrum();
  }

  /* Compute half vector. */
-  float3 H = m_refractive ? -(bsdf->ior * wo + wi) : (wi + wo);
+  float3 H = is_refraction ? -(bsdf->ior * wo + wi) : (wi + wo);
  const float inv_len_H = 1.0f / len(H);
  H *= inv_len_H;

@@ -323,7 +419,7 @@ ccl_device Spectrum bsdf_microfacet_eval(ccl_private const ShaderClosure *sc,
  float D, lambdaI, lambdaO;

  /* TODO: add support for anisotropic transmission. */
-  if (alpha_x == alpha_y || m_refractive) { /* Isotropic. */
+  if (alpha_x == alpha_y || is_refraction) { /* Isotropic. */
    float alpha2 = alpha_x * alpha_y;

    if (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) {
@@ -354,23 +450,31 @@ ccl_device Spectrum bsdf_microfacet_eval(ccl_private const ShaderClosure *sc,
    lambdaO = bsdf_aniso_lambda<m_type>(alpha_x, alpha_y, local_O);
  }

-  const float common = D / cos_NI *
-                       (m_refractive ?
-                            sqr(bsdf->ior * inv_len_H) * fabsf(dot(H, wi) * dot(H, wo)) :
-                            0.25f);
+  float common = D / cos_NI *
+                 (is_refraction ? sqr(bsdf->ior * inv_len_H) * fabsf(dot(H, wi) * dot(H, wo)) :
+                                  0.25f);

-  *pdf = common / (1.0f + lambdaI);
+  float lobe_pdf = 1.0f;
+  if (m_glass) {
+    float fresnel = fresnel_dielectric_cos(dot(H, wi), bsdf->ior);
+    float reflect_pdf = (fresnel == 1.0f) ? 1.0f : clamp(fresnel, 0.125f, 0.875f);
+    lobe_pdf = is_refraction ? (1.0f - reflect_pdf) : reflect_pdf;
+  }

-  const Spectrum F = microfacet_fresnel(bsdf, wo, H);
+  *pdf = common * lobe_pdf / (1.0f + lambdaI);
+
+  const Spectrum F = microfacet_fresnel(bsdf, wi, H, is_refraction);
  return F * common / (1.0f + lambdaO + lambdaI);
 }

 template<MicrofacetType m_type>
 ccl_device int bsdf_microfacet_sample(ccl_private const ShaderClosure *sc,
+                                      const int path_flag,
                                      float3 Ng,
                                      float3 wi,
                                      float randu,
                                      float randv,
+                                      float randw,
                                      ccl_private Spectrum *eval,
                                      ccl_private float3 *wo,
                                      ccl_private float *pdf,
@@ -380,70 +484,116 @@ ccl_device int bsdf_microfacet_sample(ccl_private const ShaderClosure *sc,
  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;

  const float m_eta = bsdf->ior;
-  const bool m_refractive = (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID) ||
-                            (bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID);
-  int label = m_refractive ? LABEL_TRANSMIT : LABEL_REFLECT;
+  const bool m_refractive = CLOSURE_IS_REFRACTIVE(bsdf->type);
+  const float alpha_x = bsdf->alpha_x;
+  const float alpha_y = bsdf->alpha_y;
+  bool m_singular = (m_type == MicrofacetType::SHARP) || (alpha_x * alpha_y <= 1e-7f);

  const float3 N = bsdf->N;
  const float cos_NI = dot(N, wi);
  if (cos_NI <= 0) {
-    return label | LABEL_GLOSSY;
+    *eval = zero_spectrum();
+    *pdf = 0.0f;
+    return (m_refractive ? LABEL_TRANSMIT : LABEL_REFLECT) |
+           (m_singular ? LABEL_SINGULAR : LABEL_GLOSSY);
  }

-  float3 X, Y;
-  const float alpha_x = bsdf->alpha_x;
-  const float alpha_y = bsdf->alpha_y;
-  if (alpha_x == alpha_y) {
-    make_orthonormals(N, &X, &Y);
+  float3 H;
+  float cos_NH, cos_HI;
+  float3 local_H, local_I, X, Y; /* Needed for anisotropic microfacets later. */
+  if (m_singular) {
+    H = N;
+    cos_NH = 1.0f;
+    cos_HI = cos_NI;
  }
  else {
-    make_orthonormals_tangent(N, bsdf->T, &X, &Y);
+    if (alpha_x == alpha_y) {
+      make_orthonormals(N, &X, &Y);
+    }
+    else {
+      make_orthonormals_tangent(N, bsdf->T, &X, &Y);
+    }
+
+    /* Importance sampling with distribution of visible normals. Vectors are transformed to local
+     * space before and after sampling. */
+    local_I = make_float3(dot(X, wi), dot(Y, wi), cos_NI);
+    if (m_type == MicrofacetType::GGX) {
+      local_H = microfacet_ggx_sample_vndf(local_I, alpha_x, alpha_y, randu, randv);
+    }
+    else {
+      /* m_type == MicrofacetType::BECKMANN */
+      local_H = microfacet_beckmann_sample_vndf(local_I, alpha_x, alpha_y, randu, randv);
+    }
+
+    H = X * local_H.x + Y * local_H.y + N * local_H.z;
+    cos_NH = local_H.z;
+    cos_HI = dot(H, wi);
  }

-  /* Importance sampling with distribution of visible normals. Vectors are transformed to local
-   * space before and after sampling. */
-  const float3 local_I = make_float3(dot(X, wi), dot(Y, wi), cos_NI);
-  float3 local_H;
-  if (m_type == MicrofacetType::GGX) {
-    local_H = microfacet_ggx_sample_vndf(local_I, alpha_x, alpha_y, randu, randv);
-  }
-  else {
-    /* m_type == MicrofacetType::BECKMANN */
-    local_H = microfacet_beckmann_sample_vndf(local_I, alpha_x, alpha_y, randu, randv);
-  }
-
-  const float3 H = X * local_H.x + Y * local_H.y + N * local_H.z;
-  const float cos_NH = local_H.z;
-  const float cos_HI = dot(H, wi);
-
-  bool valid = false;
+  bool valid;
+  bool do_refract;
+  float lobe_pdf;
  if (m_refractive) {
-    float3 R, T;
    bool inside;
+    float fresnel = fresnel_dielectric(m_eta, H, wi, wo, &inside);
+    valid = !inside;

-    float fresnel = fresnel_dielectric(m_eta, H, wi, &R, &T, &inside);
-    *wo = T;
+    /* For glass closures, we decide between reflection and refraction here. */
+    if (CLOSURE_IS_GLASS(bsdf->type)) {
+      if (fresnel == 1.0f) {
+        /* TIR, reflection is the only option. */
+        do_refract = false;
+        lobe_pdf = 1.0f;
+      }
+      else {
+        /* Decide between reflection and refraction, using defensive sampling to avoid
+         * excessive noise for reflection highlights. */
+        float reflect_pdf = (path_flag & PATH_RAY_CAMERA) ? clamp(fresnel, 0.125f, 0.875f) :
+                                                            fresnel;
+        do_refract = (randw >= reflect_pdf);
+        lobe_pdf = do_refract ? (1.0f - reflect_pdf) : reflect_pdf;
+      }
+    }
+    else {
+      /* For pure refractive closures, refraction is the only option. */
+      do_refract = true;
+      lobe_pdf = 1.0f;
+      valid = valid && (fresnel != 1.0f);
+    }
+  }
+  else {
+    /* Pure reflective closure, reflection is the only option. */
+    valid = true;
+    lobe_pdf = 1.0f;
+    do_refract = false;
+  }

-    valid = !inside && fresnel != 1.0f;
+  int label;
+  if (do_refract) {
+    /* wo was already set to the refracted direction by fresnel_dielectric. */
+    // valid = valid && (dot(Ng, *wo) < 0);
+    label = LABEL_TRANSMIT;
+    /* If the IOR is close enough to 1.0, just treat the interaction as specular. */
+    m_singular = m_singular || (fabsf(m_eta - 1.0f) < 1e-4f);
  }
  else {
    /* Eq. 39 - compute actual reflected direction */
    *wo = 2 * cos_HI * H - wi;
-
-    valid = dot(Ng, *wo) > 0;
+    valid = valid && (dot(Ng, *wo) > 0);
+    label = LABEL_REFLECT;
  }

  if (!valid) {
    *eval = zero_spectrum();
    *pdf = 0.0f;
-    return label | LABEL_GLOSSY;
+    return label | (m_singular ? LABEL_SINGULAR : LABEL_GLOSSY);
  }

-  if (alpha_x * alpha_y <= 1e-7f || (m_refractive && fabsf(m_eta - 1.0f) < 1e-4f)) {
+  if (m_singular) {
    label |= LABEL_SINGULAR;
    /* Some high number for MIS. */
-    *pdf = 1e6f;
-    *eval = make_spectrum(1e6f) * microfacet_fresnel(bsdf, *wo, H);
+    *pdf = lobe_pdf * 1e6f;
+    *eval = make_spectrum(1e6f) * microfacet_fresnel(bsdf, wi, H, do_refract);
  }
  else {
    label |= LABEL_GLOSSY;
@@ -451,7 +601,7 @@ ccl_device int bsdf_microfacet_sample(ccl_private const ShaderClosure *sc,
    float D, lambdaI, lambdaO;

    /* TODO: add support for anisotropic transmission. */
-    if (alpha_x == alpha_y || m_refractive) { /* Isotropic. */
+    if (alpha_x == alpha_y || do_refract) { /* Isotropic. */
      float alpha2 = alpha_x * alpha_y;

      if (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) {
@@ -479,21 +629,65 @@ ccl_device int bsdf_microfacet_sample(ccl_private const ShaderClosure *sc,

    const float cos_HO = dot(H, *wo);
    const float common = D / cos_NI *
-                         (m_refractive ? fabsf(cos_HI * cos_HO) / sqr(cos_HO + cos_HI / m_eta) :
-                                         0.25f);
+                         (do_refract ? fabsf(cos_HI * cos_HO) / sqr(cos_HO + cos_HI / m_eta) :
+                                       0.25f);

-    *pdf = common / (1.0f + lambdaI);
+    *pdf = common * lobe_pdf / (1.0f + lambdaI);

-    Spectrum F = microfacet_fresnel(bsdf, *wo, H);
+    const Spectrum F = microfacet_fresnel(bsdf, wi, H, do_refract);
    *eval = F * common / (1.0f + lambdaI + lambdaO);
  }

  *sampled_roughness = make_float2(alpha_x, alpha_y);
-  *eta = m_refractive ? 1.0f / m_eta : m_eta;
+  *eta = do_refract ? 1.0f / m_eta : m_eta;

  return label;
 }

+/* Fresnel term setup functions. These get called after the distribution-specific setup functions
+ * like bsdf_microfacet_ggx_setup. */
+
+ccl_device void bsdf_microfacet_setup_fresnel_principledv1(
+    ccl_private MicrofacetBsdf *bsdf,
+    ccl_private const ShaderData *sd,
+    ccl_private FresnelPrincipledV1 *fresnel)
+{
+  /* Saturate cspec0, attach the parameters to the closure, then rescale its sample weight. */
+  fresnel->cspec0 = saturate(fresnel->cspec0);
+  bsdf->fresnel = fresnel;
+  bsdf->fresnel_type = MicrofacetFresnel::PRINCIPLED_V1;
+  bsdf_microfacet_adjust_weight(sd, bsdf);
+}
+
+ccl_device void bsdf_microfacet_setup_fresnel_conductor(ccl_private MicrofacetBsdf *bsdf,
+                                                        ccl_private const ShaderData *sd,
+                                                        ccl_private FresnelConductor *fresnel)
+{
+  bsdf->fresnel = fresnel;
+  bsdf->fresnel_type = MicrofacetFresnel::CONDUCTOR;
+  bsdf_microfacet_adjust_weight(sd, bsdf); /* Must run after the Fresnel fields are set. */
+}
+
+ccl_device void bsdf_microfacet_setup_fresnel_dielectric_tint(
+    ccl_private MicrofacetBsdf *bsdf,
+    ccl_private const ShaderData *sd,
+    ccl_private FresnelDielectricTint *fresnel)
+{
+  bsdf->fresnel = fresnel;
+  bsdf->fresnel_type = MicrofacetFresnel::DIELECTRIC_TINT;
+  bsdf_microfacet_adjust_weight(sd, bsdf); /* Must run after the Fresnel fields are set. */
+}
+
+ccl_device void bsdf_microfacet_setup_fresnel_generalized_schlick(
+    ccl_private MicrofacetBsdf *bsdf,
+    ccl_private const ShaderData *sd,
+    ccl_private FresnelGeneralizedSchlick *fresnel)
+{
+  bsdf->fresnel = fresnel;
+  bsdf->fresnel_type = MicrofacetFresnel::GENERALIZED_SCHLICK;
+  bsdf_microfacet_adjust_weight(sd, bsdf); /* Must run after the Fresnel fields are set. */
+}
+
 /* GGX microfacet with Smith shadow-masking from:
 *
 * Microfacet Models for Refraction through Rough Surfaces
@@ -509,37 +703,22 @@ ccl_device int bsdf_microfacet_sample(ccl_private const ShaderClosure *sc,

 ccl_device int bsdf_microfacet_ggx_setup(ccl_private MicrofacetBsdf *bsdf)
 {
-  bsdf->extra = NULL;
-
  bsdf->alpha_x = saturatef(bsdf->alpha_x);
  bsdf->alpha_y = saturatef(bsdf->alpha_y);

+  bsdf->fresnel_type = MicrofacetFresnel::NONE; /* Default; a later setup_fresnel_* call may override. */
  bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ID;

  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }

-ccl_device int bsdf_microfacet_ggx_fresnel_setup(ccl_private MicrofacetBsdf *bsdf,
-                                                 ccl_private const ShaderData *sd)
-{
-  bsdf->extra->cspec0 = saturate(bsdf->extra->cspec0);
-
-  bsdf->alpha_x = saturatef(bsdf->alpha_x);
-  bsdf->alpha_y = saturatef(bsdf->alpha_y);
-
-  bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID;
-
-  bsdf_microfacet_adjust_weight(sd, bsdf);
-
-  return SD_BSDF | SD_BSDF_HAS_EVAL;
-}
-
 ccl_device int bsdf_microfacet_ggx_clearcoat_setup(ccl_private MicrofacetBsdf *bsdf,
                                                   ccl_private const ShaderData *sd)
 {
  bsdf->alpha_x = saturatef(bsdf->alpha_x);
  bsdf->alpha_y = bsdf->alpha_x;

+  bsdf->fresnel_type = MicrofacetFresnel::DIELECTRIC;
  bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID;

  bsdf_microfacet_adjust_weight(sd, bsdf);
@@ -549,16 +728,26 @@ ccl_device int bsdf_microfacet_ggx_clearcoat_setup(ccl_private MicrofacetBsdf *b

 ccl_device int bsdf_microfacet_ggx_refraction_setup(ccl_private MicrofacetBsdf *bsdf)
 {
-  bsdf->extra = NULL;
-
  bsdf->alpha_x = saturatef(bsdf->alpha_x);
  bsdf->alpha_y = bsdf->alpha_x;

+  bsdf->fresnel_type = MicrofacetFresnel::NONE; /* The glass setup uses DIELECTRIC instead. */
  bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;

  return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_HAS_TRANSMISSION;
 }

+ccl_device int bsdf_microfacet_ggx_glass_setup(ccl_private MicrofacetBsdf *bsdf)
+{
+  bsdf->alpha_x = saturatef(bsdf->alpha_x);
+  bsdf->alpha_y = bsdf->alpha_x; /* Isotropic: any incoming alpha_y is discarded. */
+
+  bsdf->fresnel_type = MicrofacetFresnel::DIELECTRIC; /* Refraction-only setup uses NONE here. */
+  bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID;
+
+  return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_HAS_TRANSMISSION; /* Same flags as refraction. */
+}
+
 ccl_device void bsdf_microfacet_ggx_blur(ccl_private ShaderClosure *sc, float roughness)
 {
  ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)sc;
@@ -577,10 +766,12 @@ ccl_device Spectrum bsdf_microfacet_ggx_eval(ccl_private const ShaderClosure *sc
 }

 ccl_device int bsdf_microfacet_ggx_sample(ccl_private const ShaderClosure *sc,
+                                          const int path_flag,
                                          float3 Ng,
                                          float3 wi,
                                          float randu,
                                          float randv,
+                                          float randw,
                                          ccl_private Spectrum *eval,
                                          ccl_private float3 *wo,
                                          ccl_private float *pdf,
@@ -588,7 +779,7 @@ ccl_device int bsdf_microfacet_ggx_sample(ccl_private const ShaderClosure *sc,
                                          ccl_private float *eta)
 {
  return bsdf_microfacet_sample<MicrofacetType::GGX>(
-      sc, Ng, wi, randu, randv, eval, wo, pdf, sampled_roughness, eta);
+      sc, path_flag, Ng, wi, randu, randv, randw, eval, wo, pdf, sampled_roughness, eta);
 }

 /* Beckmann microfacet with Smith shadow-masking from:
@@ -601,6 +792,7 @@ ccl_device int bsdf_microfacet_beckmann_setup(ccl_private MicrofacetBsdf *bsdf)
  bsdf->alpha_x = saturatef(bsdf->alpha_x);
  bsdf->alpha_y = saturatef(bsdf->alpha_y);

+  bsdf->fresnel_type = MicrofacetFresnel::NONE;
  bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ID;
  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
@@ -610,10 +802,21 @@ ccl_device int bsdf_microfacet_beckmann_refraction_setup(ccl_private MicrofacetB
  bsdf->alpha_x = saturatef(bsdf->alpha_x);
  bsdf->alpha_y = bsdf->alpha_x;

+  bsdf->fresnel_type = MicrofacetFresnel::NONE;
  bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
  return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_HAS_TRANSMISSION;
 }

+ccl_device int bsdf_microfacet_beckmann_glass_setup(ccl_private MicrofacetBsdf *bsdf)
+{
+  bsdf->alpha_x = saturatef(bsdf->alpha_x);
+  bsdf->alpha_y = bsdf->alpha_x; /* Isotropic: any incoming alpha_y is discarded. */
+
+  bsdf->fresnel_type = MicrofacetFresnel::DIELECTRIC; /* Refraction-only setup uses NONE here. */
+  bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID;
+  return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_HAS_TRANSMISSION; /* Same flags as refraction. */
+}
+
 ccl_device void bsdf_microfacet_beckmann_blur(ccl_private ShaderClosure *sc, float roughness)
 {
  ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)sc;
@@ -632,10 +835,12 @@ ccl_device Spectrum bsdf_microfacet_beckmann_eval(ccl_private const ShaderClosur
 }

 ccl_device int bsdf_microfacet_beckmann_sample(ccl_private const ShaderClosure *sc,
+                                               const int path_flag,
                                               float3 Ng,
                                               float3 wi,
                                               float randu,
                                               float randv,
+                                               float randw,
                                               ccl_private Spectrum *eval,
                                               ccl_private float3 *wo,
                                               ccl_private float *pdf,
@@ -643,7 +848,63 @@ ccl_device int bsdf_microfacet_beckmann_sample(ccl_private const ShaderClosure *
                                               ccl_private float *eta)
 {
  return bsdf_microfacet_sample<MicrofacetType::BECKMANN>(
-      sc, Ng, wi, randu, randv, eval, wo, pdf, sampled_roughness, eta);
+      sc, path_flag, Ng, wi, randu, randv, randw, eval, wo, pdf, sampled_roughness, eta);
+}
+
+/* Specular interface, not really a microfacet model but close enough that sharing code makes
+ * sense. */
+
+ccl_device int bsdf_reflection_setup(ccl_private MicrofacetBsdf *bsdf)
+{
+  bsdf->fresnel_type = MicrofacetFresnel::NONE; /* No Fresnel term by default. */
+  bsdf->type = CLOSURE_BSDF_REFLECTION_ID;
+  bsdf->alpha_x = 0.0f; /* Zero roughness on both axes. */
+  bsdf->alpha_y = 0.0f;
+  return SD_BSDF; /* No SD_BSDF_HAS_EVAL, unlike the GGX and Beckmann setups. */
+}
+
+ccl_device int bsdf_refraction_setup(ccl_private MicrofacetBsdf *bsdf)
+{
+  bsdf->fresnel_type = MicrofacetFresnel::NONE; /* No Fresnel term by default. */
+  bsdf->type = CLOSURE_BSDF_REFRACTION_ID;
+  bsdf->alpha_x = 0.0f; /* Zero roughness on both axes. */
+  bsdf->alpha_y = 0.0f;
+  return SD_BSDF | SD_BSDF_HAS_TRANSMISSION; /* The removed bsdf_refraction.h returned SD_BSDF. */
+}
+
+ccl_device int bsdf_sharp_glass_setup(ccl_private MicrofacetBsdf *bsdf)
+{
+  bsdf->fresnel_type = MicrofacetFresnel::DIELECTRIC; /* As in the rough glass setups. */
+  bsdf->type = CLOSURE_BSDF_SHARP_GLASS_ID;
+  bsdf->alpha_x = 0.0f; /* Zero roughness on both axes. */
+  bsdf->alpha_y = 0.0f;
+  return SD_BSDF | SD_BSDF_HAS_TRANSMISSION; /* No SD_BSDF_HAS_EVAL, unlike rough glass. */
+}
+
+ccl_device Spectrum bsdf_microfacet_sharp_eval(ccl_private const ShaderClosure *sc,
+                                               const float3 Ng,
+                                               const float3 wi,
+                                               const float3 wo,
+                                               ccl_private float *pdf)
+{
+  return bsdf_microfacet_eval<MicrofacetType::SHARP>(sc, Ng, wi, wo, pdf); /* Plain forward. */
+}
+
+ccl_device int bsdf_microfacet_sharp_sample(ccl_private const ShaderClosure *sc,
+                                            const int path_flag,
+                                            float3 Ng,
+                                            float3 wi,
+                                            float randu,
+                                            float randv,
+                                            float randw,
+                                            ccl_private Spectrum *eval,
+                                            ccl_private float3 *wo,
+                                            ccl_private float *pdf,
+                                            ccl_private float2 *sampled_roughness,
+                                            ccl_private float *eta)
+{
+  return bsdf_microfacet_sample<MicrofacetType::SHARP>( /* Same template as GGX/Beckmann. */
+      sc, path_flag, Ng, wi, randu, randv, randw, eval, wo, pdf, sampled_roughness, eta);
 }

 CCL_NAMESPACE_END
--- a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
@@ -377,8 +377,6 @@ ccl_device int bsdf_microfacet_multi_ggx_common_setup(ccl_private MicrofacetBsdf
 {
  bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
  bsdf->alpha_y = clamp(bsdf->alpha_y, 1e-4f, 1.0f);
-  bsdf->extra->color = saturate(bsdf->extra->color);
-  bsdf->extra->cspec0 = saturate(bsdf->extra->cspec0);

  return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG;
 }
@@ -388,6 +386,10 @@ ccl_device int bsdf_microfacet_multi_ggx_setup(ccl_private MicrofacetBsdf *bsdf)
  if (is_zero(bsdf->T))
    bsdf->T = make_float3(1.0f, 0.0f, 0.0f);

+  ccl_private FresnelConstant *fresnel = (ccl_private FresnelConstant *)bsdf->fresnel;
+  fresnel->color = saturate(fresnel->color);
+
+  bsdf->fresnel_type = MicrofacetFresnel::CONSTANT;
  bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID;

  return bsdf_microfacet_multi_ggx_common_setup(bsdf);
@@ -399,7 +401,12 @@ ccl_device int bsdf_microfacet_multi_ggx_fresnel_setup(ccl_private MicrofacetBsd
  if (is_zero(bsdf->T))
    bsdf->T = make_float3(1.0f, 0.0f, 0.0f);

-  bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID;
+  ccl_private FresnelPrincipledV1 *fresnel = (ccl_private FresnelPrincipledV1 *)bsdf->fresnel;
+  fresnel->color = saturate(fresnel->color);
+  fresnel->cspec0 = saturate(fresnel->cspec0);
+
+  bsdf->fresnel_type = MicrofacetFresnel::PRINCIPLED_V1;
+  bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID;

  bsdf_microfacet_adjust_weight(sd, bsdf);

@@ -410,6 +417,10 @@ ccl_device int bsdf_microfacet_multi_ggx_refraction_setup(ccl_private Microfacet
 {
  bsdf->alpha_y = bsdf->alpha_x;

+  ccl_private FresnelConstant *fresnel = (ccl_private FresnelConstant *)bsdf->fresnel;
+  fresnel->color = saturate(fresnel->color);
+
+  bsdf->fresnel_type = MicrofacetFresnel::CONSTANT;
  bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID;

  return bsdf_microfacet_multi_ggx_common_setup(bsdf);
@@ -439,7 +450,21 @@ ccl_device Spectrum bsdf_microfacet_multi_ggx_eval(ccl_private const ShaderClosu
    return zero_spectrum();
  }

-  bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID);
+  Spectrum color, cspec0;
+  bool use_fresnel;
+  if (bsdf->fresnel_type == MicrofacetFresnel::PRINCIPLED_V1) {
+    ccl_private FresnelPrincipledV1 *fresnel = (ccl_private FresnelPrincipledV1 *)bsdf->fresnel;
+    use_fresnel = true;
+    color = fresnel->color;
+    cspec0 = fresnel->cspec0;
+  }
+  else {
+    kernel_assert(bsdf->fresnel_type == MicrofacetFresnel::CONSTANT);
+    ccl_private FresnelConstant *fresnel = (ccl_private FresnelConstant *)bsdf->fresnel;
+    use_fresnel = false;
+    color = fresnel->color;
+    cspec0 = zero_spectrum();
+  }

  bool is_aniso = (bsdf->alpha_x != bsdf->alpha_y);
  if (is_aniso)
@@ -463,13 +488,13 @@ ccl_device Spectrum bsdf_microfacet_multi_ggx_eval(ccl_private const ShaderClosu
  return mf_eval_glossy(local_I,
                        local_O,
                        true,
-                        bsdf->extra->color,
+                        color,
                        bsdf->alpha_x,
                        bsdf->alpha_y,
                        lcg_state,
                        bsdf->ior,
                        use_fresnel,
-                        bsdf->extra->cspec0);
+                        cspec0);
 }

 ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals kg,
@@ -509,7 +534,21 @@ ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals kg,
    return LABEL_REFLECT | LABEL_SINGULAR;
  }

-  bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID);
+  Spectrum color, cspec0;
+  bool use_fresnel;
+  if (bsdf->fresnel_type == MicrofacetFresnel::PRINCIPLED_V1) {
+    ccl_private FresnelPrincipledV1 *fresnel = (ccl_private FresnelPrincipledV1 *)bsdf->fresnel;
+    use_fresnel = true;
+    color = fresnel->color;
+    cspec0 = fresnel->cspec0;
+  }
+  else {
+    kernel_assert(bsdf->fresnel_type == MicrofacetFresnel::CONSTANT);
+    ccl_private FresnelConstant *fresnel = (ccl_private FresnelConstant *)bsdf->fresnel;
+    use_fresnel = false;
+    color = fresnel->color;
+    cspec0 = zero_spectrum();
+  }

  *eta = bsdf->ior;
  *sampled_roughness = make_float2(bsdf->alpha_x, bsdf->alpha_y);
@@ -525,13 +564,13 @@ ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals kg,

  *eval = mf_sample_glossy(local_I,
                           &local_O,
-                           bsdf->extra->color,
+                           color,
                           bsdf->alpha_x,
                           bsdf->alpha_y,
                           lcg_state,
                           bsdf->ior,
                           use_fresnel,
-                           bsdf->extra->cspec0);
+                           cspec0);
  *wo = X * local_O.x + Y * local_O.y + Z * local_O.z;

  /* Ensure that the light direction is on the outside w.r.t. the geometry normal. */
@@ -557,8 +596,11 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_setup(ccl_private MicrofacetBsdf
  bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
  bsdf->alpha_y = bsdf->alpha_x;
  bsdf->ior = max(0.0f, bsdf->ior);
-  bsdf->extra->color = saturate(bsdf->extra->color);

+  ccl_private FresnelConstant *fresnel = (ccl_private FresnelConstant *)bsdf->fresnel;
+  fresnel->color = saturate(fresnel->color);
+
+  bsdf->fresnel_type = MicrofacetFresnel::CONSTANT;
  bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID;

  return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG | SD_BSDF_HAS_TRANSMISSION;
@@ -570,10 +612,13 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(ccl_private Microfa
  bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f);
  bsdf->alpha_y = bsdf->alpha_x;
  bsdf->ior = max(0.0f, bsdf->ior);
-  bsdf->extra->color = saturate(bsdf->extra->color);
-  bsdf->extra->cspec0 = saturate(bsdf->extra->cspec0);

-  bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID;
+  ccl_private FresnelPrincipledV1 *fresnel = (ccl_private FresnelPrincipledV1 *)bsdf->fresnel;
+  fresnel->color = saturate(fresnel->color);
+  fresnel->cspec0 = saturate(fresnel->cspec0);
+
+  bsdf->fresnel_type = MicrofacetFresnel::PRINCIPLED_V1;
+  bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID;

  bsdf_microfacet_adjust_weight(sd, bsdf);

@@ -601,21 +646,35 @@ ccl_device Spectrum bsdf_microfacet_multi_ggx_glass_eval(ccl_private const Shade
  float3 local_O = make_float3(dot(wo, X), dot(wo, Y), dot(wo, Z));

  const bool is_transmission = local_O.z < 0.0f;
-  const bool use_fresnel = !is_transmission &&
-                           (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID);
+
+  Spectrum color, cspec0;
+  bool use_fresnel;
+  if (bsdf->fresnel_type == MicrofacetFresnel::PRINCIPLED_V1) {
+    ccl_private FresnelPrincipledV1 *fresnel = (ccl_private FresnelPrincipledV1 *)bsdf->fresnel;
+    use_fresnel = true;
+    color = fresnel->color;
+    cspec0 = is_transmission ? fresnel->color : fresnel->cspec0;
+  }
+  else {
+    kernel_assert(bsdf->fresnel_type == MicrofacetFresnel::CONSTANT);
+    ccl_private FresnelConstant *fresnel = (ccl_private FresnelConstant *)bsdf->fresnel;
+    use_fresnel = false;
+    color = fresnel->color;
+    cspec0 = zero_spectrum();
+  }

  *pdf = mf_glass_pdf(local_I, local_O, bsdf->alpha_x, bsdf->ior);
  kernel_assert(*pdf >= 0.f);
  return mf_eval_glass(local_I,
                       local_O,
                       !is_transmission,
-                       bsdf->extra->color,
+                       color,
                       bsdf->alpha_x,
                       bsdf->alpha_y,
                       lcg_state,
                       bsdf->ior,
-                       use_fresnel,
-                       (is_transmission) ? bsdf->extra->color : bsdf->extra->cspec0);
+                       !is_transmission && use_fresnel,
+                       cspec0);
 }

 ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals kg,
@@ -640,14 +699,14 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals kg,
  *sampled_roughness = make_float2(bsdf->alpha_x, bsdf->alpha_y);

  if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) {
-    float3 R, T;
+    float3 T;
    bool inside;
-    float fresnel = fresnel_dielectric(bsdf->ior, Z, wi, &R, &T, &inside);
+    float fresnel = fresnel_dielectric(bsdf->ior, Z, wi, &T, &inside);

    *pdf = 1e6f;
    *eval = make_spectrum(1e6f);
    if (randu < fresnel) {
-      *wo = R;
+      *wo = 2 * dot(Z, wi) * Z - wi;
      return LABEL_REFLECT | LABEL_SINGULAR;
    }
    else {
@@ -656,7 +715,21 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals kg,
    }
  }

-  bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID);
+  Spectrum color, cspec0;
+  bool use_fresnel;
+  if (bsdf->fresnel_type == MicrofacetFresnel::PRINCIPLED_V1) {
+    ccl_private FresnelPrincipledV1 *fresnel = (ccl_private FresnelPrincipledV1 *)bsdf->fresnel;
+    use_fresnel = true;
+    color = fresnel->color;
+    cspec0 = fresnel->cspec0;
+  }
+  else {
+    kernel_assert(bsdf->fresnel_type == MicrofacetFresnel::CONSTANT);
+    ccl_private FresnelConstant *fresnel = (ccl_private FresnelConstant *)bsdf->fresnel;
+    use_fresnel = false;
+    color = fresnel->color;
+    cspec0 = zero_spectrum();
+  }

  make_orthonormals(Z, &X, &Y);

@@ -665,13 +738,13 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals kg,

  *eval = mf_sample_glass(local_I,
                          &local_O,
-                          bsdf->extra->color,
+                          color,
                          bsdf->alpha_x,
                          bsdf->alpha_y,
                          lcg_state,
                          bsdf->ior,
                          use_fresnel,
-                          bsdf->extra->cspec0);
+                          cspec0);
  *pdf = mf_glass_pdf(local_I, local_O, bsdf->alpha_x, bsdf->ior);
  kernel_assert(*pdf >= 0.f);
  *eval *= *pdf;
--- a/intern/cycles/kernel/closure/bsdf_reflection.h
+++ b/intern/cycles/kernel/closure/bsdf_reflection.h
@@ -1,61 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- *
- * Adapted from Open Shading Language
- * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al.
- * All Rights Reserved.
- *
- * Modifications Copyright 2011-2022 Blender Foundation. */
-
-#pragma once
-
-CCL_NAMESPACE_BEGIN
-
-/* REFLECTION */
-
-ccl_device int bsdf_reflection_setup(ccl_private MicrofacetBsdf *bsdf)
-{
-  bsdf->type = CLOSURE_BSDF_REFLECTION_ID;
-  return SD_BSDF;
-}
-
-ccl_device Spectrum bsdf_reflection_eval(ccl_private const ShaderClosure *sc,
-                                         const float3 wi,
-                                         const float3 wo,
-                                         ccl_private float *pdf)
-{
-  *pdf = 0.0f;
-  return zero_spectrum();
-}
-
-ccl_device int bsdf_reflection_sample(ccl_private const ShaderClosure *sc,
-                                      float3 Ng,
-                                      float3 wi,
-                                      float randu,
-                                      float randv,
-                                      ccl_private Spectrum *eval,
-                                      ccl_private float3 *wo,
-                                      ccl_private float *pdf,
-                                      ccl_private float *eta)
-{
-  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
-  float3 N = bsdf->N;
-  *eta = bsdf->ior;
-
-  // only one direction is possible
-  float cosNI = dot(N, wi);
-  if (cosNI > 0) {
-    *wo = (2 * cosNI) * N - wi;
-    if (dot(Ng, *wo) > 0) {
-      /* Some high number for MIS. */
-      *pdf = 1e6f;
-      *eval = make_spectrum(1e6f);
-    }
-  }
-  else {
-    *pdf = 0.0f;
-    *eval = zero_spectrum();
-  }
-  return LABEL_REFLECT | LABEL_SINGULAR;
-}
-
-CCL_NAMESPACE_END
--- a/intern/cycles/kernel/closure/bsdf_refraction.h
+++ b/intern/cycles/kernel/closure/bsdf_refraction.h
@@ -1,64 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- *
- * Adapted from Open Shading Language
- * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al.
- * All Rights Reserved.
- *
- * Modifications Copyright 2011-2022 Blender Foundation. */
-
-#pragma once
-
-CCL_NAMESPACE_BEGIN
-
-/* REFRACTION */
-
-ccl_device int bsdf_refraction_setup(ccl_private MicrofacetBsdf *bsdf)
-{
-  bsdf->type = CLOSURE_BSDF_REFRACTION_ID;
-  return SD_BSDF;
-}
-
-ccl_device Spectrum bsdf_refraction_eval(ccl_private const ShaderClosure *sc,
-                                         const float3 wi,
-                                         const float3 wo,
-                                         ccl_private float *pdf)
-{
-  *pdf = 0.0f;
-  return zero_spectrum();
-}
-
-ccl_device int bsdf_refraction_sample(ccl_private const ShaderClosure *sc,
-                                      float3 Ng,
-                                      float3 wi,
-                                      float randu,
-                                      float randv,
-                                      ccl_private Spectrum *eval,
-                                      ccl_private float3 *wo,
-                                      ccl_private float *pdf,
-                                      ccl_private float *eta)
-{
-  ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
-  float m_eta = bsdf->ior;
-
-  *eta = 1.0f / m_eta;
-  float3 N = bsdf->N;
-
-  float3 R, T;
-  bool inside;
-  float fresnel;
-  fresnel = fresnel_dielectric(m_eta, N, wi, &R, &T, &inside);
-
-  if (!inside && fresnel != 1.0f) {
-    /* Some high number for MIS. */
-    *pdf = 1e6f;
-    *eval = make_spectrum(1e6f);
-    *wo = T;
-  }
-  else {
-    *pdf = 0.0f;
-    *eval = zero_spectrum();
-  }
-  return LABEL_TRANSMIT | LABEL_SINGULAR;
-}
-
-CCL_NAMESPACE_END
--- a/intern/cycles/kernel/closure/bsdf_util.h
+++ b/intern/cycles/kernel/closure/bsdf_util.h
@@ -10,12 +10,8 @@

 CCL_NAMESPACE_BEGIN

-ccl_device float fresnel_dielectric(float eta,
-                                    const float3 N,
-                                    const float3 I,
-                                    ccl_private float3 *R,
-                                    ccl_private float3 *T,
-                                    ccl_private bool *is_inside)
+ccl_device float fresnel_dielectric(
+    float eta, const float3 N, const float3 I, ccl_private float3 *T, ccl_private bool *is_inside)
 {
  float cos = dot(N, I), neta;
  float3 Nn;
@@ -35,9 +31,6 @@ ccl_device float fresnel_dielectric(float eta,
    *is_inside = true;
  }

-  // compute reflection
-  *R = (2 * cos) * Nn - I;
-
  float arg = 1 - (neta * neta * (1 - (cos * cos)));
  if (arg < 0) {
    *T = make_float3(0.0f, 0.0f, 0.0f);
@@ -71,17 +64,23 @@ ccl_device float fresnel_dielectric_cos(float cosi, float eta)
  return 1.0f;  // TIR(no refracted component)
 }

-ccl_device float3 fresnel_conductor(float cosi, const float3 eta, const float3 k)
+ccl_device Spectrum fresnel_conductor(float cosi, const Spectrum eta, const Spectrum k)
 {
-  float3 cosi2 = make_float3(cosi * cosi, cosi * cosi, cosi * cosi);
-  float3 one = make_float3(1.0f, 1.0f, 1.0f);
-  float3 tmp_f = eta * eta + k * k;
-  float3 tmp = tmp_f * cosi2;
-  float3 Rparl2 = (tmp - (2.0f * eta * cosi) + one) / (tmp + (2.0f * eta * cosi) + one);
-  float3 Rperp2 = (tmp_f - (2.0f * eta * cosi) + cosi2) / (tmp_f + (2.0f * eta * cosi) + cosi2);
+  Spectrum cosi2 = make_spectrum(cosi * cosi); /* Replaces the three-argument make_float3. */
+  Spectrum one = make_spectrum(1.0f);
+  Spectrum tmp_f = eta * eta + k * k; /* eta^2 + k^2, shared by both terms below. */
+  Spectrum tmp = tmp_f * cosi2;
+  Spectrum Rparl2 = (tmp - (2.0f * eta * cosi) + one) / (tmp + (2.0f * eta * cosi) + one);
+  Spectrum Rperp2 = (tmp_f - (2.0f * eta * cosi) + cosi2) / (tmp_f + (2.0f * eta * cosi) + cosi2);
  return (Rparl2 + Rperp2) * 0.5f;
 }

+ccl_device float ior_from_F0(Spectrum f0) /* One scalar IOR, derived from average(f0). */
+{
+  const float sqrt_f0 = sqrtf(clamp(average(f0), 0.0f, 0.99f)); /* < 1 keeps the divisor > 0. */
+  return (1.0f + sqrt_f0) / (1.0f - sqrt_f0); /* Inverts F0 = ((ior - 1) / (ior + 1))^2. */
+}
+
 ccl_device float schlick_fresnel(float u)
 {
  float m = clamp(1.0f - u, 0.0f, 1.0f);
--- a/intern/cycles/kernel/data_arrays.h
+++ b/intern/cycles/kernel/data_arrays.h
@@ -29,7 +29,7 @@ KERNEL_DATA_ARRAY(DecomposedTransform, camera_motion)
 /* triangles */
 KERNEL_DATA_ARRAY(uint, tri_shader)
 KERNEL_DATA_ARRAY(packed_float3, tri_vnormal)
-KERNEL_DATA_ARRAY(uint4, tri_vindex)
+KERNEL_DATA_ARRAY(packed_uint3, tri_vindex)
 KERNEL_DATA_ARRAY(uint, tri_patch)
 KERNEL_DATA_ARRAY(float2, tri_patch_uv)
 KERNEL_DATA_ARRAY(packed_float3, tri_verts)
--- a/intern/cycles/kernel/device/cpu/kernel_avx2.cpp
+++ b/intern/cycles/kernel/device/cpu/kernel_avx2.cpp
@@ -10,7 +10,7 @@
 #ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
 #  define KERNEL_STUB
 #else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
+/* SSE optimization disabled for now on 32 bit, see bug #36316. */
 #  if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
 #    define __KERNEL_SSE__
 #    define __KERNEL_SSE2__
--- a/intern/cycles/kernel/device/cpu/kernel_sse2.cpp
+++ b/intern/cycles/kernel/device/cpu/kernel_sse2.cpp
@@ -10,7 +10,7 @@
 #ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
 #  define KERNEL_STUB
 #else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
+/* SSE optimization disabled for now on 32 bit, see bug #36316. */
 #  if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
 #    define __KERNEL_SSE2__
 #  endif
--- a/intern/cycles/kernel/device/cpu/kernel_sse41.cpp
+++ b/intern/cycles/kernel/device/cpu/kernel_sse41.cpp
@@ -10,7 +10,7 @@
 #ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
 #  define KERNEL_STUB
 #else
-/* SSE optimization disabled for now on 32 bit, see bug T36316. */
+/* SSE optimization disabled for now on 32 bit, see bug #36316. */
 #  if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
 #    define __KERNEL_SSE2__
 #    define __KERNEL_SSE3__
--- a/Show More
+++ b/Show More