Compare commits


4 Commits

828 changed files with 11522 additions and 18257 deletions

View File

@@ -565,19 +565,15 @@ endif()
option(WITH_OPENGL "When off limits visibility of the opengl headers to just bf_gpu and gawain (temporary option for development purposes)" ON)
option(WITH_GLEW_ES "Switches to experimental copy of GLEW that has support for OpenGL ES. (temporary option for development purposes)" OFF)
option(WITH_GL_PROFILE_ES20 "Support using OpenGL ES 2.0. (through either EGL or the AGL/WGL/XGL 'es20' profile)" OFF)
option(WITH_GPU_BUILDTIME_SHADER_BUILDER "Shader builder is a developer option enabling linting on GLSL during compilation" OFF)
option(WITH_GPU_SHADER_BUILDER "Shader builder is a developer option enabling linting on GLSL during compilation" OFF)
mark_as_advanced(
WITH_OPENGL
WITH_GLEW_ES
WITH_GL_PROFILE_ES20
WITH_GPU_BUILDTIME_SHADER_BUILDER
WITH_GPU_SHADER_BUILDER
)
if(WITH_HEADLESS)
set(WITH_OPENGL OFF)
endif()
# Metal
if (APPLE)

View File

@@ -63,8 +63,6 @@ set(DPCPP_EXTRA_ARGS
-DPython3_ROOT_DIR=${LIBDIR}/python/
-DPython3_EXECUTABLE=${PYTHON_BINARY}
-DPYTHON_EXECUTABLE=${PYTHON_BINARY}
-DLLDB_ENABLE_CURSES=OFF
-DLLVM_ENABLE_TERMINFO=OFF
)
if(WIN32)

View File

@@ -10,12 +10,18 @@ set(EMBREE_EXTRA_ARGS
-DEMBREE_RAY_MASK=ON
-DEMBREE_FILTER_FUNCTION=ON
-DEMBREE_BACKFACE_CULLING=OFF
-DEMBREE_MAX_ISA=AVX2
-DEMBREE_TASKING_SYSTEM=TBB
-DEMBREE_TBB_ROOT=${LIBDIR}/tbb
-DTBB_ROOT=${LIBDIR}/tbb
-DTBB_STATIC_LIB=${TBB_STATIC_LIBRARY}
)
if (NOT BLENDER_PLATFORM_ARM)
if(BLENDER_PLATFORM_ARM)
set(EMBREE_EXTRA_ARGS
${EMBREE_EXTRA_ARGS}
-DEMBREE_MAX_ISA=NEON)
else()
set(EMBREE_EXTRA_ARGS
${EMBREE_EXTRA_ARGS}
-DEMBREE_MAX_ISA=AVX2)
@@ -24,10 +30,23 @@ endif()
if(TBB_STATIC_LIBRARY)
set(EMBREE_EXTRA_ARGS
${EMBREE_EXTRA_ARGS}
-DEMBREE_TBB_COMPONENT=tbb_static
-DEMBREE_TBB_LIBRARY_NAME=tbb_static
-DEMBREE_TBBMALLOC_LIBRARY_NAME=tbbmalloc_static
)
endif()
if(WIN32)
set(EMBREE_BUILD_DIR ${BUILD_MODE}/)
if(BUILD_MODE STREQUAL Debug)
list(APPEND EMBREE_EXTRA_ARGS
-DEMBREE_TBBMALLOC_LIBRARY_NAME=tbbmalloc_debug
-DEMBREE_TBB_LIBRARY_NAME=tbb_debug
)
endif()
else()
set(EMBREE_BUILD_DIR)
endif()
ExternalProject_Add(external_embree
URL file://${PACKAGE_DIR}/${EMBREE_FILE}
DOWNLOAD_DIR ${DOWNLOAD_DIR}

View File

@@ -410,9 +410,9 @@ set(SQLITE_HASH fb558c49ee21a837713c4f1e7e413309aabdd9c7)
set(SQLITE_HASH_TYPE SHA1)
set(SQLITE_FILE sqlite-src-3240000.zip)
set(EMBREE_VERSION 3.13.4)
set(EMBREE_VERSION 3.13.3)
set(EMBREE_URI https://github.com/embree/embree/archive/v${EMBREE_VERSION}.zip)
set(EMBREE_HASH 52d0be294d6c88ba7a6c9e046796e7be)
set(EMBREE_HASH f62766ba54e48a2f327c3a22596e7133)
set(EMBREE_HASH_TYPE MD5)
set(EMBREE_FILE embree-v${EMBREE_VERSION}.zip)

View File

@@ -567,7 +567,7 @@ OPENCOLLADA_FORCE_BUILD=false
OPENCOLLADA_FORCE_REBUILD=false
OPENCOLLADA_SKIP=false
EMBREE_VERSION="3.13.4"
EMBREE_VERSION="3.13.3"
EMBREE_VERSION_SHORT="3.13"
EMBREE_VERSION_MIN="3.13"
EMBREE_VERSION_MEX="4.0"

View File

@@ -1,37 +1,30 @@
diff -Naur org/kernels/rtcore_config.h.in embree-3.13.4/kernels/rtcore_config.h.in
--- org/kernels/rtcore_config.h.in 2022-06-14 22:13:52 -0600
+++ embree-3.13.4/kernels/rtcore_config.h.in 2022-06-24 15:20:12 -0600
@@ -14,6 +14,7 @@
#cmakedefine01 EMBREE_MIN_WIDTH
#define RTC_MIN_WIDTH EMBREE_MIN_WIDTH
+#cmakedefine EMBREE_STATIC_LIB
#cmakedefine EMBREE_API_NAMESPACE
#if defined(EMBREE_API_NAMESPACE)
diff --git a/kernels/CMakeLists.txt b/kernels/CMakeLists.txt
index 7c2f43d..106b1d5 100644
--- a/kernels/CMakeLists.txt
+++ b/kernels/CMakeLists.txt
@@ -201,6 +201,12 @@ embree_files(EMBREE_LIBRARY_FILES_AVX512 ${AVX512})
#message("AVX2: ${EMBREE_LIBRARY_FILES_AVX2}")
#message("AVX512: ${EMBREE_LIBRARY_FILES_AVX512}")
diff -Naur orig/common/sys/platform.h external_embree/common/sys/platform.h
--- orig/common/sys/platform.h 2020-05-13 23:08:53 -0600
+++ external_embree/common/sys/platform.h 2020-06-13 17:40:26 -0600
@@ -84,8 +84,8 @@
////////////////////////////////////////////////////////////////////////////////
+# Bundle Neon2x into the main static library.
+IF(EMBREE_ISA_NEON2X AND EMBREE_STATIC_LIB)
+ LIST(APPEND EMBREE_LIBRARY_FILES ${EMBREE_LIBRARY_FILES_AVX2})
+ LIST(REMOVE_DUPLICATES EMBREE_LIBRARY_FILES)
+ENDIF()
+
# replaces all .cpp files with a dummy file that includes that .cpp file
# this is to work around an ICC name mangling issue related to lambda functions under windows
MACRO (CreateISADummyFiles list isa)
@@ -277,7 +283,7 @@ IF (EMBREE_ISA_AVX AND EMBREE_LIBRARY_FILES_AVX)
ENDIF()
ENDIF()
-IF (EMBREE_ISA_AVX2 AND EMBREE_LIBRARY_FILES_AVX2)
+IF (EMBREE_ISA_AVX2 AND EMBREE_LIBRARY_FILES_AVX2 AND NOT (EMBREE_ISA_NEON2X AND EMBREE_STATIC_LIB))
DISABLE_STACK_PROTECTOR_FOR_INTERSECTORS(${EMBREE_LIBRARY_FILES_AVX2})
ADD_LIBRARY(embree_avx2 STATIC ${EMBREE_LIBRARY_FILES_AVX2})
TARGET_LINK_LIBRARIES(embree_avx2 PRIVATE tasking)
#ifdef __WIN32__
-#define dll_export __declspec(dllexport)
-#define dll_import __declspec(dllimport)
+#define dll_export
+#define dll_import
#else
#define dll_export __attribute__ ((visibility ("default")))
#define dll_import
diff --git orig/common/tasking/CMakeLists.txt external_embree/common/tasking/CMakeLists.txt
--- orig/common/tasking/CMakeLists.txt
+++ external_embree/common/tasking/CMakeLists.txt
@@ -27,7 +27,11 @@
else()
# If not found try getting older TBB via module (FindTBB.cmake)
unset(TBB_DIR CACHE)
- find_package(TBB 4.1 REQUIRED tbb)
+ if (TBB_STATIC_LIB)
+ find_package(TBB 4.1 REQUIRED tbb_static)
+ else()
+ find_package(TBB 4.1 REQUIRED tbb)
+ endif()
if (TBB_FOUND)
TARGET_LINK_LIBRARIES(tasking PUBLIC TBB)
TARGET_INCLUDE_DIRECTORIES(tasking PUBLIC "${TBB_INCLUDE_DIRS}")

View File

@@ -71,6 +71,7 @@ if(NOT WIN32)
endif()
if(WIN32)
set(WITH_WASAPI ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_DEVICE_ONEAPI ON CACHE BOOL "" FORCE)
endif()
if(UNIX AND NOT APPLE)
set(WITH_DOC_MANPAGE ON CACHE BOOL "" FORCE)
@@ -91,7 +92,6 @@ if(NOT APPLE)
set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_DEVICE_ONEAPI ON CACHE BOOL "" FORCE)
# Disable AoT kernel compilations until the buildbot can deliver them in a reasonable time.
set(WITH_CYCLES_ONEAPI_BINARIES OFF CACHE BOOL "" FORCE)

View File

@@ -1529,8 +1529,7 @@ def pyrna2sphinx(basepath):
else:
fw(".. class:: %s\n\n" % struct_id)
write_indented_lines(" ", fw, struct.description, False)
fw("\n")
fw(" %s\n\n" % struct.description)
# Properties sorted in alphabetical order.
sorted_struct_properties = struct.properties[:]

View File

@@ -270,7 +270,7 @@ AUD_API int AUD_readSound(AUD_Sound* sound, float* buffer, int length, int sampl
return length;
}
AUD_API int AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate, void(*callback)(float, void*), void* data, char* error, size_t errorsize)
AUD_API const char* AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate, void(*callback)(float, void*), void* data)
{
try
{
@@ -282,20 +282,15 @@ AUD_API int AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int lengt
std::shared_ptr<IWriter> writer = FileWriter::createWriter(filename, convCToDSpec(specs), static_cast<Container>(format), static_cast<Codec>(codec), bitrate);
FileWriter::writeReader(reader, writer, length, buffersize, callback, data);
return true;
return nullptr;
}
catch(Exception& e)
{
if(error && errorsize)
{
std::strncpy(error, e.getMessage().c_str(), errorsize);
error[errorsize - 1] = '\0';
}
return false;
return e.getMessage().c_str();
}
}
AUD_API int AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate, void(*callback)(float, void*), void* data, char* error, size_t errorsize)
AUD_API const char* AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsigned int length, unsigned int buffersize, const char* filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate, void(*callback)(float, void*), void* data)
{
try
{
@@ -333,16 +328,11 @@ AUD_API int AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsign
reader->seek(start);
FileWriter::writeReader(reader, writers, length, buffersize, callback, data);
return true;
return nullptr;
}
catch(Exception& e)
{
if(error && errorsize)
{
std::strncpy(error, e.getMessage().c_str(), errorsize);
error[errorsize - 1] = '\0';
}
return false;
return e.getMessage().c_str();
}
}
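For orientation, a minimal self-contained sketch of the two error-reporting conventions this hunk switches between: a boolean return with a caller-supplied error buffer versus a returned message string that is NULL on success. mixdown_old_style and mixdown_new_style are hypothetical stand-ins, not Audaspace API.

#include <cstddef>
#include <cstdio>
#include <cstring>

/* Hypothetical stand-in for the old contract: int return, caller-provided error buffer. */
static int mixdown_old_style(bool fail, char* error, std::size_t errorsize)
{
  if (fail) {
    if (error && errorsize) {
      std::strncpy(error, "could not open output file", errorsize);
      error[errorsize - 1] = '\0';
    }
    return 0; /* failure */
  }
  return 1; /* success */
}

/* Hypothetical stand-in for the new contract: NULL on success, message on failure. */
static const char* mixdown_new_style(bool fail)
{
  return fail ? "could not open output file" : nullptr;
}

int main()
{
  char error[256];
  if (!mixdown_old_style(true, error, sizeof(error)))
    std::printf("old-style failure: %s\n", error);

  if (const char* err = mixdown_new_style(true))
    std::printf("new-style failure: %s\n", err);
  return 0;
}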

View File

@@ -70,15 +70,13 @@ extern AUD_API int AUD_readSound(AUD_Sound* sound, float* buffer, int length, in
* \param bitrate The bitrate for encoding.
* \param callback A callback function that is called periodically during mixdown, reporting progress if length > 0. Can be NULL.
* \param data Pass through parameter that is passed to the callback.
* \param error String buffer to copy the error message to in case of failure.
* \param errorsize The size of the error buffer.
* \return Whether or not the operation succeeded.
* \return An error message or NULL in case of success.
*/
extern AUD_API int AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int length,
extern AUD_API const char* AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned int length,
unsigned int buffersize, const char* filename,
AUD_DeviceSpecs specs, AUD_Container format,
AUD_Codec codec, unsigned int bitrate,
void(*callback)(float, void*), void* data, char* error, size_t errorsize);
void(*callback)(float, void*), void* data);
/**
* Mixes a sound down into multiple files.
@@ -93,15 +91,13 @@ extern AUD_API int AUD_mixdown(AUD_Sound* sound, unsigned int start, unsigned in
* \param bitrate The bitrate for encoding.
* \param callback A callback function that is called periodically during mixdown, reporting progress if length > 0. Can be NULL.
* \param data Pass through parameter that is passed to the callback.
* \param error String buffer to copy the error message to in case of failure.
* \param errorsize The size of the error buffer.
* \return Whether or not the operation succeeded.
* \return An error message or NULL in case of success.
*/
extern AUD_API int AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsigned int length,
extern AUD_API const char* AUD_mixdown_per_channel(AUD_Sound* sound, unsigned int start, unsigned int length,
unsigned int buffersize, const char* filename,
AUD_DeviceSpecs specs, AUD_Container format,
AUD_Codec codec, unsigned int bitrate,
void(*callback)(float, void*), void* data, char* error, size_t errorsize);
void(*callback)(float, void*), void* data);
/**
* Opens a read device and prepares it for mixdown of the sound scene.

View File

@@ -128,6 +128,10 @@ if(WITH_OPENIMAGEDENOISE)
)
endif()
if(WITH_EXPERIMENTAL_FEATURES)
add_definitions(-DWITH_NEW_CURVES_TYPE)
endif()
blender_add_lib(bf_intern_cycles "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
add_dependencies(bf_intern_cycles bf_rna)

View File

@@ -84,36 +84,10 @@ class AddPresetViewportSampling(AddPresetBase, Operator):
preset_subdir = "cycles/viewport_sampling"
class AddPresetPerformance(AddPresetBase, Operator):
'''Add a Performance Preset'''
bl_idname = "render.cycles_performance_preset_add"
bl_label = "Add Performance Preset"
preset_menu = "CYCLES_PT_performance_presets"
preset_defines = [
"render = bpy.context.scene.render",
"cycles = bpy.context.scene.cycles"
]
preset_values = [
"render.threads_mode",
"render.use_persistent_data",
"cycles.debug_use_spatial_splits",
"cycles.debug_use_compact_bvh",
"cycles.debug_use_hair_bvh",
"cycles.debug_bvh_time_steps",
"cycles.use_auto_tile",
"cycles.tile_size",
]
preset_subdir = "cycles/performance"
classes = (
AddPresetIntegrator,
AddPresetSampling,
AddPresetViewportSampling,
AddPresetPerformance,
)

View File

@@ -693,7 +693,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
debug_use_compact_bvh: BoolProperty(
name="Use Compact BVH",
description="Use compact BVH structure (uses less ram but renders slower)",
default=False,
default=True,
)
debug_bvh_time_steps: IntProperty(
name="BVH Time Steps",
@@ -1560,7 +1560,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
if sys.platform.startswith("win"):
col.label(text="and Windows driver version 101.1660 or newer", icon='BLANK1')
elif sys.platform.startswith("linux"):
col.label(text="and Linux driver version xx.xx.23570 or newer", icon='BLANK1')
col.label(text="and Linux driver version xx.xx.20066 or newer", icon='BLANK1')
elif device_type == 'METAL':
col.label(text="Requires Apple Silicon with macOS 12.2 or newer", icon='BLANK1')
col.label(text="or AMD with macOS 12.3 or newer", icon='BLANK1')

View File

@@ -43,12 +43,6 @@ class CYCLES_PT_integrator_presets(CyclesPresetPanel):
preset_add_operator = "render.cycles_integrator_preset_add"
class CYCLES_PT_performance_presets(CyclesPresetPanel):
bl_label = "Performance Presets"
preset_subdir = "cycles/performance"
preset_add_operator = "render.cycles_performance_preset_add"
class CyclesButtonsPanel:
bl_space_type = "PROPERTIES"
bl_region_type = "WINDOW"
@@ -630,9 +624,6 @@ class CYCLES_RENDER_PT_performance(CyclesButtonsPanel, Panel):
bl_label = "Performance"
bl_options = {'DEFAULT_CLOSED'}
def draw_header_preset(self, context):
CYCLES_PT_performance_presets.draw_panel_header(self.layout)
def draw(self, context):
pass
@@ -952,8 +943,6 @@ class CYCLES_CAMERA_PT_dof(CyclesButtonsPanel, Panel):
col = split.column()
col.prop(dof, "focus_object", text="Focus Object")
if dof.focus_object and dof.focus_object.type == 'ARMATURE':
col.prop_search(dof, "focus_subtarget", dof.focus_object.data, "bones", text="Focus Bone")
sub = col.row()
sub.active = dof.focus_object is None
@@ -1213,7 +1202,7 @@ class CYCLES_OBJECT_PT_lightgroup(CyclesButtonsPanel, Panel):
sub.prop_search(ob, "lightgroup", view_layer, "lightgroups", text="Light Group", results_are_suggestions=True)
sub = row.column(align=True)
sub.enabled = bool(ob.lightgroup) and not any(lg.name == ob.lightgroup for lg in view_layer.lightgroups)
sub.active = bool(ob.lightgroup) and not any(lg.name == ob.lightgroup for lg in view_layer.lightgroups)
sub.operator("scene.view_layer_add_lightgroup", icon='ADD', text="").name = ob.lightgroup
@@ -1651,7 +1640,7 @@ class CYCLES_WORLD_PT_settings_light_group(CyclesButtonsPanel, Panel):
)
sub = row.column(align=True)
sub.enabled = bool(world.lightgroup) and not any(lg.name == world.lightgroup for lg in view_layer.lightgroups)
sub.active = bool(world.lightgroup) and not any(lg.name == world.lightgroup for lg in view_layer.lightgroups)
sub.operator("scene.view_layer_add_lightgroup", icon='ADD', text="").name = world.lightgroup
@@ -2280,7 +2269,6 @@ classes = (
CYCLES_PT_sampling_presets,
CYCLES_PT_viewport_sampling_presets,
CYCLES_PT_integrator_presets,
CYCLES_PT_performance_presets,
CYCLES_RENDER_PT_sampling,
CYCLES_RENDER_PT_sampling_viewport,
CYCLES_RENDER_PT_sampling_viewport_denoise,

View File

@@ -143,20 +143,11 @@ static float blender_camera_focal_distance(BL::RenderEngine &b_engine,
if (!b_dof_object)
return b_camera.dof().focus_distance();
Transform dofmat = get_transform(b_dof_object.matrix_world());
string focus_subtarget = b_camera.dof().focus_subtarget();
if (b_dof_object.pose() && !focus_subtarget.empty()) {
BL::PoseBone b_bone = b_dof_object.pose().bones[focus_subtarget];
if (b_bone) {
dofmat = dofmat * get_transform(b_bone.matrix());
}
}
/* for dof object, return distance along camera Z direction */
BL::Array<float, 16> b_ob_matrix;
b_engine.camera_model_matrix(b_ob, bcam->use_spherical_stereo, b_ob_matrix);
Transform obmat = transform_clear_scale(get_transform(b_ob_matrix));
Transform dofmat = get_transform(b_dof_object.matrix_world());
float3 view_dir = normalize(transform_get_column(&obmat, 2));
float3 dof_dir = transform_get_column(&obmat, 3) - transform_get_column(&dofmat, 3);
return fabsf(dot(view_dir, dof_dir));
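A small worked example of the focal-distance math above, with plain structs standing in for Cycles' float3/Transform types. Assumptions: the camera sits at the origin looking down -Z, and only the projection of the focus object's offset onto the view axis matters (the fabsf() makes the sign of the view axis irrelevant).

#include <cmath>
#include <cstdio>

struct Vec3 { float x, y, z; };

static Vec3 sub(Vec3 a, Vec3 b) { return {a.x - b.x, a.y - b.y, a.z - b.z}; }
static float dot(Vec3 a, Vec3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }
static Vec3 normalize(Vec3 v) { float l = std::sqrt(dot(v, v)); return {v.x / l, v.y / l, v.z / l}; }

int main()
{
  const Vec3 cam_pos = {0.0f, 0.0f, 0.0f};
  const Vec3 view_dir = normalize({0.0f, 0.0f, -1.0f}); /* camera view axis */
  const Vec3 dof_pos = {1.0f, 0.0f, -5.0f};             /* focus object, off to the side */

  const float focus_distance = std::fabs(dot(view_dir, sub(dof_pos, cam_pos)));
  std::printf("focus distance = %.2f\n", focus_distance); /* 5.00: the lateral offset drops out */
  return 0;
}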

View File

@@ -613,6 +613,8 @@ void BlenderSync::sync_particle_hair(
}
}
#ifdef WITH_NEW_CURVES_TYPE
static std::optional<BL::FloatAttribute> find_curves_radius_attribute(BL::Curves b_curves)
{
for (BL::Attribute &b_attribute : b_curves.attributes) {
@@ -630,25 +632,6 @@ static std::optional<BL::FloatAttribute> find_curves_radius_attribute(BL::Curves
return std::nullopt;
}
static BL::FloatVectorAttribute find_curves_position_attribute(BL::Curves b_curves)
{
for (BL::Attribute &b_attribute : b_curves.attributes) {
if (b_attribute.name() != "position") {
continue;
}
if (b_attribute.domain() != BL::Attribute::domain_POINT) {
continue;
}
if (b_attribute.data_type() != BL::Attribute::data_type_FLOAT_VECTOR) {
continue;
}
return BL::FloatVectorAttribute{b_attribute};
}
/* The position attribute must exist. */
assert(false);
return BL::FloatVectorAttribute{b_curves.attributes[0]};
}
template<typename TypeInCycles, typename GetValueAtIndex>
static void fill_generic_attribute(BL::Curves &b_curves,
TypeInCycles *data,
@@ -812,16 +795,16 @@ static void attr_create_generic(Scene *scene,
}
}
static float4 hair_point_as_float4(BL::FloatVectorAttribute b_attr_position,
static float4 hair_point_as_float4(BL::Curves b_curves,
std::optional<BL::FloatAttribute> b_attr_radius,
const int index)
{
float4 mP = float3_to_float4(get_float3(b_attr_position.data[index].vector()));
float4 mP = float3_to_float4(get_float3(b_curves.position_data[index].vector()));
mP.w = b_attr_radius ? b_attr_radius->data[index].value() : 0.0f;
return mP;
}
static float4 interpolate_hair_points(BL::FloatVectorAttribute b_attr_position,
static float4 interpolate_hair_points(BL::Curves b_curves,
std::optional<BL::FloatAttribute> b_attr_radius,
const int first_point_index,
const int num_points,
@@ -831,8 +814,8 @@ static float4 interpolate_hair_points(BL::FloatVectorAttribute b_attr_position,
const int point_a = clamp((int)curve_t, 0, num_points - 1);
const int point_b = min(point_a + 1, num_points - 1);
const float t = curve_t - (float)point_a;
return lerp(hair_point_as_float4(b_attr_position, b_attr_radius, first_point_index + point_a),
hair_point_as_float4(b_attr_position, b_attr_radius, first_point_index + point_b),
return lerp(hair_point_as_float4(b_curves, b_attr_radius, first_point_index + point_a),
hair_point_as_float4(b_curves, b_attr_radius, first_point_index + point_b),
t);
}
@@ -865,7 +848,6 @@ static void export_hair_curves(Scene *scene,
hair->reserve_curves(num_curves, num_keys);
BL::FloatVectorAttribute b_attr_position = find_curves_position_attribute(b_curves);
std::optional<BL::FloatAttribute> b_attr_radius = find_curves_radius_attribute(b_curves);
/* Export curves and points. */
@@ -884,9 +866,9 @@ static void export_hair_curves(Scene *scene,
/* Position and radius. */
for (int i = 0; i < num_points; i++) {
const float3 co = get_float3(b_attr_position.data[first_point_index + i].vector());
const float3 co = get_float3(b_curves.position_data[first_point_index + i].vector());
const float radius = b_attr_radius ? b_attr_radius->data[first_point_index + i].value() :
0.005f;
0.0f;
hair->add_curve_key(co, radius);
if (attr_intercept) {
@@ -941,7 +923,6 @@ static void export_hair_curves_motion(Hair *hair, BL::Curves b_curves, int motio
int num_motion_keys = 0;
int curve_index = 0;
BL::FloatVectorAttribute b_attr_position = find_curves_position_attribute(b_curves);
std::optional<BL::FloatAttribute> b_attr_radius = find_curves_radius_attribute(b_curves);
for (int i = 0; i < num_curves; i++) {
@@ -957,7 +938,7 @@ static void export_hair_curves_motion(Hair *hair, BL::Curves b_curves, int motio
int point_index = first_point_index + i;
if (point_index < num_keys) {
mP[num_motion_keys] = hair_point_as_float4(b_attr_position, b_attr_radius, point_index);
mP[num_motion_keys] = hair_point_as_float4(b_curves, b_attr_radius, point_index);
num_motion_keys++;
if (!have_motion) {
@@ -977,7 +958,7 @@ static void export_hair_curves_motion(Hair *hair, BL::Curves b_curves, int motio
for (int i = 0; i < curve.num_keys; i++) {
const float step = i * step_size;
mP[num_motion_keys] = interpolate_hair_points(
b_attr_position, b_attr_radius, first_point_index, num_points, step);
b_curves, b_attr_radius, first_point_index, num_points, step);
num_motion_keys++;
}
have_motion = true;
@@ -1009,6 +990,15 @@ void BlenderSync::sync_hair(Hair *hair, BObjectInfo &b_ob_info, bool motion, int
export_hair_curves(scene, hair, b_curves, need_motion, motion_scale);
}
}
#else
void BlenderSync::sync_hair(Hair *hair, BObjectInfo &b_ob_info, bool motion, int motion_step)
{
(void)hair;
(void)b_ob_info;
(void)motion;
(void)motion_step;
}
#endif
void BlenderSync::sync_hair(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, Hair *hair)
{
@@ -1020,11 +1010,14 @@ void BlenderSync::sync_hair(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, H
new_hair.set_used_shaders(used_shaders);
if (view_layer.use_hair) {
#ifdef WITH_NEW_CURVES_TYPE
if (b_ob_info.object_data.is_a(&RNA_Curves)) {
/* Hair object. */
sync_hair(&new_hair, b_ob_info, false);
}
else {
else
#endif
{
/* Particle hair. */
bool need_undeformed = new_hair.need_attribute(scene, ATTR_STD_GENERATED);
BL::Mesh b_mesh = object_to_mesh(
@@ -1071,12 +1064,15 @@ void BlenderSync::sync_hair_motion(BL::Depsgraph b_depsgraph,
/* Export deformed coordinates. */
if (ccl::BKE_object_is_deform_modified(b_ob_info, b_scene, preview)) {
#ifdef WITH_NEW_CURVES_TYPE
if (b_ob_info.object_data.is_a(&RNA_Curves)) {
/* Hair object. */
sync_hair(hair, b_ob_info, true, motion_step);
return;
}
else {
else
#endif
{
/* Particle hair. */
BL::Mesh b_mesh = object_to_mesh(
b_data, b_ob_info, b_depsgraph, false, Mesh::SUBDIVISION_NONE);

View File

@@ -18,7 +18,11 @@ CCL_NAMESPACE_BEGIN
static Geometry::Type determine_geom_type(BObjectInfo &b_ob_info, bool use_particle_hair)
{
#ifdef WITH_NEW_CURVES_TYPE
if (b_ob_info.object_data.is_a(&RNA_Curves) || use_particle_hair) {
#else
if (use_particle_hair) {
#endif
return Geometry::HAIR;
}
@@ -213,7 +217,11 @@ void BlenderSync::sync_geometry_motion(BL::Depsgraph &b_depsgraph,
if (progress.get_cancel())
return;
#ifdef WITH_NEW_CURVES_TYPE
if (b_ob_info.object_data.is_a(&RNA_Curves) || use_particle_hair) {
#else
if (use_particle_hair) {
#endif
Hair *hair = static_cast<Hair *>(geom);
sync_hair_motion(b_depsgraph, b_ob_info, hair, motion_step);
}

View File

@@ -1,10 +1,8 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#include <optional>
#include "scene/attribute.h"
#include "scene/pointcloud.h"
#include "scene/attribute.h"
#include "scene/scene.h"
#include "blender/sync.h"
@@ -140,36 +138,6 @@ static void copy_attributes(PointCloud *pointcloud,
}
}
static std::optional<BL::FloatAttribute> find_radius_attribute(BL::PointCloud b_pointcloud)
{
for (BL::Attribute &b_attribute : b_pointcloud.attributes) {
if (b_attribute.name() != "radius") {
continue;
}
if (b_attribute.data_type() != BL::Attribute::data_type_FLOAT) {
continue;
}
return BL::FloatAttribute{b_attribute};
}
return std::nullopt;
}
static BL::FloatVectorAttribute find_position_attribute(BL::PointCloud b_pointcloud)
{
for (BL::Attribute &b_attribute : b_pointcloud.attributes) {
if (b_attribute.name() != "position") {
continue;
}
if (b_attribute.data_type() != BL::Attribute::data_type_FLOAT_VECTOR) {
continue;
}
return BL::FloatVectorAttribute{b_attribute};
}
/* The position attribute must exist. */
assert(false);
return BL::FloatVectorAttribute{b_pointcloud.attributes[0]};
}
static void export_pointcloud(Scene *scene,
PointCloud *pointcloud,
BL::PointCloud b_pointcloud,
@@ -188,18 +156,18 @@ static void export_pointcloud(Scene *scene,
const int num_points = b_pointcloud.points.length();
pointcloud->reserve(num_points);
BL::FloatVectorAttribute b_attr_position = find_position_attribute(b_pointcloud);
std::optional<BL::FloatAttribute> b_attr_radius = find_radius_attribute(b_pointcloud);
/* Export points. */
for (int i = 0; i < num_points; i++) {
const float3 co = get_float3(b_attr_position.data[i].vector());
const float radius = b_attr_radius ? b_attr_radius->data[i].value() : 0.0f;
BL::PointCloud::points_iterator b_point_iter;
for (b_pointcloud.points.begin(b_point_iter); b_point_iter != b_pointcloud.points.end();
++b_point_iter) {
BL::Point b_point = *b_point_iter;
const float3 co = get_float3(b_point.co());
const float radius = b_point.radius();
pointcloud->add_point(co, radius);
/* Random number per point. */
if (attr_random != NULL) {
attr_random->add(hash_uint2_to_float(i, 0));
attr_random->add(hash_uint2_to_float(b_point.index(), 0));
}
}
@@ -227,15 +195,14 @@ static void export_pointcloud_motion(PointCloud *pointcloud,
int num_motion_points = 0;
const array<float3> &pointcloud_points = pointcloud->get_points();
BL::FloatVectorAttribute b_attr_position = find_position_attribute(b_pointcloud);
std::optional<BL::FloatAttribute> b_attr_radius = find_radius_attribute(b_pointcloud);
BL::PointCloud::points_iterator b_point_iter;
for (b_pointcloud.points.begin(b_point_iter); b_point_iter != b_pointcloud.points.end();
++b_point_iter) {
BL::Point b_point = *b_point_iter;
for (int i = 0; i < num_points; i++) {
if (num_motion_points < num_points) {
const float3 co = get_float3(b_attr_position.data[i].vector());
const float radius = b_attr_radius ? b_attr_radius->data[i].value() : 0.0f;
float3 P = co;
P.w = radius;
float3 P = get_float3(b_point.co());
P.w = b_point.radius();
mP[num_motion_points] = P;
have_motion = have_motion || (P != pointcloud_points[num_motion_points]);
num_motion_points++;

View File

@@ -129,7 +129,7 @@ class BVHParams {
top_level = false;
bvh_layout = BVH_LAYOUT_BVH2;
use_compact_structure = false;
use_compact_structure = true;
use_unaligned_nodes = false;
num_motion_curve_steps = 0;

View File

@@ -197,7 +197,7 @@ void CPUDevice::const_copy_to(const char *name, void *host, size_t size)
// Update scene handle (since it is different for each device on multi devices)
KernelData *const data = (KernelData *)host;
data->device_bvh = embree_scene;
data->bvh.scene = embree_scene;
}
#endif
kernel_const_copy(&kernel_globals, name, host, size);

View File

@@ -29,7 +29,6 @@ class DeviceQueue;
class Progress;
class CPUKernels;
class CPUKernelThreadGlobals;
class Scene;
/* Device Types */
@@ -187,11 +186,6 @@ class Device {
return 0;
}
/* Called after kernel texture setup, and prior to integrator state setup. */
virtual void optimize_for_scene(Scene * /*scene*/)
{
}
virtual bool is_resident(device_ptr /*key*/, Device *sub_device)
{
/* Memory is always resident if this is not a multi device, regardless of whether the pointer

View File

@@ -75,8 +75,7 @@ class MetalDevice : public Device {
std::vector<id<MTLTexture>> texture_slot_map;
bool use_metalrt = false;
MetalPipelineType kernel_specialization_level = PSO_GENERIC;
std::atomic_bool async_compile_and_load = false;
bool use_function_specialisation = false;
virtual BVHLayoutMask get_bvh_layout_mask() const override;
@@ -92,7 +91,9 @@ class MetalDevice : public Device {
bool use_adaptive_compilation();
void make_source(MetalPipelineType pso_type, const uint kernel_features);
string get_source(const uint kernel_features);
string compile_kernel(const uint kernel_features, const char *name);
virtual bool load_kernels(const uint kernel_features) override;
@@ -110,9 +111,7 @@ class MetalDevice : public Device {
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
virtual void optimize_for_scene(Scene *scene) override;
bool compile_and_load(MetalPipelineType pso_type);
id<MTLLibrary> compile(string const &source);
/* ------------------------------------------------------------------ */
/* low-level memory management */

View File

@@ -6,8 +6,6 @@
# include "device/metal/device_impl.h"
# include "device/metal/device.h"
# include "scene/scene.h"
# include "util/debug.h"
# include "util/md5.h"
# include "util/path.h"
@@ -80,10 +78,6 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
case METAL_GPU_APPLE: {
max_threads_per_threadgroup = 512;
use_metalrt = info.use_metalrt;
/* Specialize the intersection kernels on Apple GPUs by default as these can be built very
* quickly. */
kernel_specialization_level = PSO_SPECIALIZED_INTERSECT;
break;
}
}
@@ -96,13 +90,6 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
capture_enabled = true;
}
if (auto envstr = getenv("CYCLES_METAL_SPECIALIZATION_LEVEL")) {
kernel_specialization_level = (MetalPipelineType)atoi(envstr);
}
metal_printf("kernel_specialization_level = %s\n",
kernel_type_as_string(
(MetalPipelineType)min((int)kernel_specialization_level, (int)PSO_NUM - 1)));
MTLArgumentDescriptor *arg_desc_params = [[MTLArgumentDescriptor alloc] init];
arg_desc_params.dataType = MTLDataTypePointer;
arg_desc_params.access = MTLArgumentAccessReadOnly;
@@ -222,86 +209,61 @@ bool MetalDevice::use_adaptive_compilation()
return DebugFlags().metal.adaptive_compile;
}
void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_features)
string MetalDevice::get_source(const uint kernel_features)
{
string global_defines;
string build_options;
if (use_adaptive_compilation()) {
global_defines += "#define __KERNEL_FEATURES__ " + to_string(kernel_features) + "\n";
build_options += " -D__KERNEL_FEATURES__=" + to_string(kernel_features);
}
if (use_metalrt) {
global_defines += "#define __METALRT__\n";
build_options += "-D__METALRT__ ";
if (motion_blur) {
global_defines += "#define __METALRT_MOTION__\n";
build_options += "-D__METALRT_MOTION__ ";
}
}
# ifdef WITH_CYCLES_DEBUG
global_defines += "#define __KERNEL_DEBUG__\n";
build_options += "-D__KERNEL_DEBUG__ ";
# endif
switch (device_vendor) {
default:
break;
case METAL_GPU_INTEL:
global_defines += "#define __KERNEL_METAL_INTEL__\n";
build_options += "-D__KERNEL_METAL_INTEL__ ";
break;
case METAL_GPU_AMD:
global_defines += "#define __KERNEL_METAL_AMD__\n";
build_options += "-D__KERNEL_METAL_AMD__ ";
break;
case METAL_GPU_APPLE:
global_defines += "#define __KERNEL_METAL_APPLE__\n";
build_options += "-D__KERNEL_METAL_APPLE__ ";
break;
}
string &source = this->source[pso_type];
source = "\n#include \"kernel/device/metal/kernel.metal\"\n";
source = path_source_replace_includes(source, path_get("source"));
/* reformat -D defines list into compilable form */
vector<string> components;
string_replace(build_options, "-D", "");
string_split(components, build_options, " ");
/* Perform any required specialization on the source.
* With Metal function constants we can generate a single variant of the kernel source which can
* be repeatedly respecialized.
*/
string baked_constants;
/* Replace specific KernelData "dot" dereferences with a Metal function_constant identifier of
* the same character length. Build a string of all active constant values which is then hashed
* in order to identify the PSO.
*/
if (pso_type != PSO_GENERIC) {
const double starttime = time_dt();
# define KERNEL_STRUCT_BEGIN(name, parent) \
string_replace_same_length(source, "kernel_data." #parent ".", "kernel_data_" #parent "_");
/* Add constants to md5 so that 'get_best_pipeline' is able to return a suitable match. */
# define KERNEL_STRUCT_MEMBER(parent, _type, name) \
baked_constants += string(#parent "." #name "=") + \
to_string(_type(launch_params.data.parent.name)) + "\n";
# include "kernel/data_template.h"
/* Opt in to all of available specializations. This can be made more granular for the
* PSO_SPECIALIZED_INTERSECT case in order to minimize the number of specialization requests,
* but the overhead should be negligible as these are very quick to (re)build and aren't
* serialized to disk via MTLBinaryArchives.
*/
global_defines += "#define __KERNEL_USE_DATA_CONSTANTS__\n";
metal_printf("KernelData patching took %.1f ms\n", (time_dt() - starttime) * 1000.0);
string globalDefines;
for (const string &component : components) {
vector<string> assignments;
string_split(assignments, component, "=");
if (assignments.size() == 2)
globalDefines += string_printf(
"#define %s %s\n", assignments[0].c_str(), assignments[1].c_str());
else
globalDefines += string_printf("#define %s\n", assignments[0].c_str());
}
source = global_defines + source;
metal_printf("================\n%s================\n\%s================\n",
global_defines.c_str(),
baked_constants.c_str());
string source = globalDefines + "\n#include \"kernel/device/metal/kernel.metal\"\n";
source = path_source_replace_includes(source, path_get("source"));
/* Generate an MD5 from the source and include any baked constants. This is used when caching
* PSOs. */
MD5Hash md5;
md5.append(baked_constants);
md5.append(source);
source_md5[pso_type] = md5.get_hex();
metal_printf("Global defines:\n%s\n", globalDefines.c_str());
return source;
}
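A rough sketch of the same-length token patching described in the specialization path above, with a plain std::string helper standing in for Cycles' string_replace_same_length(); "film"/"exposure" are example member names only. Because the replacement has exactly the same length as the original dereference, no other byte of the kernel source shifts, so one generic source string can be re-specialized repeatedly.

#include <cassert>
#include <cstddef>
#include <cstdio>
#include <string>

/* Replace every occurrence of `from` with the equally long `to`, preserving the text length. */
static void replace_same_length(std::string& text, const std::string& from, const std::string& to)
{
  assert(from.size() == to.size());
  for (std::size_t pos = text.find(from); pos != std::string::npos; pos = text.find(from, pos + to.size()))
    text.replace(pos, from.size(), to);
}

int main()
{
  std::string source = "float exposure = kernel_data.film.exposure;";
  replace_same_length(source, "kernel_data.film.", "kernel_data_film_");
  std::printf("%s\n", source.c_str()); /* float exposure = kernel_data_film_exposure; */
  return 0;
}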
bool MetalDevice::load_kernels(const uint _kernel_features)
@@ -317,22 +279,28 @@ bool MetalDevice::load_kernels(const uint _kernel_features)
* active, but may still need to be rendered without motion blur if that isn't active as well. */
motion_blur = kernel_features & KERNEL_FEATURE_OBJECT_MOTION;
bool result = compile_and_load(PSO_GENERIC);
source[PSO_GENERIC] = get_source(kernel_features);
const double starttime = time_dt();
mtlLibrary[PSO_GENERIC] = compile(source[PSO_GENERIC]);
metal_printf("Front-end compilation finished in %.1f seconds (generic)\n",
time_dt() - starttime);
MD5Hash md5;
md5.append(source[PSO_GENERIC]);
source_md5[PSO_GENERIC] = md5.get_hex();
bool result = MetalDeviceKernels::load(this, false);
reserve_local_memory(kernel_features);
return result;
}
bool MetalDevice::compile_and_load(MetalPipelineType pso_type)
id<MTLLibrary> MetalDevice::compile(string const &source)
{
make_source(pso_type, kernel_features);
if (!MetalDeviceKernels::should_load_kernels(this, pso_type)) {
/* We already have a full set of matching pipelines which are cached or queued. */
metal_printf("%s kernels already requested\n", kernel_type_as_string(pso_type));
return true;
}
MTLCompileOptions *options = [[MTLCompileOptions alloc] init];
options.fastMathEnabled = YES;
@@ -340,30 +308,19 @@ bool MetalDevice::compile_and_load(MetalPipelineType pso_type)
options.languageVersion = MTLLanguageVersion2_4;
}
if (getenv("CYCLES_METAL_PROFILING") || getenv("CYCLES_METAL_DEBUG")) {
path_write_text(path_cache_get(string_printf("%s.metal", kernel_type_as_string(pso_type))),
source[pso_type]);
}
const double starttime = time_dt();
NSError *error = NULL;
mtlLibrary[pso_type] = [mtlDevice newLibraryWithSource:@(source[pso_type].c_str())
options:options
error:&error];
id<MTLLibrary> mtlLibrary = [mtlDevice newLibraryWithSource:@(source.c_str())
options:options
error:&error];
if (!mtlLibrary[pso_type]) {
if (!mtlLibrary) {
NSString *err = [error localizedDescription];
set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
}
metal_printf("Front-end compilation finished in %.1f seconds (%s)\n",
time_dt() - starttime,
kernel_type_as_string(pso_type));
[options release];
return MetalDeviceKernels::load(this, pso_type);
return mtlLibrary;
}
void MetalDevice::reserve_local_memory(const uint kernel_features)
@@ -670,58 +627,6 @@ device_ptr MetalDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, siz
return 0;
}
void MetalDevice::optimize_for_scene(Scene *scene)
{
MetalPipelineType specialization_level = kernel_specialization_level;
if (specialization_level < PSO_SPECIALIZED_INTERSECT) {
return;
}
/* PSO_SPECIALIZED_INTERSECT kernels are fast to specialize, so we always load them
* synchronously. */
compile_and_load(PSO_SPECIALIZED_INTERSECT);
if (specialization_level < PSO_SPECIALIZED_SHADE) {
return;
}
if (!scene->params.background) {
/* Don't load PSO_SPECIALIZED_SHADE kernels during viewport rendering as they are slower to
* build. */
return;
}
/* PSO_SPECIALIZED_SHADE kernels are slower to specialize, so we load them asynchronously, and
* only if there isn't an existing load in flight.
*/
auto specialize_shade_fn = ^() {
compile_and_load(PSO_SPECIALIZED_SHADE);
async_compile_and_load = false;
};
bool async_specialize_shade = true;
/* Block if a per-kernel profiling is enabled (ensure steady rendering rate). */
if (getenv("CYCLES_METAL_PROFILING") != nullptr) {
async_specialize_shade = false;
}
if (async_specialize_shade) {
if (!async_compile_and_load) {
async_compile_and_load = true;
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0),
specialize_shade_fn);
}
else {
metal_printf(
"Async PSO_SPECIALIZED_SHADE load request already in progress - dropping request\n");
}
}
else {
specialize_shade_fn();
}
}
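A minimal sketch of the single-request-in-flight guard used by optimize_for_scene() above, with std::thread standing in for dispatch_async and a compare-exchange making the check-and-set explicit. The flag name, timings and messages are illustrative only.

#include <atomic>
#include <chrono>
#include <cstdio>
#include <thread>

static std::atomic_bool async_compile_and_load{false};

static void request_specialization()
{
  bool expected = false;
  if (!async_compile_and_load.compare_exchange_strong(expected, true)) {
    std::printf("async specialization already in progress - dropping request\n");
    return;
  }
  std::thread([] {
    std::this_thread::sleep_for(std::chrono::milliseconds(50)); /* pretend to compile */
    std::printf("specialized kernels ready\n");
    async_compile_and_load = false;
  }).detach();
}

int main()
{
  request_specialization(); /* starts the background build */
  request_specialization(); /* dropped while the first request is still running */
  std::this_thread::sleep_for(std::chrono::milliseconds(100));
  return 0;
}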
void MetalDevice::const_copy_to(const char *name, void *host, size_t size)
{
if (strcmp(name, "data") == 0) {
@@ -747,7 +652,7 @@ void MetalDevice::const_copy_to(const char *name, void *host, size_t size)
/* Update data storage pointers in launch parameters. */
if (strcmp(name, "integrator_state") == 0) {
/* IntegratorStateGPU is contiguous pointers */
const size_t pointer_block_size = offsetof(IntegratorStateGPU, sort_partition_divisor);
const size_t pointer_block_size = sizeof(IntegratorStateGPU);
update_launch_pointers(
offsetof(KernelParamsMetal, integrator_state), host, size, pointer_block_size);
}
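A toy illustration of the size computation this hunk changes: when a struct starts with a contiguous run of device pointers and ends with plain data, offsetof() of the first non-pointer member measures just the pointer block, while sizeof() covers the whole struct. The member names below are made up for illustration, not Cycles' real IntegratorStateGPU fields.

#include <cstddef>
#include <cstdint>
#include <cstdio>

using device_ptr_t = std::uint64_t;

struct ToyIntegratorStateGPU {
  device_ptr_t path;             /* contiguous device pointers ... */
  device_ptr_t sort_key_counter;
  device_ptr_t queue_counter;
  int sort_partition_divisor;    /* ... followed by plain-old data */
};

int main()
{
  /* Patching only the pointer block stops at the first non-pointer member;
   * patching everything uses sizeof() of the whole struct. */
  std::printf("pointer block = %zu bytes\n", offsetof(ToyIntegratorStateGPU, sort_partition_divisor));
  std::printf("whole struct  = %zu bytes\n", sizeof(ToyIntegratorStateGPU));
  return 0;
}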

View File

@@ -31,7 +31,7 @@ enum {
enum { METALRT_TABLE_DEFAULT, METALRT_TABLE_SHADOW, METALRT_TABLE_LOCAL, METALRT_TABLE_NUM };
/* Pipeline State Object types */
enum MetalPipelineType {
enum {
/* A kernel that can be used with all scenes, supporting all features.
* It is slow to compile, but only needs to be compiled once and is then
* cached for future render sessions. This allows a render to get underway
@@ -39,33 +39,28 @@ enum MetalPipelineType {
*/
PSO_GENERIC,
/* An intersection kernel that is very quick to specialize and results in faster intersection
* kernel performance. It uses Metal function constants to replace several KernelData variables
* with fixed constants.
/* A kernel that is relatively quick to compile, but is specialized for the
* scene being rendered. It only contains the required functionality, and even
* bakes in constant values, which means it needs to be recompiled whenever a
* dependent setting is changed. The render performance of this kernel is
* significantly faster though, and justifies the extra compile time.
*/
PSO_SPECIALIZED_INTERSECT,
/* A shading kernel that is slow to specialize, but results in faster shading kernel performance
* for the scene being rendered. It uses Metal function constants to replace several KernelData
* variables with fixed constants and to short-circuit all unused SVM node case handlers.
*/
PSO_SPECIALIZED_SHADE,
/* METAL_WIP: This isn't used and will require more changes to enable. */
PSO_SPECIALISED,
PSO_NUM
};
const char *kernel_type_as_string(MetalPipelineType pso_type);
const char *kernel_type_as_string(int kernel_type);
struct MetalKernelPipeline {
void compile();
id<MTLLibrary> mtlLibrary = nil;
MetalPipelineType pso_type;
bool scene_specialized;
string source_md5;
size_t usage_count = 0;
KernelData kernel_data_;
bool use_metalrt;
bool metalrt_hair;
bool metalrt_hair_thick;
@@ -80,8 +75,6 @@ struct MetalKernelPipeline {
id<MTLComputePipelineState> pipeline = nil;
int num_threads_per_block = 0;
bool should_use_binary_archive() const;
string error_str;
API_AVAILABLE(macos(11.0))
@@ -92,8 +85,7 @@ struct MetalKernelPipeline {
/* Cache of Metal kernels for each DeviceKernel. */
namespace MetalDeviceKernels {
bool should_load_kernels(MetalDevice *device, MetalPipelineType pso_type);
bool load(MetalDevice *device, MetalPipelineType pso_type);
bool load(MetalDevice *device, bool scene_specialized);
const MetalKernelPipeline *get_best_pipeline(const MetalDevice *device, DeviceKernel kernel);
} /* namespace MetalDeviceKernels */

View File

@@ -5,7 +5,6 @@
# include "device/metal/kernel.h"
# include "device/metal/device_impl.h"
# include "kernel/device/metal/function_constants.h"
# include "util/md5.h"
# include "util/path.h"
# include "util/tbb.h"
@@ -17,15 +16,13 @@ CCL_NAMESPACE_BEGIN
/* limit to 2 MTLCompiler instances */
int max_mtlcompiler_threads = 2;
const char *kernel_type_as_string(MetalPipelineType pso_type)
const char *kernel_type_as_string(int kernel_type)
{
switch (pso_type) {
switch (kernel_type) {
case PSO_GENERIC:
return "PSO_GENERIC";
case PSO_SPECIALIZED_INTERSECT:
return "PSO_SPECIALIZED_INTERSECT";
case PSO_SPECIALIZED_SHADE:
return "PSO_SPECIALIZED_SHADE";
case PSO_SPECIALISED:
return "PSO_SPECIALISED";
default:
assert(0);
}
@@ -53,11 +50,7 @@ struct ShaderCache {
/* Non-blocking request for a kernel, optionally specialized to the scene being rendered by
* device. */
void load_kernel(DeviceKernel kernel, MetalDevice *device, MetalPipelineType pso_type);
bool should_load_kernel(DeviceKernel device_kernel,
MetalDevice *device,
MetalPipelineType pso_type);
void load_kernel(DeviceKernel kernel, MetalDevice *device, bool scene_specialized);
void wait_for_all();
@@ -146,53 +139,9 @@ void ShaderCache::compile_thread_func(int thread_index)
}
}
bool ShaderCache::should_load_kernel(DeviceKernel device_kernel,
MetalDevice *device,
MetalPipelineType pso_type)
{
if (device_kernel == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
/* Skip megakernel. */
return false;
}
if (device_kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
if ((device->kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0) {
/* Skip shade_surface_raytrace kernel if the scene doesn't require it. */
return false;
}
}
if (pso_type != PSO_GENERIC) {
/* Only specialize kernels where it can make an impact. */
if (device_kernel < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
device_kernel > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
return false;
}
/* Only specialize shading / intersection kernels as requested. */
bool is_shade_kernel = (device_kernel >= DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
bool is_shade_pso = (pso_type == PSO_SPECIALIZED_SHADE);
if (is_shade_pso != is_shade_kernel) {
return false;
}
}
{
/* check whether the kernel has already been requested / cached */
thread_scoped_lock lock(cache_mutex);
for (auto &pipeline : pipelines[device_kernel]) {
if (pipeline->source_md5 == device->source_md5[pso_type]) {
return false;
}
}
}
return true;
}
void ShaderCache::load_kernel(DeviceKernel device_kernel,
MetalDevice *device,
MetalPipelineType pso_type)
bool scene_specialized)
{
{
/* create compiler threads on first run */
@@ -205,21 +154,52 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
}
}
if (!should_load_kernel(device_kernel, device, pso_type)) {
if (device_kernel == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
/* skip megakernel */
return;
}
if (scene_specialized) {
/* Only specialize kernels where it can make an impact. */
if (device_kernel < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
device_kernel > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
return;
}
}
{
/* check whether the kernel has already been requested / cached */
thread_scoped_lock lock(cache_mutex);
for (auto &pipeline : pipelines[device_kernel]) {
if (scene_specialized) {
if (pipeline->source_md5 == device->source_md5[PSO_SPECIALISED]) {
/* we already requested a pipeline that is specialized for this kernel data */
metal_printf("Specialized kernel already requested (%s)\n",
device_kernel_as_string(device_kernel));
return;
}
}
else {
if (pipeline->source_md5 == device->source_md5[PSO_GENERIC]) {
/* we already requested a generic pipeline for this kernel */
metal_printf("Generic kernel already requested (%s)\n",
device_kernel_as_string(device_kernel));
return;
}
}
}
}
incomplete_requests++;
PipelineRequest request;
request.pipeline = new MetalKernelPipeline;
memcpy(&request.pipeline->kernel_data_,
&device->launch_params.data,
sizeof(request.pipeline->kernel_data_));
request.pipeline->pso_type = pso_type;
request.pipeline->scene_specialized = scene_specialized;
request.pipeline->mtlDevice = mtlDevice;
request.pipeline->source_md5 = device->source_md5[pso_type];
request.pipeline->mtlLibrary = device->mtlLibrary[pso_type];
request.pipeline->source_md5 =
device->source_md5[scene_specialized ? PSO_SPECIALISED : PSO_GENERIC];
request.pipeline->mtlLibrary =
device->mtlLibrary[scene_specialized ? PSO_SPECIALISED : PSO_GENERIC];
request.pipeline->device_kernel = device_kernel;
request.pipeline->threads_per_threadgroup = device->max_threads_per_threadgroup;
@@ -234,24 +214,7 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
{
thread_scoped_lock lock(cache_mutex);
auto &collection = pipelines[device_kernel];
/* Cache up to 3 kernel variants with the same pso_type, purging oldest first. */
int max_entries_of_same_pso_type = 3;
for (int i = (int)collection.size() - 1; i >= 0; i--) {
if (collection[i]->pso_type == pso_type) {
max_entries_of_same_pso_type -= 1;
if (max_entries_of_same_pso_type == 0) {
metal_printf("Purging oldest %s:%s kernel from ShaderCache\n",
kernel_type_as_string(pso_type),
device_kernel_as_string(device_kernel));
collection.erase(collection.begin() + i);
break;
}
}
}
collection.push_back(unique_ptr<MetalKernelPipeline>(request.pipeline));
pipelines[device_kernel].push_back(unique_ptr<MetalKernelPipeline>(request.pipeline));
request_queue.push_back(request);
}
cond_var.notify_one();
@@ -285,9 +248,8 @@ MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const M
continue;
}
if (pipeline->pso_type != PSO_GENERIC) {
if (pipeline->source_md5 == device->source_md5[PSO_SPECIALIZED_INTERSECT] ||
pipeline->source_md5 == device->source_md5[PSO_SPECIALIZED_SHADE]) {
if (pipeline->scene_specialized) {
if (pipeline->source_md5 == device->source_md5[PSO_SPECIALISED]) {
best_pipeline = pipeline.get();
}
}
@@ -296,65 +258,13 @@ MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const M
}
}
if (best_pipeline->usage_count == 0 && best_pipeline->pso_type != PSO_GENERIC) {
metal_printf("Swapping in %s version of %s\n",
kernel_type_as_string(best_pipeline->pso_type),
device_kernel_as_string(kernel));
}
best_pipeline->usage_count += 1;
return best_pipeline;
}
bool MetalKernelPipeline::should_use_binary_archive() const
{
if (auto str = getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) {
if (atoi(str) != 0) {
/* Don't archive if we have opted out by env var. */
return false;
}
}
if (pso_type == PSO_GENERIC) {
/* Archive the generic kernels. */
return true;
}
if (device_kernel >= DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND &&
device_kernel <= DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW) {
/* Archive all shade kernels - they take a long time to compile. */
return true;
}
/* The remaining kernels are all fast to compile. They may get cached by the system shader cache,
* but will be quick to regenerate if not. */
return false;
}
static MTLFunctionConstantValues *GetConstantValues(KernelData const *data = nullptr)
{
MTLFunctionConstantValues *constant_values = [MTLFunctionConstantValues new];
MTLDataType MTLDataType_int = MTLDataTypeInt;
MTLDataType MTLDataType_float = MTLDataTypeFloat;
MTLDataType MTLDataType_float4 = MTLDataTypeFloat4;
KernelData zero_data = {0};
if (!data) {
data = &zero_data;
}
# define KERNEL_STRUCT_MEMBER(parent, _type, name) \
[constant_values setConstantValue:&data->parent.name \
type:MTLDataType_##_type \
atIndex:KernelData_##parent##_##name];
# include "kernel/data_template.h"
return constant_values;
}
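GetConstantValues() above leans on the X-macro pattern: kernel/data_template.h is included repeatedly under different definitions of KERNEL_STRUCT_MEMBER. A self-contained sketch of that pattern follows, with a made-up member list kept in a local macro so it fits in one file; the names are illustrative only.

#include <cstdio>

/* One member list, expanded several times under different definitions of KERNEL_STRUCT_MEMBER. */
#define TOY_KERNEL_MEMBERS(X) \
  X(film, float, exposure) \
  X(integrator, int, max_bounce)

/* Expansion 1: declare the struct fields. */
struct ToyKernelData {
#define KERNEL_STRUCT_MEMBER(parent, type, name) type parent##_##name;
  TOY_KERNEL_MEMBERS(KERNEL_STRUCT_MEMBER)
#undef KERNEL_STRUCT_MEMBER
};

/* Expansion 2: visit every member generically (stands in for baking each value
 * into a Metal function constant). */
static void dump(const ToyKernelData& d)
{
#define KERNEL_STRUCT_MEMBER(parent, type, name) \
  std::printf(#parent "." #name " = %g\n", (double)d.parent##_##name);
  TOY_KERNEL_MEMBERS(KERNEL_STRUCT_MEMBER)
#undef KERNEL_STRUCT_MEMBER
}

int main()
{
  const ToyKernelData data = {0.5f, 12};
  dump(data);
  return 0;
}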
void MetalKernelPipeline::compile()
{
int pso_type = scene_specialized ? PSO_SPECIALISED : PSO_GENERIC;
const std::string function_name = std::string("cycles_metal_") +
device_kernel_as_string(device_kernel);
@@ -371,17 +281,6 @@ void MetalKernelPipeline::compile()
if (@available(macOS 11.0, *)) {
MTLFunctionDescriptor *func_desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
func_desc.name = entryPoint;
if (pso_type == PSO_SPECIALIZED_SHADE) {
func_desc.constantValues = GetConstantValues(&kernel_data_);
}
else if (pso_type == PSO_SPECIALIZED_INTERSECT) {
func_desc.constantValues = GetConstantValues(&kernel_data_);
}
else {
func_desc.constantValues = GetConstantValues();
}
function = [mtlLibrary newFunctionWithDescriptor:func_desc error:&error];
}
@@ -528,7 +427,10 @@ void MetalKernelPipeline::compile()
MTLPipelineOption pipelineOptions = MTLPipelineOptionNone;
bool use_binary_archive = should_use_binary_archive();
bool use_binary_archive = true;
if (auto str = getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) {
use_binary_archive = (atoi(str) == 0);
}
id<MTLBinaryArchive> archive = nil;
string metalbin_path;
@@ -706,32 +608,19 @@ void MetalKernelPipeline::compile()
}
}
bool MetalDeviceKernels::load(MetalDevice *device, MetalPipelineType pso_type)
bool MetalDeviceKernels::load(MetalDevice *device, bool scene_specialized)
{
const double starttime = time_dt();
auto shader_cache = get_shader_cache(device->mtlDevice);
for (int i = 0; i < DEVICE_KERNEL_NUM; i++) {
shader_cache->load_kernel((DeviceKernel)i, device, pso_type);
shader_cache->load_kernel((DeviceKernel)i, device, scene_specialized);
}
shader_cache->wait_for_all();
metal_printf("Back-end compilation finished in %.1f seconds (%s)\n",
time_dt() - starttime,
kernel_type_as_string(pso_type));
if (!scene_specialized || getenv("CYCLES_METAL_PROFILING")) {
shader_cache->wait_for_all();
}
return true;
}
bool MetalDeviceKernels::should_load_kernels(MetalDevice *device, MetalPipelineType pso_type)
{
auto shader_cache = get_shader_cache(device->mtlDevice);
for (int i = 0; i < DEVICE_KERNEL_NUM; i++) {
if (shader_cache->should_load_kernel((DeviceKernel)i, device, pso_type)) {
return true;
}
}
return false;
}
const MetalKernelPipeline *MetalDeviceKernels::get_best_pipeline(const MetalDevice *device,
DeviceKernel kernel)
{

View File

@@ -24,7 +24,6 @@ class MetalDeviceQueue : public DeviceQueue {
virtual int num_concurrent_states(const size_t) const override;
virtual int num_concurrent_busy_states() const override;
virtual int num_sort_partition_elements() const override;
virtual void init_execution() override;

View File

@@ -293,11 +293,6 @@ int MetalDeviceQueue::num_concurrent_busy_states() const
return result;
}
int MetalDeviceQueue::num_sort_partition_elements() const
{
return MetalInfo::optimal_sort_partition_elements(metal_device_->mtlDevice);
}
void MetalDeviceQueue::init_execution()
{
/* Synchronize all textures and memory copies before executing task. */
@@ -364,7 +359,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
/* Prepare any non-pointer (i.e. plain-old-data) KernelParamsMetal data */
/* The plain-old-data is contiguous, continuing to the end of KernelParamsMetal */
size_t plain_old_launch_data_offset = offsetof(KernelParamsMetal, integrator_state) +
offsetof(IntegratorStateGPU, sort_partition_divisor);
sizeof(IntegratorStateGPU);
size_t plain_old_launch_data_size = sizeof(KernelParamsMetal) - plain_old_launch_data_offset;
memcpy(init_arg_buffer + globals_offsets + plain_old_launch_data_offset,
(uint8_t *)&metal_device_->launch_params + plain_old_launch_data_offset,
@@ -421,7 +416,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
/* this relies on IntegratorStateGPU layout being contiguous device_ptrs */
const size_t pointer_block_end = offsetof(KernelParamsMetal, integrator_state) +
offsetof(IntegratorStateGPU, sort_partition_divisor);
sizeof(IntegratorStateGPU);
for (size_t offset = 0; offset < pointer_block_end; offset += sizeof(device_ptr)) {
int pointer_index = int(offset / sizeof(device_ptr));
MetalDevice::MetalMem *mmem = *(

View File

@@ -37,7 +37,6 @@ struct MetalInfo {
static int get_apple_gpu_core_count(id<MTLDevice> device);
static MetalGPUVendor get_device_vendor(id<MTLDevice> device);
static AppleGPUArchitecture get_apple_gpu_architecture(id<MTLDevice> device);
static int optimal_sort_partition_elements(id<MTLDevice> device);
static string get_device_name(id<MTLDevice> device);
};

View File

@@ -72,21 +72,6 @@ MetalGPUVendor MetalInfo::get_device_vendor(id<MTLDevice> device)
return METAL_GPU_UNKNOWN;
}
int MetalInfo::optimal_sort_partition_elements(id<MTLDevice> device)
{
if (auto str = getenv("CYCLES_METAL_SORT_PARTITION_ELEMENTS")) {
return atoi(str);
}
/* On M1 and M2 GPUs, we see better cache utilization if we partition the active indices before
* sorting each partition by material. Partitioning into chunks of 65536 elements results in an
* overall render time speedup of up to 15%. */
if (get_device_vendor(device) == METAL_GPU_APPLE) {
return 65536;
}
return 0;
}
vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
{
static vector<id<MTLDevice>> usable_devices;

View File

@@ -2047,7 +2047,7 @@ void OptiXDevice::const_copy_to(const char *name, void *host, size_t size)
/* Update traversable handle (since it is different for each device on multi devices). */
KernelData *const data = (KernelData *)host;
*(OptixTraversableHandle *)&data->device_bvh = tlas_handle;
*(OptixTraversableHandle *)&data->bvh.scene = tlas_handle;
update_launch_params(offsetof(KernelParamsOptiX, data), host, size);
return;

View File

@@ -105,13 +105,6 @@ class DeviceQueue {
* value. */
virtual int num_concurrent_busy_states() const = 0;
/* Number of elements in a partition of sorted shaders, that improves memory locality of
* integrator state fetch at the cost of decreased coherence for shader kernel execution. */
virtual int num_sort_partition_elements() const
{
return 65536;
}
/* Initialize execution of kernels on this queue.
*
* Will, for example, load all data required by the kernels from Device to global or path state.

View File

@@ -373,7 +373,7 @@ void PathTrace::path_trace(RenderWork &render_work)
work_balance_infos_[i].time_spent += work_time;
work_balance_infos_[i].occupancy = statistics.occupancy;
VLOG_INFO << "Rendered " << num_samples << " samples in " << work_time << " seconds ("
VLOG_WORK << "Rendered " << num_samples << " samples in " << work_time << " seconds ("
<< work_time / num_samples
<< " seconds per sample), occupancy: " << statistics.occupancy;
});

View File

@@ -181,45 +181,27 @@ void PathTraceWorkGPU::alloc_integrator_queue()
void PathTraceWorkGPU::alloc_integrator_sorting()
{
/* Compute sort partitions, to balance between memory locality and coherence.
* Sort partitioning becomes less effective when more shaders are in the wavefront. In lieu of a
* more sophisticated heuristic we simply disable sort partitioning if the shader count is high.
*/
num_sort_partitions_ = 1;
if (device_scene_->data.max_shaders < 300) {
const int num_elements = queue_->num_sort_partition_elements();
if (num_elements) {
num_sort_partitions_ = max(max_num_paths_ / num_elements, 1);
}
}
integrator_state_gpu_.sort_partition_divisor = (int)divide_up(max_num_paths_,
num_sort_partitions_);
/* Allocate arrays for shader sorting. */
const int sort_buckets = device_scene_->data.max_shaders * num_sort_partitions_;
if (integrator_shader_sort_counter_.size() < sort_buckets) {
integrator_shader_sort_counter_.alloc(sort_buckets);
const int max_shaders = device_scene_->data.max_shaders;
if (integrator_shader_sort_counter_.size() < max_shaders) {
integrator_shader_sort_counter_.alloc(max_shaders);
integrator_shader_sort_counter_.zero_to_device();
integrator_shader_raytrace_sort_counter_.alloc(max_shaders);
integrator_shader_raytrace_sort_counter_.zero_to_device();
integrator_shader_mnee_sort_counter_.alloc(max_shaders);
integrator_shader_mnee_sort_counter_.zero_to_device();
integrator_shader_sort_prefix_sum_.alloc(max_shaders);
integrator_shader_sort_prefix_sum_.zero_to_device();
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE] =
(int *)integrator_shader_sort_counter_.device_pointer;
if (device_scene_->data.kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
integrator_shader_raytrace_sort_counter_.alloc(sort_buckets);
integrator_shader_raytrace_sort_counter_.zero_to_device();
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE] =
(int *)integrator_shader_raytrace_sort_counter_.device_pointer;
}
if (device_scene_->data.kernel_features & KERNEL_FEATURE_MNEE) {
integrator_shader_mnee_sort_counter_.alloc(sort_buckets);
integrator_shader_mnee_sort_counter_.zero_to_device();
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE] =
(int *)integrator_shader_mnee_sort_counter_.device_pointer;
}
integrator_shader_sort_prefix_sum_.alloc(sort_buckets);
integrator_shader_sort_prefix_sum_.zero_to_device();
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE] =
(int *)integrator_shader_raytrace_sort_counter_.device_pointer;
integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE] =
(int *)integrator_shader_mnee_sort_counter_.device_pointer;
}
}
@@ -351,12 +333,8 @@ void PathTraceWorkGPU::enqueue_reset()
queue_->enqueue(DEVICE_KERNEL_INTEGRATOR_RESET, max_num_paths_, args);
queue_->zero_to_device(integrator_queue_counter_);
queue_->zero_to_device(integrator_shader_sort_counter_);
if (device_scene_->data.kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
queue_->zero_to_device(integrator_shader_raytrace_sort_counter_);
}
if (device_scene_->data.kernel_features & KERNEL_FEATURE_MNEE) {
queue_->zero_to_device(integrator_shader_mnee_sort_counter_);
}
queue_->zero_to_device(integrator_shader_raytrace_sort_counter_);
queue_->zero_to_device(integrator_shader_mnee_sort_counter_);
/* Tiles enqueue need to know number of active paths, which is based on this counter. Zero the
* counter on the host side because `zero_to_device()` is not doing it. */
@@ -508,9 +486,9 @@ void PathTraceWorkGPU::compute_sorted_queued_paths(DeviceKernel kernel,
/* Compute prefix sum of number of active paths with each shader. */
{
const int work_size = 1;
int sort_buckets = device_scene_->data.max_shaders * num_sort_partitions_;
int max_shaders = device_scene_->data.max_shaders;
DeviceKernelArguments args(&d_counter, &d_prefix_sum, &sort_buckets);
DeviceKernelArguments args(&d_counter, &d_prefix_sum, &max_shaders);
queue_->enqueue(DEVICE_KERNEL_PREFIX_SUM, work_size, args);
}

View File

@@ -156,9 +156,6 @@ class PathTraceWorkGPU : public PathTraceWork {
bool interop_use_checked_ = false;
bool interop_use_ = false;
/* Number of partitions to sort state indices into prior to material sort. */
int num_sort_partitions_;
/* Maximum number of concurrent integrator states. */
int max_num_paths_;

View File

@@ -79,7 +79,6 @@ set(SRC_KERNEL_DEVICE_METAL_HEADERS
device/metal/compat.h
device/metal/context_begin.h
device/metal/context_end.h
device/metal/function_constants.h
device/metal/globals.h
)
@@ -155,7 +154,6 @@ set(SRC_KERNEL_SVM_HEADERS
svm/math_util.h
svm/mix.h
svm/musgrave.h
svm/node_types_template.h
svm/noise.h
svm/noisetex.h
svm/normal.h
@@ -284,7 +282,6 @@ set(SRC_KERNEL_UTIL_HEADERS
set(SRC_KERNEL_TYPES_HEADERS
data_arrays.h
data_template.h
tables.h
types.h
)
@@ -847,9 +844,10 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
else()
list(APPEND sycl_compiler_flags -fPIC)
# We avoid getting __FAST_MATH__ to be defined when building on CentOS 7 until the compilation crash
# it triggers at either AoT or JIT stages gets fixed.
list(APPEND sycl_compiler_flags -fhonor-nans)
# avoid getting __FAST_MATH__ to be defined for the graphics compiler on CentOS 7 until the compile-time issue it triggers gets fixed.
if(WITH_CYCLES_ONEAPI_BINARIES)
list(APPEND sycl_compiler_flags -fhonor-nans)
endif()
# add $ORIGIN to cycles_kernel_oneapi.so rpath so libsycl.so and
# libpi_level_zero.so can be placed next to it and get found.

View File

@@ -172,11 +172,11 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
ray_flags |= OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT;
}
optixTrace(scene_intersect_valid(ray) ? kernel_data.device_bvh : 0,
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
ray->P,
ray->D,
ray->tmin,
ray->tmax,
0.0f,
ray->t,
ray->time,
ray_mask,
ray_flags,
@@ -203,28 +203,28 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
#elif defined(__METALRT__)
if (!scene_intersect_valid(ray)) {
isect->t = ray->tmax;
isect->t = ray->t;
isect->type = PRIMITIVE_NONE;
return false;
}
# if defined(__KERNEL_DEBUG__)
if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
isect->t = ray->tmax;
isect->t = ray->t;
isect->type = PRIMITIVE_NONE;
kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
return false;
}
if (is_null_intersection_function_table(metal_ancillaries->ift_default)) {
isect->t = ray->tmax;
isect->t = ray->t;
isect->type = PRIMITIVE_NONE;
kernel_assert(!"Invalid ift_default");
return false;
}
# endif
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
metalrt_intersector_type metalrt_intersect;
if (!kernel_data.bvh.have_curves) {
@@ -263,7 +263,7 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
# endif
if (intersection.type == intersection_type::none) {
isect->t = ray->tmax;
isect->t = ray->t;
isect->type = PRIMITIVE_NONE;
return false;
@@ -295,14 +295,14 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
}
# ifdef __EMBREE__
if (kernel_data.device_bvh) {
isect->t = ray->tmax;
if (kernel_data.bvh.scene) {
isect->t = ray->t;
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
IntersectContext rtc_ctx(&ctx);
RTCRayHit ray_hit;
ctx.ray = ray;
kernel_embree_setup_rayhit(*ray, ray_hit, visibility);
rtcIntersect1(kernel_data.device_bvh, &rtc_ctx.context, &ray_hit);
rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit);
if (ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID &&
ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) {
kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect);
@@ -357,11 +357,11 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
if (local_isect) {
local_isect->num_hits = 0; /* Initialize hit count to zero. */
}
optixTrace(scene_intersect_valid(ray) ? kernel_data.device_bvh : 0,
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
ray->P,
ray->D,
ray->tmin,
ray->tmax,
0.0f,
ray->t,
ray->time,
0xFF,
/* Need to always call into __anyhit__kernel_optix_local_hit. */
@@ -405,7 +405,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
}
# endif
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
metalrt_intersector_type metalrt_intersect;
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
@@ -451,7 +451,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
}
# ifdef __EMBREE__
if (kernel_data.device_bvh) {
if (kernel_data.bvh.scene) {
const bool has_bvh = !(kernel_data_fetch(object_flag, local_object) &
SD_OBJECT_TRANSFORM_APPLIED);
CCLIntersectContext ctx(
@@ -470,13 +470,13 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
/* If this object has its own BVH, use it. */
if (has_bvh) {
RTCGeometry geom = rtcGetGeometry(kernel_data.device_bvh, local_object * 2);
RTCGeometry geom = rtcGetGeometry(kernel_data.bvh.scene, local_object * 2);
if (geom) {
float3 P = ray->P;
float3 dir = ray->D;
float3 idir = ray->D;
Transform ob_itfm;
rtc_ray.tfar = ray->tmax *
rtc_ray.tfar = ray->t *
bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, &ob_itfm);
/* bvh_instance_motion_push() returns the inverse transform but
* it's not needed here. */
@@ -496,7 +496,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
}
}
else {
rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
}
/* rtcOccluded1 sets tfar to -inf if a hit was found. */
@@ -539,11 +539,11 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
ray_mask = 0xFF;
}
optixTrace(scene_intersect_valid(ray) ? kernel_data.device_bvh : 0,
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
ray->P,
ray->D,
ray->tmin,
ray->tmax,
0.0f,
ray->t,
ray->time,
ray_mask,
/* Need to always call into __anyhit__kernel_optix_shadow_all_hit. */
@@ -582,7 +582,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
}
# endif
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
metalrt_intersector_type metalrt_intersect;
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
@@ -633,7 +633,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
}
# ifdef __EMBREE__
if (kernel_data.device_bvh) {
if (kernel_data.bvh.scene) {
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
Intersection *isect_array = (Intersection *)state->shadow_isect;
ctx.isect_s = isect_array;
@@ -642,7 +642,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
IntersectContext rtc_ctx(&ctx);
RTCRay rtc_ray;
kernel_embree_setup_ray(*ray, rtc_ray, visibility);
rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
*num_recorded_hits = ctx.num_recorded_hits;
*throughput = ctx.throughput;
@@ -698,11 +698,11 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
ray_mask = 0xFF;
}
optixTrace(scene_intersect_valid(ray) ? kernel_data.device_bvh : 0,
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
ray->P,
ray->D,
ray->tmin,
ray->tmax,
0.0f,
ray->t,
ray->time,
ray_mask,
/* Need to always call into __anyhit__kernel_optix_volume_test. */
@@ -744,7 +744,7 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
}
# endif
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
metalrt_intersector_type metalrt_intersect;
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
@@ -825,7 +825,7 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals kg,
}
# ifdef __EMBREE__
if (kernel_data.device_bvh) {
if (kernel_data.bvh.scene) {
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
ctx.isect_s = isect;
ctx.max_hits = max_hits;
@@ -834,7 +834,7 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals kg,
IntersectContext rtc_ctx(&ctx);
RTCRay rtc_ray;
kernel_embree_setup_ray(*ray, rtc_ray, visibility);
rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
return ctx.num_hits;
}
# endif /* __EMBREE__ */

View File

@@ -83,8 +83,8 @@ ccl_device_inline void kernel_embree_setup_ray(const Ray &ray,
rtc_ray.dir_x = ray.D.x;
rtc_ray.dir_y = ray.D.y;
rtc_ray.dir_z = ray.D.z;
rtc_ray.tnear = ray.tmin;
rtc_ray.tfar = ray.tmax;
rtc_ray.tnear = 0.0f;
rtc_ray.tfar = ray.t;
rtc_ray.time = ray.time;
rtc_ray.mask = visibility;
}
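
For reference, the RTCRay fields filled in by the hunk above feed directly into Embree's rtcIntersect1, as the scene_intersect hunks earlier show. A hedged standalone sketch of the same setup against the plain Embree 3 API; the scene is assumed to have been created elsewhere:

#include <embree3/rtcore.h>

bool cast_ray(RTCScene scene, const float org[3], const float dir[3], float tfar)
{
  RTCIntersectContext context;
  rtcInitIntersectContext(&context);

  RTCRayHit rh;
  rh.ray.org_x = org[0]; rh.ray.org_y = org[1]; rh.ray.org_z = org[2];
  rh.ray.dir_x = dir[0]; rh.ray.dir_y = dir[1]; rh.ray.dir_z = dir[2];
  rh.ray.tnear = 0.0f;                 /* matches the reverted setup above */
  rh.ray.tfar = tfar;
  rh.ray.time = 0.0f;
  rh.ray.mask = 0xFFFFFFFF;            /* visibility mask, as in the hunk */
  rh.ray.flags = 0;
  rh.hit.geomID = RTC_INVALID_GEOMETRY_ID;
  rh.hit.instID[0] = RTC_INVALID_GEOMETRY_ID;

  rtcIntersect1(scene, &context, &rh);
  return rh.hit.geomID != RTC_INVALID_GEOMETRY_ID;
}
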
@@ -107,7 +107,7 @@ ccl_device_inline bool kernel_embree_is_self_intersection(const KernelGlobals kg
const int oID = hit->instID[0] / 2;
if ((ray->self.object == oID) || (ray->self.light_object == oID)) {
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
rtcGetGeometry(kernel_data.device_bvh, hit->instID[0]));
rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
const int pID = hit->primID +
(intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
status = intersection_skip_self_shadow(ray->self, oID, pID);
@@ -117,7 +117,7 @@ ccl_device_inline bool kernel_embree_is_self_intersection(const KernelGlobals kg
const int oID = hit->geomID / 2;
if ((ray->self.object == oID) || (ray->self.light_object == oID)) {
const int pID = hit->primID + (intptr_t)rtcGetGeometryUserData(
rtcGetGeometry(kernel_data.device_bvh, hit->geomID));
rtcGetGeometry(kernel_data.bvh.scene, hit->geomID));
status = intersection_skip_self_shadow(ray->self, oID, pID);
}
}
@@ -133,14 +133,14 @@ ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg,
isect->t = ray->tfar;
if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
rtcGetGeometry(kernel_data.device_bvh, hit->instID[0]));
rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
isect->prim = hit->primID +
(intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
isect->object = hit->instID[0] / 2;
}
else {
isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(
rtcGetGeometry(kernel_data.device_bvh, hit->geomID));
rtcGetGeometry(kernel_data.bvh.scene, hit->geomID));
isect->object = hit->geomID / 2;
}
@@ -166,7 +166,7 @@ ccl_device_inline void kernel_embree_convert_sss_hit(
isect->v = hit->u;
isect->t = ray->tfar;
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
rtcGetGeometry(kernel_data.device_bvh, object * 2));
rtcGetGeometry(kernel_data.bvh.scene, object * 2));
isect->prim = hit->primID +
(intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
isect->object = object;

View File

@@ -47,9 +47,8 @@ ccl_device_inline
float3 P = ray->P;
float3 dir = bvh_clamp_direction(ray->D);
float3 idir = bvh_inverse_direction(dir);
float tmin = ray->tmin;
int object = OBJECT_NONE;
float isect_t = ray->tmax;
float isect_t = ray->t;
if (local_isect != NULL) {
local_isect->num_hits = 0;
@@ -60,13 +59,10 @@ ccl_device_inline
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
const float t_world_to_instance = bvh_instance_motion_push(
kg, local_object, ray, &P, &dir, &idir, &ob_itfm);
isect_t *= bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, &ob_itfm);
#else
const float t_world_to_instance = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir);
isect_t *= bvh_instance_push(kg, local_object, ray, &P, &dir, &idir);
#endif
isect_t *= t_world_to_instance;
tmin *= t_world_to_instance;
object = local_object;
}
@@ -85,7 +81,6 @@ ccl_device_inline
dir,
#endif
idir,
tmin,
isect_t,
node_addr,
PATH_RAY_ALL_VISIBILITY,
@@ -160,7 +155,6 @@ ccl_device_inline
local_object,
prim,
prim_addr,
tmin,
isect_t,
lcg_state,
max_hits)) {
@@ -197,7 +191,6 @@ ccl_device_inline
local_object,
prim,
prim_addr,
tmin,
isect_t,
lcg_state,
max_hits)) {

View File

@@ -18,8 +18,7 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals kg
ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg,
const float3 P,
const float3 idir,
const float tmin,
const float tmax,
const float t,
const int node_addr,
const uint visibility,
float dist[2])
@@ -40,8 +39,8 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg,
float c0hiy = (node1.z - P.y) * idir.y;
float c0loz = (node2.x - P.z) * idir.z;
float c0hiz = (node2.z - P.z) * idir.z;
float c0min = max4(tmin, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz));
float c0max = min4(tmax, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz));
float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz));
float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz));
float c1lox = (node0.y - P.x) * idir.x;
float c1hix = (node0.w - P.x) * idir.x;
@@ -49,8 +48,8 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg,
float c1hiy = (node1.w - P.y) * idir.y;
float c1loz = (node2.y - P.z) * idir.z;
float c1hiz = (node2.w - P.z) * idir.z;
float c1min = max4(tmin, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz));
float c1max = min4(tmax, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz));
float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz));
float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz));
dist[0] = c0min;
dist[1] = c1min;
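
The c0min/c0max computation above is the usual ray/AABB slab test, with the entry distance clamped to the ray start (0.0f after this revert) and the exit distance clamped to t. A minimal sketch of the same test in plain C++, not the Cycles helpers:

#include <algorithm>
#include <cstdio>

int main() {
  const float P[3]    = {0.0f, 0.0f, 0.0f};   // ray origin
  const float idir[3] = {1.0f, 1.0f, 1.0f};   // 1 / direction (direction = (1,1,1))
  const float lo[3]   = {1.0f, 1.0f, 1.0f};   // box min
  const float hi[3]   = {2.0f, 3.0f, 4.0f};   // box max
  const float t_max   = 100.0f;

  float t_entry = 0.0f, t_exit = t_max;
  for (int a = 0; a < 3; a++) {
    const float d0 = (lo[a] - P[a]) * idir[a];
    const float d1 = (hi[a] - P[a]) * idir[a];
    t_entry = std::max(t_entry, std::min(d0, d1));  // latest near-plane crossing
    t_exit  = std::min(t_exit, std::max(d0, d1));   // earliest far-plane crossing
  }
  std::printf("hit=%d entry=%.2f exit=%.2f\n", t_entry <= t_exit, t_entry, t_exit);
  return 0;
}
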
@@ -67,8 +66,7 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg,
ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals kg,
const float3 P,
const float3 dir,
const float tmin,
const float tmax,
const float t,
int node_addr,
int child,
float dist[2])
@@ -85,8 +83,8 @@ ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals kg,
const float far_x = max(lower_xyz.x, upper_xyz.x);
const float far_y = max(lower_xyz.y, upper_xyz.y);
const float far_z = max(lower_xyz.z, upper_xyz.z);
const float tnear = max4(tmin, near_x, near_y, near_z);
const float tfar = min4(tmax, far_x, far_y, far_z);
const float tnear = max4(0.0f, near_x, near_y, near_z);
const float tfar = min4(t, far_x, far_y, far_z);
*dist = tnear;
return tnear <= tfar;
}
@@ -95,8 +93,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg,
const float3 P,
const float3 dir,
const float3 idir,
const float tmin,
const float tmax,
const float t,
const int node_addr,
const uint visibility,
float dist[2])
@@ -105,7 +102,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg,
#ifdef __VISIBILITY_FLAG__
float4 cnodes = kernel_data_fetch(bvh_nodes, node_addr + 0);
#endif
if (bvh_unaligned_node_intersect_child(kg, P, dir, tmin, tmax, node_addr, 0, &dist[0])) {
if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) {
#ifdef __VISIBILITY_FLAG__
if ((__float_as_uint(cnodes.x) & visibility))
#endif
@@ -113,7 +110,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg,
mask |= 1;
}
}
if (bvh_unaligned_node_intersect_child(kg, P, dir, tmin, tmax, node_addr, 1, &dist[1])) {
if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) {
#ifdef __VISIBILITY_FLAG__
if ((__float_as_uint(cnodes.y) & visibility))
#endif
@@ -128,17 +125,16 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals kg,
const float3 P,
const float3 dir,
const float3 idir,
const float tmin,
const float tmax,
const float t,
const int node_addr,
const uint visibility,
float dist[2])
{
float4 node = kernel_data_fetch(bvh_nodes, node_addr);
if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
return bvh_unaligned_node_intersect(kg, P, dir, idir, tmin, tmax, node_addr, visibility, dist);
return bvh_unaligned_node_intersect(kg, P, dir, idir, t, node_addr, visibility, dist);
}
else {
return bvh_aligned_node_intersect(kg, P, idir, tmin, tmax, node_addr, visibility, dist);
return bvh_aligned_node_intersect(kg, P, idir, t, node_addr, visibility, dist);
}
}

View File

@@ -49,7 +49,6 @@ ccl_device_inline
float3 P = ray->P;
float3 dir = bvh_clamp_direction(ray->D);
float3 idir = bvh_inverse_direction(dir);
float tmin = ray->tmin;
int object = OBJECT_NONE;
uint num_hits = 0;
@@ -60,12 +59,12 @@ ccl_device_inline
/* Max distance in world space. May be dynamically reduced when max number of
* recorded hits is exceeded and we no longer need to find hits beyond the max
* distance found. */
float t_max_world = ray->tmax;
float t_max_world = ray->t;
/* Current maximum distance to the intersection.
* Is calculated as a ray length, transformed to an object space when entering
* instance node. */
float t_max_current = ray->tmax;
float t_max_current = ray->t;
/* Conversion from world to local space for the current instance if any, 1.0
* otherwise. */
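
A hedged numeric sketch of the world-to-instance distance conversion described in the comments above, assuming for simplicity an instance that is uniformly scaled by 2 in world space: its world-to-object transform scales by 0.5, so a unit world direction maps to a vector of length 0.5 and world-space t values shrink by that factor.

#include <cstdio>

int main() {
  const float world_scale = 2.0f;                         // object appears 2x larger in world space
  const float t_world_to_instance = 1.0f / world_scale;   // length of the transformed unit direction

  const float t_max_world = 10.0f;                                 // max hit distance in world units
  const float t_max_current = t_max_world * t_world_to_instance;   // same distance in object units

  // A hit found at object-space distance t_obj converts back with the inverse factor.
  const float t_obj = 3.0f;
  std::printf("t_max_current=%.2f  t_world=%.2f\n", t_max_current, t_obj / t_world_to_instance);
  return 0;
}
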
@@ -89,7 +88,6 @@ ccl_device_inline
dir,
#endif
idir,
tmin,
t_max_current,
node_addr,
visibility,
@@ -158,16 +156,8 @@ ccl_device_inline
switch (type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
hit = triangle_intersect(kg,
&isect,
P,
dir,
tmin,
t_max_current,
visibility,
prim_object,
prim,
prim_addr);
hit = triangle_intersect(
kg, &isect, P, dir, t_max_current, visibility, prim_object, prim, prim_addr);
break;
}
#if BVH_FEATURE(BVH_MOTION)
@@ -176,7 +166,6 @@ ccl_device_inline
&isect,
P,
dir,
tmin,
t_max_current,
ray->time,
visibility,
@@ -200,16 +189,8 @@ ccl_device_inline
}
const int curve_type = kernel_data_fetch(prim_type, prim_addr);
hit = curve_intersect(kg,
&isect,
P,
dir,
tmin,
t_max_current,
prim_object,
prim,
ray->time,
curve_type);
hit = curve_intersect(
kg, &isect, P, dir, t_max_current, prim_object, prim, ray->time, curve_type);
break;
}
@@ -226,16 +207,8 @@ ccl_device_inline
}
const int point_type = kernel_data_fetch(prim_type, prim_addr);
hit = point_intersect(kg,
&isect,
P,
dir,
tmin,
t_max_current,
prim_object,
prim,
ray->time,
point_type);
hit = point_intersect(
kg, &isect, P, dir, t_max_current, prim_object, prim, ray->time, point_type);
break;
}
#endif /* BVH_FEATURE(BVH_POINTCLOUD) */
@@ -329,7 +302,6 @@ ccl_device_inline
/* Convert intersection to object space. */
t_max_current *= t_world_to_instance;
tmin *= t_world_to_instance;
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
@@ -351,8 +323,7 @@ ccl_device_inline
#endif
/* Restore world space ray length. */
tmin = ray->tmin;
t_max_current = ray->tmax;
t_max_current = ray->t;
object = OBJECT_NONE;
t_world_to_instance = 1.0f;

View File

@@ -43,14 +43,13 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
float3 P = ray->P;
float3 dir = bvh_clamp_direction(ray->D);
float3 idir = bvh_inverse_direction(dir);
float tmin = ray->tmin;
int object = OBJECT_NONE;
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
#endif
isect->t = ray->tmax;
isect->t = ray->t;
isect->u = 0.0f;
isect->v = 0.0f;
isect->prim = PRIM_NONE;
@@ -72,7 +71,6 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
dir,
#endif
idir,
tmin,
isect->t,
node_addr,
visibility,
@@ -135,16 +133,8 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
switch (type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
if (triangle_intersect(kg,
isect,
P,
dir,
tmin,
isect->t,
visibility,
prim_object,
prim,
prim_addr)) {
if (triangle_intersect(
kg, isect, P, dir, isect->t, visibility, prim_object, prim, prim_addr)) {
/* shadow ray early termination */
if (visibility & PATH_RAY_SHADOW_OPAQUE)
return true;
@@ -157,7 +147,6 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
isect,
P,
dir,
tmin,
isect->t,
ray->time,
visibility,
@@ -185,7 +174,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
const int curve_type = kernel_data_fetch(prim_type, prim_addr);
const bool hit = curve_intersect(
kg, isect, P, dir, tmin, isect->t, prim_object, prim, ray->time, curve_type);
kg, isect, P, dir, isect->t, prim_object, prim, ray->time, curve_type);
if (hit) {
/* shadow ray early termination */
if (visibility & PATH_RAY_SHADOW_OPAQUE)
@@ -206,7 +195,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
const int point_type = kernel_data_fetch(prim_type, prim_addr);
const bool hit = point_intersect(
kg, isect, P, dir, tmin, isect->t, prim_object, prim, ray->time, point_type);
kg, isect, P, dir, isect->t, prim_object, prim, ray->time, point_type);
if (hit) {
/* shadow ray early termination */
if (visibility & PATH_RAY_SHADOW_OPAQUE)
@@ -223,15 +212,11 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
object = kernel_data_fetch(prim_object, -prim_addr - 1);
#if BVH_FEATURE(BVH_MOTION)
const float t_world_to_instance = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, &ob_itfm);
isect->t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm);
#else
const float t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir);
isect->t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir);
#endif
isect->t *= t_world_to_instance;
tmin *= t_world_to_instance;
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
@@ -250,7 +235,6 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
#else
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
#endif
tmin = ray->tmin;
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr];

View File

@@ -5,19 +5,6 @@
CCL_NAMESPACE_BEGIN
/* Offset intersection distance by the smallest possible amount, to skip
* intersections at this distance. This works in cases where the ray start
* position is unchanged and only tmin is updated, since for self
* intersection we'll be comparing against the exact same distances. */
ccl_device_forceinline float intersection_t_offset(const float t)
{
/* This is a simplified version of nextafterf(t, FLT_MAX), only dealing with
* non-negative and finite t. */
kernel_assert(t >= 0.0f && isfinite_safe(t));
const uint32_t bits = (t == 0.0f) ? 1 : __float_as_uint(t) + 1;
return __uint_as_float(bits);
}
#if defined(__KERNEL_CPU__)
ccl_device int intersections_compare(const void *a, const void *b)
{

View File

@@ -46,14 +46,13 @@ ccl_device_inline
float3 P = ray->P;
float3 dir = bvh_clamp_direction(ray->D);
float3 idir = bvh_inverse_direction(dir);
float tmin = ray->tmin;
int object = OBJECT_NONE;
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
#endif
isect->t = ray->tmax;
isect->t = ray->t;
isect->u = 0.0f;
isect->v = 0.0f;
isect->prim = PRIM_NONE;
@@ -74,7 +73,6 @@ ccl_device_inline
dir,
#endif
idir,
tmin,
isect->t,
node_addr,
visibility,
@@ -142,7 +140,7 @@ ccl_device_inline
continue;
}
triangle_intersect(
kg, isect, P, dir, tmin, isect->t, visibility, prim_object, prim, prim_addr);
kg, isect, P, dir, isect->t, visibility, prim_object, prim, prim_addr);
}
break;
}
@@ -167,7 +165,6 @@ ccl_device_inline
isect,
P,
dir,
tmin,
isect->t,
ray->time,
visibility,
@@ -189,15 +186,11 @@ ccl_device_inline
int object_flag = kernel_data_fetch(object_flag, object);
if (object_flag & SD_OBJECT_HAS_VOLUME) {
#if BVH_FEATURE(BVH_MOTION)
const float t_world_to_instance = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, &ob_itfm);
isect->t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm);
#else
const float t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir);
isect->t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir);
#endif
isect->t *= t_world_to_instance;
tmin *= t_world_to_instance;
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
@@ -224,8 +217,6 @@ ccl_device_inline
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
#endif
tmin = ray->tmin;
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;

View File

@@ -44,12 +44,12 @@ ccl_device_inline
int node_addr = kernel_data.bvh.root;
/* ray parameters in registers */
const float tmax = ray->t;
float3 P = ray->P;
float3 dir = bvh_clamp_direction(ray->D);
float3 idir = bvh_inverse_direction(dir);
float tmin = ray->tmin;
int object = OBJECT_NONE;
float isect_t = ray->tmax;
float isect_t = tmax;
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
@@ -58,7 +58,7 @@ ccl_device_inline
int num_hits_in_instance = 0;
uint num_hits = 0;
isect_array->t = ray->tmax;
isect_array->t = tmax;
/* traversal loop */
do {
@@ -75,7 +75,6 @@ ccl_device_inline
dir,
#endif
idir,
tmin,
isect_t,
node_addr,
visibility,
@@ -142,16 +141,8 @@ ccl_device_inline
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
hit = triangle_intersect(kg,
isect_array,
P,
dir,
tmin,
isect_t,
visibility,
prim_object,
prim,
prim_addr);
hit = triangle_intersect(
kg, isect_array, P, dir, isect_t, visibility, prim_object, prim, prim_addr);
if (hit) {
/* Move on to next entry in intersections array. */
isect_array++;
@@ -198,7 +189,6 @@ ccl_device_inline
isect_array,
P,
dir,
tmin,
isect_t,
ray->time,
visibility,
@@ -242,15 +232,11 @@ ccl_device_inline
int object_flag = kernel_data_fetch(object_flag, object);
if (object_flag & SD_OBJECT_HAS_VOLUME) {
#if BVH_FEATURE(BVH_MOTION)
const float t_world_to_instance = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, &ob_itfm);
isect_t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm);
#else
const float t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir);
isect_t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir);
#endif
isect_t *= t_world_to_instance;
tmin *= t_world_to_instance;
num_hits_in_instance = 0;
isect_array->t = isect_t;
@@ -294,8 +280,7 @@ ccl_device_inline
#endif
}
tmin = ray->tmin;
isect_t = ray->tmax;
isect_t = tmax;
isect_array->t = isect_t;
object = OBJECT_NONE;

View File

@@ -165,11 +165,9 @@ ccl_device void camera_sample_perspective(KernelGlobals kg,
float nearclip = kernel_data.cam.nearclip * z_inv;
ray->P += nearclip * ray->D;
ray->dP += nearclip * ray->dD;
ray->tmin = 0.0f;
ray->tmax = kernel_data.cam.cliplength * z_inv;
ray->t = kernel_data.cam.cliplength * z_inv;
#else
ray->tmin = 0.0f;
ray->tmax = FLT_MAX;
ray->t = FLT_MAX;
#endif
}
@@ -233,11 +231,9 @@ ccl_device void camera_sample_orthographic(KernelGlobals kg,
#ifdef __CAMERA_CLIPPING__
/* clipping */
ray->tmin = 0.0f;
ray->tmax = kernel_data.cam.cliplength;
ray->t = kernel_data.cam.cliplength;
#else
ray->tmin = 0.0f;
ray->tmax = FLT_MAX;
ray->t = FLT_MAX;
#endif
}
@@ -262,7 +258,7 @@ ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
/* indicates ray should not receive any light, outside of the lens */
if (is_zero(D)) {
ray->tmax = 0.0f;
ray->t = 0.0f;
return;
}
@@ -353,11 +349,9 @@ ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
float nearclip = cam->nearclip;
ray->P += nearclip * ray->D;
ray->dP += nearclip * ray->dD;
ray->tmin = 0.0f;
ray->tmax = cam->cliplength;
ray->t = cam->cliplength;
#else
ray->tmin = 0.0f;
ray->tmax = FLT_MAX;
ray->t = FLT_MAX;
#endif
}
@@ -374,7 +368,7 @@ ccl_device_inline void camera_sample(KernelGlobals kg,
ccl_private Ray *ray)
{
/* pixel filter */
int filter_table_offset = kernel_data.tables.filter_table_offset;
int filter_table_offset = kernel_data.film.filter_table_offset;
float raster_x = x + lookup_table_read(kg, filter_u, filter_table_offset, FILTER_TABLE_SIZE);
float raster_y = y + lookup_table_read(kg, filter_v, filter_table_offset, FILTER_TABLE_SIZE);

View File

@@ -1,206 +0,0 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#ifndef KERNEL_STRUCT_BEGIN
# define KERNEL_STRUCT_BEGIN(name, parent)
#endif
#ifndef KERNEL_STRUCT_END
# define KERNEL_STRUCT_END(name)
#endif
#ifndef KERNEL_STRUCT_MEMBER
# define KERNEL_STRUCT_MEMBER(parent, type, name)
#endif
/* Background. */
KERNEL_STRUCT_BEGIN(KernelBackground, background)
/* xyz store direction, w the angle. float4 instead of float3 is used
* to ensure consistent padding/alignment across devices. */
KERNEL_STRUCT_MEMBER(background, float4, sun)
/* Only shader index. */
KERNEL_STRUCT_MEMBER(background, int, surface_shader)
KERNEL_STRUCT_MEMBER(background, int, volume_shader)
KERNEL_STRUCT_MEMBER(background, float, volume_step_size)
KERNEL_STRUCT_MEMBER(background, int, transparent)
KERNEL_STRUCT_MEMBER(background, float, transparent_roughness_squared_threshold)
/* Portal sampling. */
KERNEL_STRUCT_MEMBER(background, float, portal_weight)
KERNEL_STRUCT_MEMBER(background, int, num_portals)
KERNEL_STRUCT_MEMBER(background, int, portal_offset)
/* Sun sampling. */
KERNEL_STRUCT_MEMBER(background, float, sun_weight)
/* Importance map sampling. */
KERNEL_STRUCT_MEMBER(background, float, map_weight)
KERNEL_STRUCT_MEMBER(background, int, map_res_x)
KERNEL_STRUCT_MEMBER(background, int, map_res_y)
/* Multiple importance sampling. */
KERNEL_STRUCT_MEMBER(background, int, use_mis)
/* Lightgroup. */
KERNEL_STRUCT_MEMBER(background, int, lightgroup)
/* Padding. */
KERNEL_STRUCT_MEMBER(background, int, pad1)
KERNEL_STRUCT_MEMBER(background, int, pad2)
KERNEL_STRUCT_MEMBER(background, int, pad3)
KERNEL_STRUCT_END(KernelBackground)
/* BVH: own BVH2 if no native device acceleration struct used. */
KERNEL_STRUCT_BEGIN(KernelBVH, bvh)
KERNEL_STRUCT_MEMBER(bvh, int, root)
KERNEL_STRUCT_MEMBER(bvh, int, have_motion)
KERNEL_STRUCT_MEMBER(bvh, int, have_curves)
KERNEL_STRUCT_MEMBER(bvh, int, bvh_layout)
KERNEL_STRUCT_MEMBER(bvh, int, use_bvh_steps)
KERNEL_STRUCT_MEMBER(bvh, int, curve_subdivisions)
KERNEL_STRUCT_MEMBER(bvh, int, pad1)
KERNEL_STRUCT_MEMBER(bvh, int, pad2)
KERNEL_STRUCT_END(KernelBVH)
/* Film. */
KERNEL_STRUCT_BEGIN(KernelFilm, film)
/* XYZ to rendering color space transform. float4 instead of float3 to
* ensure consistent padding/alignment across devices. */
KERNEL_STRUCT_MEMBER(film, float4, xyz_to_r)
KERNEL_STRUCT_MEMBER(film, float4, xyz_to_g)
KERNEL_STRUCT_MEMBER(film, float4, xyz_to_b)
KERNEL_STRUCT_MEMBER(film, float4, rgb_to_y)
/* Rec709 to rendering color space. */
KERNEL_STRUCT_MEMBER(film, float4, rec709_to_r)
KERNEL_STRUCT_MEMBER(film, float4, rec709_to_g)
KERNEL_STRUCT_MEMBER(film, float4, rec709_to_b)
KERNEL_STRUCT_MEMBER(film, int, is_rec709)
/* Exposure. */
KERNEL_STRUCT_MEMBER(film, float, exposure)
/* Passes used. */
KERNEL_STRUCT_MEMBER(film, int, pass_flag)
KERNEL_STRUCT_MEMBER(film, int, light_pass_flag)
/* Pass offsets. */
KERNEL_STRUCT_MEMBER(film, int, pass_stride)
KERNEL_STRUCT_MEMBER(film, int, pass_combined)
KERNEL_STRUCT_MEMBER(film, int, pass_depth)
KERNEL_STRUCT_MEMBER(film, int, pass_position)
KERNEL_STRUCT_MEMBER(film, int, pass_normal)
KERNEL_STRUCT_MEMBER(film, int, pass_roughness)
KERNEL_STRUCT_MEMBER(film, int, pass_motion)
KERNEL_STRUCT_MEMBER(film, int, pass_motion_weight)
KERNEL_STRUCT_MEMBER(film, int, pass_uv)
KERNEL_STRUCT_MEMBER(film, int, pass_object_id)
KERNEL_STRUCT_MEMBER(film, int, pass_material_id)
KERNEL_STRUCT_MEMBER(film, int, pass_diffuse_color)
KERNEL_STRUCT_MEMBER(film, int, pass_glossy_color)
KERNEL_STRUCT_MEMBER(film, int, pass_transmission_color)
KERNEL_STRUCT_MEMBER(film, int, pass_diffuse_indirect)
KERNEL_STRUCT_MEMBER(film, int, pass_glossy_indirect)
KERNEL_STRUCT_MEMBER(film, int, pass_transmission_indirect)
KERNEL_STRUCT_MEMBER(film, int, pass_volume_indirect)
KERNEL_STRUCT_MEMBER(film, int, pass_diffuse_direct)
KERNEL_STRUCT_MEMBER(film, int, pass_glossy_direct)
KERNEL_STRUCT_MEMBER(film, int, pass_transmission_direct)
KERNEL_STRUCT_MEMBER(film, int, pass_volume_direct)
KERNEL_STRUCT_MEMBER(film, int, pass_emission)
KERNEL_STRUCT_MEMBER(film, int, pass_background)
KERNEL_STRUCT_MEMBER(film, int, pass_ao)
KERNEL_STRUCT_MEMBER(film, float, pass_alpha_threshold)
KERNEL_STRUCT_MEMBER(film, int, pass_shadow)
KERNEL_STRUCT_MEMBER(film, float, pass_shadow_scale)
KERNEL_STRUCT_MEMBER(film, int, pass_shadow_catcher)
KERNEL_STRUCT_MEMBER(film, int, pass_shadow_catcher_sample_count)
KERNEL_STRUCT_MEMBER(film, int, pass_shadow_catcher_matte)
/* Cryptomatte. */
KERNEL_STRUCT_MEMBER(film, int, cryptomatte_passes)
KERNEL_STRUCT_MEMBER(film, int, cryptomatte_depth)
KERNEL_STRUCT_MEMBER(film, int, pass_cryptomatte)
/* Adaptive sampling. */
KERNEL_STRUCT_MEMBER(film, int, pass_adaptive_aux_buffer)
KERNEL_STRUCT_MEMBER(film, int, pass_sample_count)
/* Mist. */
KERNEL_STRUCT_MEMBER(film, int, pass_mist)
KERNEL_STRUCT_MEMBER(film, float, mist_start)
KERNEL_STRUCT_MEMBER(film, float, mist_inv_depth)
KERNEL_STRUCT_MEMBER(film, float, mist_falloff)
/* Denoising. */
KERNEL_STRUCT_MEMBER(film, int, pass_denoising_normal)
KERNEL_STRUCT_MEMBER(film, int, pass_denoising_albedo)
KERNEL_STRUCT_MEMBER(film, int, pass_denoising_depth)
/* AOVs. */
KERNEL_STRUCT_MEMBER(film, int, pass_aov_color)
KERNEL_STRUCT_MEMBER(film, int, pass_aov_value)
/* Light groups. */
KERNEL_STRUCT_MEMBER(film, int, pass_lightgroup)
/* Baking. */
KERNEL_STRUCT_MEMBER(film, int, pass_bake_primitive)
KERNEL_STRUCT_MEMBER(film, int, pass_bake_differential)
/* Shadow catcher. */
KERNEL_STRUCT_MEMBER(film, int, use_approximate_shadow_catcher)
/* Padding. */
KERNEL_STRUCT_MEMBER(film, int, pad1)
KERNEL_STRUCT_MEMBER(film, int, pad2)
KERNEL_STRUCT_END(KernelFilm)
/* Integrator. */
KERNEL_STRUCT_BEGIN(KernelIntegrator, integrator)
/* Emission. */
KERNEL_STRUCT_MEMBER(integrator, int, use_direct_light)
KERNEL_STRUCT_MEMBER(integrator, int, num_distribution)
KERNEL_STRUCT_MEMBER(integrator, int, num_all_lights)
KERNEL_STRUCT_MEMBER(integrator, float, pdf_triangles)
KERNEL_STRUCT_MEMBER(integrator, float, pdf_lights)
KERNEL_STRUCT_MEMBER(integrator, float, light_inv_rr_threshold)
/* Bounces. */
KERNEL_STRUCT_MEMBER(integrator, int, min_bounce)
KERNEL_STRUCT_MEMBER(integrator, int, max_bounce)
KERNEL_STRUCT_MEMBER(integrator, int, max_diffuse_bounce)
KERNEL_STRUCT_MEMBER(integrator, int, max_glossy_bounce)
KERNEL_STRUCT_MEMBER(integrator, int, max_transmission_bounce)
KERNEL_STRUCT_MEMBER(integrator, int, max_volume_bounce)
/* AO bounces. */
KERNEL_STRUCT_MEMBER(integrator, int, ao_bounces)
KERNEL_STRUCT_MEMBER(integrator, float, ao_bounces_distance)
KERNEL_STRUCT_MEMBER(integrator, float, ao_bounces_factor)
KERNEL_STRUCT_MEMBER(integrator, float, ao_additive_factor)
/* Transparency. */
KERNEL_STRUCT_MEMBER(integrator, int, transparent_min_bounce)
KERNEL_STRUCT_MEMBER(integrator, int, transparent_max_bounce)
KERNEL_STRUCT_MEMBER(integrator, int, transparent_shadows)
/* Caustics. */
KERNEL_STRUCT_MEMBER(integrator, int, caustics_reflective)
KERNEL_STRUCT_MEMBER(integrator, int, caustics_refractive)
KERNEL_STRUCT_MEMBER(integrator, float, filter_glossy)
/* Seed. */
KERNEL_STRUCT_MEMBER(integrator, int, seed)
/* Clamp. */
KERNEL_STRUCT_MEMBER(integrator, float, sample_clamp_direct)
KERNEL_STRUCT_MEMBER(integrator, float, sample_clamp_indirect)
/* MIS. */
KERNEL_STRUCT_MEMBER(integrator, int, use_lamp_mis)
/* Caustics. */
KERNEL_STRUCT_MEMBER(integrator, int, use_caustics)
/* Sampling pattern. */
KERNEL_STRUCT_MEMBER(integrator, int, sampling_pattern)
KERNEL_STRUCT_MEMBER(integrator, float, scrambling_distance)
/* Volume render. */
KERNEL_STRUCT_MEMBER(integrator, int, use_volumes)
KERNEL_STRUCT_MEMBER(integrator, int, volume_max_steps)
KERNEL_STRUCT_MEMBER(integrator, float, volume_step_rate)
/* Shadow catcher. */
KERNEL_STRUCT_MEMBER(integrator, int, has_shadow_catcher)
/* Closure filter. */
KERNEL_STRUCT_MEMBER(integrator, int, filter_closures)
/* MIS debugging. */
KERNEL_STRUCT_MEMBER(integrator, int, direct_light_sampling_type)
/* Padding */
KERNEL_STRUCT_MEMBER(integrator, int, pad1)
KERNEL_STRUCT_END(KernelIntegrator)
/* SVM. For shader specialization. */
KERNEL_STRUCT_BEGIN(KernelSVMUsage, svm_usage)
#define SHADER_NODE_TYPE(type) KERNEL_STRUCT_MEMBER(svm_usage, int, type)
#include "kernel/svm/node_types_template.h"
KERNEL_STRUCT_END(KernelSVMUsage)
#undef KERNEL_STRUCT_BEGIN
#undef KERNEL_STRUCT_MEMBER
#undef KERNEL_STRUCT_END

View File

@@ -246,7 +246,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
ccl_gpu_kernel_postfix
#if defined(__KERNEL_METAL_APPLE__) && defined(__METALRT__)
constant int __dummy_constant [[function_constant(Kernel_DummyConstant)]];
constant int __dummy_constant [[function_constant(0)]];
#endif
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)

View File

@@ -1,15 +0,0 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2021-2022 Blender Foundation */
enum {
Kernel_DummyConstant,
#define KERNEL_STRUCT_MEMBER(parent, type, name) KernelData_##parent##_##name,
#include "kernel/data_template.h"
};
#ifdef __KERNEL_METAL__
# define KERNEL_STRUCT_MEMBER(parent, type, name) \
constant type kernel_data_##parent##_##name \
[[function_constant(KernelData_##parent##_##name)]];
# include "kernel/data_template.h"
#endif
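
Both removed files above use the same X-macro pattern: the including file defines KERNEL_STRUCT_MEMBER, includes the template, and gets one expansion per member (an enum entry in the first pass, a Metal function constant in the second). A hedged sketch of that pattern in plain C++, with a macro standing in for the template include and invented member names:

#include <cstdio>

// Stand-in for kernel/data_template.h.
#define SKETCH_TEMPLATE \
  KERNEL_STRUCT_MEMBER(film, float, exposure) \
  KERNEL_STRUCT_MEMBER(integrator, int, max_bounce)

// Expansion 1: generate an enum of member identifiers.
#define KERNEL_STRUCT_MEMBER(parent, type, name) KernelData_##parent##_##name,
enum { SKETCH_TEMPLATE KernelData_count };
#undef KERNEL_STRUCT_MEMBER

// Expansion 2: generate a printable table of member names.
#define KERNEL_STRUCT_MEMBER(parent, type, name) #parent "." #name,
static const char *kernel_data_member_names[] = { SKETCH_TEMPLATE };
#undef KERNEL_STRUCT_MEMBER

int main() {
  for (int i = 0; i < KernelData_count; i++) {
    std::printf("%d: %s\n", i, kernel_data_member_names[i]);
  }
  return 0;
}
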

View File

@@ -5,7 +5,6 @@
#include "kernel/device/metal/compat.h"
#include "kernel/device/metal/globals.h"
#include "kernel/device/metal/function_constants.h"
#include "kernel/device/gpu/kernel.h"
/* MetalRT intersection handlers */
@@ -410,7 +409,6 @@ void metalrt_intersection_curve(constant KernelParamsMetal &launch_params_metal,
const float3 ray_origin,
const float3 ray_direction,
float time,
const float ray_tmin,
const float ray_tmax,
thread BoundingBoxIntersectionResult &result)
{
@@ -435,7 +433,7 @@ void metalrt_intersection_curve(constant KernelParamsMetal &launch_params_metal,
isect.t *= len;
MetalKernelContext context(launch_params_metal);
if (context.curve_intersect(NULL, &isect, P, dir, ray_tmin, isect.t, object, prim, time, type)) {
if (context.curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>(
launch_params_metal, payload, object, prim, isect.u);
if (result.accept) {
@@ -457,7 +455,6 @@ void metalrt_intersection_curve_shadow(constant KernelParamsMetal &launch_params
const float3 ray_origin,
const float3 ray_direction,
float time,
const float ray_tmin,
const float ray_tmax,
thread BoundingBoxIntersectionResult &result)
{
@@ -477,7 +474,7 @@ void metalrt_intersection_curve_shadow(constant KernelParamsMetal &launch_params
isect.t *= len;
MetalKernelContext context(launch_params_metal);
if (context.curve_intersect(NULL, &isect, P, dir, ray_tmin, isect.t, object, prim, time, type)) {
if (context.curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>(
launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
result.accept = !result.continue_search;
@@ -496,7 +493,6 @@ __intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[b
const uint primitive_id [[primitive_id]],
const float3 ray_origin [[origin]],
const float3 ray_direction [[direction]],
const float ray_tmin [[min_distance]],
const float ray_tmax [[max_distance]])
{
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
@@ -514,7 +510,7 @@ __intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[b
# else
0.0f,
# endif
ray_tmin, ray_tmax, result);
ray_tmax, result);
}
return result;
@@ -528,7 +524,6 @@ __intersection__curve_ribbon_shadow(constant KernelParamsMetal &launch_params_me
const uint primitive_id [[primitive_id]],
const float3 ray_origin [[origin]],
const float3 ray_direction [[direction]],
const float ray_tmin [[min_distance]],
const float ray_tmax [[max_distance]])
{
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
@@ -546,7 +541,7 @@ __intersection__curve_ribbon_shadow(constant KernelParamsMetal &launch_params_me
# else
0.0f,
# endif
ray_tmin, ray_tmax, result);
ray_tmax, result);
}
return result;
@@ -560,7 +555,6 @@ __intersection__curve_all(constant KernelParamsMetal &launch_params_metal [[buff
const uint primitive_id [[primitive_id]],
const float3 ray_origin [[origin]],
const float3 ray_direction [[direction]],
const float ray_tmin [[min_distance]],
const float ray_tmax [[max_distance]])
{
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
@@ -576,7 +570,7 @@ __intersection__curve_all(constant KernelParamsMetal &launch_params_metal [[buff
# else
0.0f,
# endif
ray_tmin, ray_tmax, result);
ray_tmax, result);
return result;
}
@@ -589,7 +583,6 @@ __intersection__curve_all_shadow(constant KernelParamsMetal &launch_params_metal
const uint primitive_id [[primitive_id]],
const float3 ray_origin [[origin]],
const float3 ray_direction [[direction]],
const float ray_tmin [[min_distance]],
const float ray_tmax [[max_distance]])
{
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
@@ -606,7 +599,7 @@ __intersection__curve_all_shadow(constant KernelParamsMetal &launch_params_metal
# else
0.0f,
# endif
ray_tmin, ray_tmax, result);
ray_tmax, result);
return result;
}
@@ -622,7 +615,6 @@ void metalrt_intersection_point(constant KernelParamsMetal &launch_params_metal,
const float3 ray_origin,
const float3 ray_direction,
float time,
const float ray_tmin,
const float ray_tmax,
thread BoundingBoxIntersectionResult &result)
{
@@ -647,7 +639,7 @@ void metalrt_intersection_point(constant KernelParamsMetal &launch_params_metal,
isect.t *= len;
MetalKernelContext context(launch_params_metal);
if (context.point_intersect(NULL, &isect, P, dir, ray_tmin, isect.t, object, prim, time, type)) {
if (context.point_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>(
launch_params_metal, payload, object, prim, isect.u);
if (result.accept) {
@@ -669,7 +661,6 @@ void metalrt_intersection_point_shadow(constant KernelParamsMetal &launch_params
const float3 ray_origin,
const float3 ray_direction,
float time,
const float ray_tmin,
const float ray_tmax,
thread BoundingBoxIntersectionResult &result)
{
@@ -689,7 +680,7 @@ void metalrt_intersection_point_shadow(constant KernelParamsMetal &launch_params
isect.t *= len;
MetalKernelContext context(launch_params_metal);
if (context.point_intersect(NULL, &isect, P, dir, ray_tmin, isect.t, object, prim, time, type)) {
if (context.point_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>(
launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
result.accept = !result.continue_search;
@@ -708,7 +699,6 @@ __intersection__point(constant KernelParamsMetal &launch_params_metal [[buffer(1
const uint primitive_id [[primitive_id]],
const float3 ray_origin [[origin]],
const float3 ray_direction [[direction]],
const float ray_tmin [[min_distance]],
const float ray_tmax [[max_distance]])
{
const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
@@ -725,7 +715,7 @@ __intersection__point(constant KernelParamsMetal &launch_params_metal [[buffer(1
# else
0.0f,
# endif
ray_tmin, ray_tmax, result);
ray_tmax, result);
return result;
}
@@ -738,7 +728,6 @@ __intersection__point_shadow(constant KernelParamsMetal &launch_params_metal [[b
const uint primitive_id [[primitive_id]],
const float3 ray_origin [[origin]],
const float3 ray_direction [[direction]],
const float ray_tmin [[min_distance]],
const float ray_tmax [[max_distance]])
{
const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
@@ -755,7 +744,7 @@ __intersection__point_shadow(constant KernelParamsMetal &launch_params_metal [[b
# else
0.0f,
# endif
ray_tmin, ray_tmax, result);
ray_tmax, result);
return result;
}

View File

@@ -670,7 +670,7 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
}
static const int lowest_supported_driver_version_win = 1011660;
static const int lowest_supported_driver_version_neo = 23570;
static const int lowest_supported_driver_version_neo = 20066;
static int parse_driver_build_version(const sycl::device &device)
{

View File

@@ -51,36 +51,32 @@ ccl_device_forceinline int get_object_id()
extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_closest()
{
const int global_index = optixGetLaunchIndex().x;
const int path_index = (kernel_params.path_index_array) ?
kernel_params.path_index_array[global_index] :
global_index;
const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] :
global_index;
integrator_intersect_closest(nullptr, path_index, kernel_params.render_buffer);
}
extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_shadow()
{
const int global_index = optixGetLaunchIndex().x;
const int path_index = (kernel_params.path_index_array) ?
kernel_params.path_index_array[global_index] :
global_index;
const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] :
global_index;
integrator_intersect_shadow(nullptr, path_index);
}
extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_subsurface()
{
const int global_index = optixGetLaunchIndex().x;
const int path_index = (kernel_params.path_index_array) ?
kernel_params.path_index_array[global_index] :
global_index;
const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] :
global_index;
integrator_intersect_subsurface(nullptr, path_index);
}
extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_volume_stack()
{
const int global_index = optixGetLaunchIndex().x;
const int path_index = (kernel_params.path_index_array) ?
kernel_params.path_index_array[global_index] :
global_index;
const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] :
global_index;
integrator_intersect_volume_stack(nullptr, path_index);
}
@@ -412,7 +408,6 @@ ccl_device_inline void optix_intersection_curve(const int prim, const int type)
float3 P = optixGetObjectRayOrigin();
float3 dir = optixGetObjectRayDirection();
float tmin = optixGetRayTmin();
/* The direction is not normalized by default, but the curve intersection routine expects that */
float len;
@@ -430,7 +425,7 @@ ccl_device_inline void optix_intersection_curve(const int prim, const int type)
if (isect.t != FLT_MAX)
isect.t *= len;
if (curve_intersect(NULL, &isect, P, dir, tmin, isect.t, object, prim, time, type)) {
if (curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use");
optixReportIntersection(isect.t / len,
type & PRIMITIVE_ALL,
@@ -467,7 +462,6 @@ extern "C" __global__ void __intersection__point()
float3 P = optixGetObjectRayOrigin();
float3 dir = optixGetObjectRayDirection();
float tmin = optixGetRayTmin();
/* The direction is not normalized by default, the point intersection routine expects that. */
float len;
@@ -486,7 +480,7 @@ extern "C" __global__ void __intersection__point()
isect.t *= len;
}
if (point_intersect(NULL, &isect, P, dir, tmin, isect.t, object, prim, time, type)) {
if (point_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use");
optixReportIntersection(isect.t / len, type & PRIMITIVE_ALL);
}
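
The comments above note that OptiX hands the intersection program an unnormalized object-space direction while the curve/point routines expect a unit direction, so the hit parameter is rescaled by the direction length on the way in (isect.t *= len) and back out (isect.t / len). A hedged numeric sketch of that unit conversion, independent of the OptiX API:

#include <cmath>
#include <cstdio>

int main() {
  const float dir[3] = {0.0f, 3.0f, 4.0f};  // unnormalized object-space direction (length 5)
  const float len = std::sqrt(dir[0] * dir[0] + dir[1] * dir[1] + dir[2] * dir[2]);

  float t_unnormalized = 2.0f;                // hit parameter along the unnormalized ray
  float t_normalized = t_unnormalized * len;  // same hit, measured along the unit direction

  // ...an intersection routine would refine t_normalized here...

  float t_report = t_normalized / len;        // convert back before reporting the intersection
  std::printf("len=%.1f t_norm=%.1f t_report=%.1f\n", len, t_normalized, t_report);
  return 0;
}
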

View File

@@ -156,8 +156,7 @@ ccl_device_inline float2 half_plane_intersect(const float3 P, const float3 N, co
}
ccl_device bool curve_intersect_iterative(const float3 ray_dir,
const float ray_tmin,
ccl_private float *ray_tmax,
ccl_private float *ray_tfar,
const float dt,
const float4 curve[4],
float u,
@@ -221,7 +220,7 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir,
if (fabsf(f) < f_err && fabsf(g) < g_err) {
t += dt;
if (!(t >= ray_tmin && t <= *ray_tmax)) {
if (!(0.0f <= t && t <= *ray_tfar)) {
return false; /* Rejects NaNs */
}
if (!(u >= 0.0f && u <= 1.0f)) {
@@ -238,7 +237,7 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir,
}
/* Record intersection. */
*ray_tmax = t;
*ray_tfar = t;
isect->t = t;
isect->u = u;
isect->v = 0.0f;
@@ -251,8 +250,7 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir,
ccl_device bool curve_intersect_recursive(const float3 ray_orig,
const float3 ray_dir,
const float ray_tmin,
float ray_tmax,
float ray_tfar,
float4 curve[4],
ccl_private Intersection *isect)
{
@@ -333,7 +331,7 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
}
/* Intersect with cap-planes. */
float2 tp = make_float2(ray_tmin - dt, ray_tmax - dt);
float2 tp = make_float2(-dt, ray_tfar - dt);
tp = make_float2(max(tp.x, tc_outer.x), min(tp.y, tc_outer.y));
const float2 h0 = half_plane_intersect(
float4_to_float3(P0), float4_to_float3(dP0du), ray_dir);
@@ -396,20 +394,19 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
CURVE_NUM_BEZIER_SUBDIVISIONS;
if (depth >= termDepth) {
found |= curve_intersect_iterative(
ray_dir, ray_tmin, &ray_tmax, dt, curve, u_outer0, tp0.x, use_backfacing, isect);
ray_dir, &ray_tfar, dt, curve, u_outer0, tp0.x, use_backfacing, isect);
}
else {
recurse = true;
}
}
const float t1 = tp1.x + dt;
if (valid1 && (t1 >= ray_tmin && t1 <= ray_tmax)) {
if (valid1 && (tp1.x + dt <= ray_tfar)) {
const int termDepth = unstable1 ? CURVE_NUM_BEZIER_SUBDIVISIONS_UNSTABLE :
CURVE_NUM_BEZIER_SUBDIVISIONS;
if (depth >= termDepth) {
found |= curve_intersect_iterative(
ray_dir, ray_tmin, &ray_tmax, dt, curve, u_outer1, tp1.y, use_backfacing, isect);
ray_dir, &ray_tfar, dt, curve, u_outer1, tp1.y, use_backfacing, isect);
}
else {
recurse = true;
@@ -459,8 +456,7 @@ ccl_device_inline bool cylinder_culling_test(const float2 p1, const float2 p2, c
* v0,v1,v3 and v2,v3,v1. The edge v1,v2 decides which of the two
* triangles gets intersected.
*/
ccl_device_inline bool ribbon_intersect_quad(const float ray_tmin,
const float ray_tmax,
ccl_device_inline bool ribbon_intersect_quad(const float ray_tfar,
const float3 quad_v0,
const float3 quad_v1,
const float3 quad_v2,
@@ -501,7 +497,7 @@ ccl_device_inline bool ribbon_intersect_quad(const float ray_tmin,
/* Perform depth test? */
const float t = rcpDen * dot(v0, Ng);
if (!(t >= ray_tmin && t <= ray_tmax)) {
if (!(0.0f <= t && t <= ray_tfar)) {
return false;
}
@@ -538,8 +534,7 @@ ccl_device_inline float4 ribbon_to_ray_space(const float3 ray_space[3],
ccl_device_inline bool ribbon_intersect(const float3 ray_org,
const float3 ray_dir,
const float ray_tmin,
float ray_tmax,
float ray_tfar,
const int N,
float4 curve[4],
ccl_private Intersection *isect)
@@ -587,7 +582,7 @@ ccl_device_inline bool ribbon_intersect(const float3 ray_org,
/* Intersect quad. */
float vu, vv, vt;
bool valid0 = ribbon_intersect_quad(ray_tmin, ray_tmax, lp0, lp1, up1, up0, &vu, &vv, &vt);
bool valid0 = ribbon_intersect_quad(ray_tfar, lp0, lp1, up1, up0, &vu, &vv, &vt);
if (valid0) {
/* ignore self intersections */
@@ -601,7 +596,7 @@ ccl_device_inline bool ribbon_intersect(const float3 ray_org,
vv = 2.0f * vv - 1.0f;
/* Record intersection. */
ray_tmax = vt;
ray_tfar = vt;
isect->t = vt;
isect->u = u + vu * step_size;
isect->v = vv;
@@ -621,7 +616,6 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals kg,
ccl_private Intersection *isect,
const float3 P,
const float3 dir,
const float tmin,
const float tmax,
int object,
int prim,
@@ -651,7 +645,7 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals kg,
if (type & PRIMITIVE_CURVE_RIBBON) {
/* todo: adaptive number of subdivisions could help performance here. */
const int subdivisions = kernel_data.bvh.curve_subdivisions;
if (ribbon_intersect(P, dir, tmin, tmax, subdivisions, curve, isect)) {
if (ribbon_intersect(P, dir, tmax, subdivisions, curve, isect)) {
isect->prim = prim;
isect->object = object;
isect->type = type;
@@ -661,7 +655,7 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals kg,
return false;
}
else {
if (curve_intersect_recursive(P, dir, tmin, tmax, curve, isect)) {
if (curve_intersect_recursive(P, dir, tmax, curve, isect)) {
isect->prim = prim;
isect->object = object;
isect->type = type;

View File

@@ -46,7 +46,6 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals kg,
ccl_private Intersection *isect,
float3 P,
float3 dir,
float tmin,
float tmax,
float time,
uint visibility,
@@ -59,7 +58,7 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals kg,
motion_triangle_vertices(kg, object, prim, time, verts);
/* Ray-triangle intersection, unoptimized. */
float t, u, v;
if (ray_triangle_intersect(P, dir, tmin, tmax, verts[0], verts[1], verts[2], &u, &v, &t)) {
if (ray_triangle_intersect(P, dir, tmax, verts[0], verts[1], verts[2], &u, &v, &t)) {
#ifdef __VISIBILITY_FLAG__
/* Visibility flag test. we do it here under the assumption
* that most triangles are culled by node flags.
@@ -93,7 +92,6 @@ ccl_device_inline bool motion_triangle_intersect_local(KernelGlobals kg,
int object,
int prim,
int prim_addr,
float tmin,
float tmax,
ccl_private uint *lcg_state,
int max_hits)
@@ -103,7 +101,7 @@ ccl_device_inline bool motion_triangle_intersect_local(KernelGlobals kg,
motion_triangle_vertices(kg, object, prim, time, verts);
/* Ray-triangle intersection, unoptimized. */
float t, u, v;
if (!ray_triangle_intersect(P, dir, tmin, tmax, verts[0], verts[1], verts[2], &u, &v, &t)) {
if (!ray_triangle_intersect(P, dir, tmax, verts[0], verts[1], verts[2], &u, &v, &t)) {
return false;
}

View File

@@ -9,12 +9,8 @@ CCL_NAMESPACE_BEGIN
#ifdef __POINTCLOUD__
ccl_device_forceinline bool point_intersect_test(const float4 point,
const float3 P,
const float3 dir,
const float tmin,
const float tmax,
ccl_private float *t)
ccl_device_forceinline bool point_intersect_test(
const float4 point, const float3 P, const float3 dir, const float tmax, ccl_private float *t)
{
const float3 center = float4_to_float3(point);
const float radius = point.w;
@@ -32,12 +28,12 @@ ccl_device_forceinline bool point_intersect_test(const float4 point,
const float td = sqrt((r2 - l2) * rd2);
const float t_front = projC0 - td;
const bool valid_front = (tmin <= t_front) & (t_front <= tmax);
const bool valid_front = (0.0f <= t_front) & (t_front <= tmax);
/* Always back-face culling for now. */
# if 0
const float t_back = projC0 + td;
const bool valid_back = (tmin <= t_back) & (t_back <= tmax);
const bool valid_back = (0.0f <= t_back) & (t_back <= tmax);
/* check if there is a first hit */
const bool valid_first = valid_front | valid_back;
@@ -60,7 +56,6 @@ ccl_device_forceinline bool point_intersect(KernelGlobals kg,
ccl_private Intersection *isect,
const float3 P,
const float3 dir,
const float tmin,
const float tmax,
const int object,
const int prim,
@@ -70,7 +65,7 @@ ccl_device_forceinline bool point_intersect(KernelGlobals kg,
const float4 point = (type & PRIMITIVE_MOTION) ? motion_point(kg, object, prim, time) :
kernel_data_fetch(points, prim);
if (!point_intersect_test(point, P, dir, tmin, tmax, &isect->t)) {
if (!point_intersect_test(point, P, dir, tmax, &isect->t)) {
return false;
}

View File

@@ -407,7 +407,7 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals kg,
{
/* vectors */
sd->P = ray->P + ray->D * ray->tmin;
sd->P = ray->P;
sd->N = -ray->D;
sd->Ng = -ray->D;
sd->I = -ray->D;
@@ -441,6 +441,7 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals kg,
/* for NDC coordinates */
sd->ray_P = ray->P;
sd->ray_dP = ray->dP;
}
#endif /* __VOLUME__ */

View File

@@ -17,7 +17,6 @@ ccl_device_inline bool triangle_intersect(KernelGlobals kg,
ccl_private Intersection *isect,
float3 P,
float3 dir,
float tmin,
float tmax,
uint visibility,
int object,
@@ -29,7 +28,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals kg,
tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1),
tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2);
float t, u, v;
if (ray_triangle_intersect(P, dir, tmin, tmax, tri_a, tri_b, tri_c, &u, &v, &t)) {
if (ray_triangle_intersect(P, dir, tmax, tri_a, tri_b, tri_c, &u, &v, &t)) {
#ifdef __VISIBILITY_FLAG__
/* Visibility flag test. we do it here under the assumption
* that most triangles are culled by node flags.
@@ -63,7 +62,6 @@ ccl_device_inline bool triangle_intersect_local(KernelGlobals kg,
int object,
int prim,
int prim_addr,
float tmin,
float tmax,
ccl_private uint *lcg_state,
int max_hits)
@@ -73,7 +71,7 @@ ccl_device_inline bool triangle_intersect_local(KernelGlobals kg,
tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1),
tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2);
float t, u, v;
if (!ray_triangle_intersect(P, dir, tmin, tmax, tri_a, tri_b, tri_c, &u, &v, &t)) {
if (!ray_triangle_intersect(P, dir, tmax, tri_a, tri_b, tri_c, &u, &v, &t)) {
return false;
}

View File

@@ -174,15 +174,14 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
Ray ray ccl_optional_struct_init;
ray.P = zero_float3();
ray.D = normalize(P);
ray.tmin = 0.0f;
ray.tmax = FLT_MAX;
ray.t = FLT_MAX;
ray.time = 0.5f;
ray.dP = differential_zero_compact();
ray.dD = differential_zero_compact();
integrator_state_write_ray(kg, state, &ray);
/* Setup next kernel to execute. */
integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
}
else {
/* Surface baking. */
@@ -211,8 +210,7 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
Ray ray ccl_optional_struct_init;
ray.P = P + N;
ray.D = -N;
ray.tmin = 0.0f;
ray.tmax = FLT_MAX;
ray.t = FLT_MAX;
ray.time = 0.5f;
/* Setup differentials. */
@@ -249,15 +247,13 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE);
if (use_caustics) {
integrator_path_init_sorted(
kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index);
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index);
}
else if (use_raytrace_kernel) {
integrator_path_init_sorted(
kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index);
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index);
}
else {
integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader_index);
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader_index);
}
}

View File

@@ -86,7 +86,7 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg,
/* Generate camera ray. */
Ray ray;
integrate_camera_sample(kg, sample, x, y, rng_hash, &ray);
if (ray.tmax == 0.0f) {
if (ray.t == 0.0f) {
return true;
}
@@ -100,10 +100,10 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg,
/* Continue with intersect_closest kernel, optionally initializing volume
* stack before that if the camera may be inside a volume. */
if (kernel_data.cam.is_inside_volume) {
integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
}
else {
integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
}
return true;

View File

@@ -109,14 +109,14 @@ ccl_device_forceinline void integrator_split_shadow_catcher(
/* If using background pass, schedule background shading kernel so that we have a background
* to alpha-over on. The background kernel will then continue the path afterwards. */
INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND;
integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
return;
}
if (!integrator_state_volume_stack_is_empty(kg, state)) {
/* Volume stack is not empty. Re-init the volume stack to exclude any non-shadow catcher
* objects from it, and then continue shading volume and shadow catcher surface after. */
integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
return;
}
@@ -128,19 +128,18 @@ ccl_device_forceinline void integrator_split_shadow_catcher(
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
if (use_caustics) {
integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
}
else if (use_raytrace_kernel) {
integrator_path_init_sorted(
kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
}
else {
integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
}
}
/* Schedule next kernel to be executed after updating volume stack for shadow catcher. */
template<DeviceKernel current_kernel>
template<uint32_t current_kernel>
ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catcher_volume(
KernelGlobals kg, IntegratorState state)
{
@@ -157,21 +156,20 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
if (use_caustics) {
integrator_path_next_sorted(
kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
INTEGRATOR_PATH_NEXT_SORTED(
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
}
else if (use_raytrace_kernel) {
integrator_path_next_sorted(
kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
INTEGRATOR_PATH_NEXT_SORTED(
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
}
else {
integrator_path_next_sorted(
kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
INTEGRATOR_PATH_NEXT_SORTED(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
}
}
/* Schedule next kernel to be executed after executing background shader for shadow catcher. */
template<DeviceKernel current_kernel>
template<uint32_t current_kernel>
ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catcher_background(
KernelGlobals kg, IntegratorState state)
{
@@ -179,8 +177,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche
if (!integrator_state_volume_stack_is_empty(kg, state)) {
/* Volume stack is not empty. Re-init the volume stack to exclude any non-shadow catcher
* objects from it, and then continue shading volume and shadow catcher surface after. */
integrator_path_next(
kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
return;
}
@@ -193,7 +190,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche
*
* Note that current_kernel is a template value since making this a variable
* leads to poor performance with CUDA atomics. */
template<DeviceKernel current_kernel>
template<uint32_t current_kernel>
ccl_device_forceinline void integrator_intersect_next_kernel(
KernelGlobals kg,
IntegratorState state,
@@ -209,10 +206,10 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
const int flags = (hit_surface) ? kernel_data_fetch(shaders, shader).flags : 0;
if (!integrator_intersect_terminate(kg, state, flags)) {
integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
}
else {
integrator_path_terminate(kg, state, current_kernel);
INTEGRATOR_PATH_TERMINATE(current_kernel);
}
return;
}
@@ -221,7 +218,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
if (hit) {
/* Hit a surface, continue with light or surface kernel. */
if (isect->type & PRIMITIVE_LAMP) {
integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
}
else {
/* Hit a surface, continue with surface kernel unless terminated. */
@@ -234,16 +231,16 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
(object_flags & SD_OBJECT_CAUSTICS);
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
if (use_caustics) {
integrator_path_next_sorted(
kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
INTEGRATOR_PATH_NEXT_SORTED(
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
}
else if (use_raytrace_kernel) {
integrator_path_next_sorted(
kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
INTEGRATOR_PATH_NEXT_SORTED(
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
}
else {
integrator_path_next_sorted(
kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
INTEGRATOR_PATH_NEXT_SORTED(
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
}
#ifdef __SHADOW_CATCHER__
@@ -252,13 +249,13 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
#endif
}
else {
integrator_path_terminate(kg, state, current_kernel);
INTEGRATOR_PATH_TERMINATE(current_kernel);
}
}
}
else {
/* Nothing hit, continue with background kernel. */
integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
}
}
@@ -266,7 +263,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
*
* The logic here matches integrator_intersect_next_kernel, except that
* volume shading and termination testing have already been done. */
template<DeviceKernel current_kernel>
template<uint32_t current_kernel>
ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
KernelGlobals kg,
IntegratorState state,
@@ -276,7 +273,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
if (isect->prim != PRIM_NONE) {
/* Hit a surface, continue with light or surface kernel. */
if (isect->type & PRIMITIVE_LAMP) {
integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
return;
}
else {
@@ -289,16 +286,16 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
if (use_caustics) {
integrator_path_next_sorted(
kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
INTEGRATOR_PATH_NEXT_SORTED(
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
}
else if (use_raytrace_kernel) {
integrator_path_next_sorted(
kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
INTEGRATOR_PATH_NEXT_SORTED(
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
}
else {
integrator_path_next_sorted(
kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
INTEGRATOR_PATH_NEXT_SORTED(
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
}
#ifdef __SHADOW_CATCHER__
@@ -310,7 +307,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
}
else {
/* Nothing hit, continue with background kernel. */
integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
return;
}
}
@@ -324,7 +321,7 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg,
/* Read ray from integrator state into local memory. */
Ray ray ccl_optional_struct_init;
integrator_state_read_ray(kg, state, &ray);
kernel_assert(ray.tmax != 0.0f);
kernel_assert(ray.t != 0.0f);
const uint visibility = path_state_ray_visibility(state);
const int last_isect_prim = INTEGRATOR_STATE(state, isect, prim);
@@ -332,12 +329,12 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg,
/* Trick to use short AO rays to approximate indirect light at the end of the path. */
if (path_state_ao_bounce(kg, state)) {
ray.tmax = kernel_data.integrator.ao_bounces_distance;
ray.t = kernel_data.integrator.ao_bounces_distance;
if (last_isect_object != OBJECT_NONE) {
const float object_ao_distance = kernel_data_fetch(objects, last_isect_object).ao_distance;
if (object_ao_distance != 0.0f) {
ray.tmax = object_ao_distance;
ray.t = object_ao_distance;
}
}
}

View File

@@ -162,7 +162,7 @@ ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorShadowSt
if (opaque_hit) {
/* Hit an opaque surface, shadow path ends here. */
integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW);
INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW);
return;
}
else {
@@ -171,9 +171,7 @@ ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorShadowSt
*
* TODO: could also write to render buffer directly if no transparent shadows?
* Could save a kernel execution for the common case. */
integrator_shadow_path_next(kg,
state,
DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW,
INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW,
DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
return;
}

View File

@@ -17,7 +17,7 @@ ccl_device void integrator_intersect_subsurface(KernelGlobals kg, IntegratorStat
}
#endif
integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
}
CCL_NAMESPACE_END

View File

@@ -24,8 +24,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
Ray volume_ray ccl_optional_struct_init;
volume_ray.P = from_P;
volume_ray.D = normalize_len(to_P - from_P, &volume_ray.tmax);
volume_ray.tmin = 0.0f;
volume_ray.D = normalize_len(to_P - from_P, &volume_ray.t);
volume_ray.self.object = INTEGRATOR_STATE(state, isect, object);
volume_ray.self.prim = INTEGRATOR_STATE(state, isect, prim);
volume_ray.self.light_object = OBJECT_NONE;
@@ -59,9 +58,12 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
volume_stack_enter_exit(kg, state, stack_sd);
/* Move ray forward. */
volume_ray.tmin = intersection_t_offset(isect.t);
volume_ray.P = stack_sd->P;
volume_ray.self.object = isect.object;
volume_ray.self.prim = isect.prim;
if (volume_ray.t != FLT_MAX) {
volume_ray.D = normalize_len(to_P - volume_ray.P, &volume_ray.t);
}
++step;
}
#endif
@@ -80,8 +82,7 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s
/* Trace ray in random direction. Any direction works, Z up is a guess to get the
* fewest hits. */
volume_ray.D = make_float3(0.0f, 0.0f, 1.0f);
volume_ray.tmin = 0.0f;
volume_ray.tmax = FLT_MAX;
volume_ray.t = FLT_MAX;
volume_ray.self.object = OBJECT_NONE;
volume_ray.self.prim = PRIM_NONE;
volume_ray.self.light_object = OBJECT_NONE;
@@ -198,7 +199,7 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s
}
/* Move ray forward. */
volume_ray.tmin = intersection_t_offset(isect.t);
volume_ray.P = stack_sd->P;
volume_ray.self.object = isect.object;
volume_ray.self.prim = isect.prim;
++step;
@@ -221,9 +222,7 @@ ccl_device void integrator_intersect_volume_stack(KernelGlobals kg, IntegratorSt
}
else {
/* Volume stack init for camera rays, continue with intersection of camera ray. */
integrator_path_next(kg,
state,
DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK,
INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK,
DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
}
}
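
Note: these stack-init loops show the pattern this change uses in place of a growing ray tmin: after each boundary hit, move the ray origin to the hit position and shrink the remaining length, so the next intersection query starts past the previous hit. A stripped-down sketch of one such stepping loop; scene_intersect_step() and MAX_STEPS are toy placeholders, not Cycles API:

#include <float.h>
#include <stdbool.h>

#define MAX_STEPS 32

/* Toy stand-in for a scene query: pretend there is a volume boundary every 1.0 units. */
static bool scene_intersect_step(const float P[3], const float D[3], float t_max, float *t_hit)
{
  (void)P;
  (void)D;
  *t_hit = 1.0f;
  return 1.0f <= t_max;
}

static void sketch_walk_volume_boundaries(float P[3], const float D[3])
{
  float t_remaining = FLT_MAX;
  for (int step = 0; step < MAX_STEPS; step++) {
    float t_hit;
    if (!scene_intersect_step(P, D, t_remaining, &t_hit)) {
      break; /* No more boundaries along the ray. */
    }
    /* ... enter/exit the volume stack for this boundary here ... */

    /* Advance the origin past the hit instead of raising a tmin. */
    for (int k = 0; k < 3; k++) {
      P[k] += t_hit * D[k];
    }
    if (t_remaining != FLT_MAX) {
      t_remaining -= t_hit; /* Keep the overall segment length fixed. */
    }
  }
}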

View File

@@ -137,14 +137,8 @@ ccl_device_forceinline void mnee_update_light_sample(KernelGlobals kg,
}
}
else if (ls->type == LIGHT_AREA) {
float invarea = fabsf(klight->area.invarea);
ls->D = normalize_len(ls->P - P, &ls->t);
ls->pdf = invarea;
if (klight->area.tan_spread > 0.f) {
ls->eval_fac = 0.25f * invarea;
ls->eval_fac *= light_spread_attenuation(
ls->D, ls->Ng, klight->area.tan_spread, klight->area.normalize_spread);
}
ls->pdf = fabsf(klight->area.invarea);
}
ls->pdf *= kernel_data.integrator.pdf_lights;
@@ -442,7 +436,6 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg,
projection_ray.self.light_prim = PRIM_NONE;
projection_ray.dP = differential_make_compact(sd->dP);
projection_ray.dD = differential_zero_compact();
projection_ray.tmin = 0.0f;
projection_ray.time = sd->time;
Intersection projection_isect;
@@ -506,8 +499,8 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg,
projection_ray.self.prim = pv.prim;
projection_ray.P = pv.p;
}
projection_ray.D = normalize_len(tentative_p - projection_ray.P, &projection_ray.tmax);
projection_ray.tmax *= MNEE_PROJECTION_DISTANCE_MULTIPLIER;
projection_ray.D = normalize_len(tentative_p - projection_ray.P, &projection_ray.t);
projection_ray.t *= MNEE_PROJECTION_DISTANCE_MULTIPLIER;
bool projection_success = false;
for (int isect_count = 0; isect_count < MNEE_MAX_INTERSECTION_COUNT; isect_count++) {
@@ -526,7 +519,8 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg,
projection_ray.self.object = projection_isect.object;
projection_ray.self.prim = projection_isect.prim;
projection_ray.tmin = intersection_t_offset(projection_isect.t);
projection_ray.P += projection_isect.t * projection_ray.D;
projection_ray.t -= projection_isect.t;
}
if (!projection_success) {
reduce_stepsize = true;
@@ -858,7 +852,6 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg,
Ray probe_ray;
probe_ray.self.light_object = ls->object;
probe_ray.self.light_prim = ls->prim;
probe_ray.tmin = 0.0f;
probe_ray.dP = differential_make_compact(sd->dP);
probe_ray.dD = differential_zero_compact();
probe_ray.time = sd->time;
@@ -874,13 +867,13 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg,
ccl_private const ManifoldVertex &v = vertices[vi];
/* Check visibility. */
probe_ray.D = normalize_len(v.p - probe_ray.P, &probe_ray.tmax);
probe_ray.D = normalize_len(v.p - probe_ray.P, &probe_ray.t);
if (scene_intersect(kg, &probe_ray, PATH_RAY_TRANSMIT, &probe_isect)) {
int hit_object = (probe_isect.object == OBJECT_NONE) ?
kernel_data_fetch(prim_object, probe_isect.prim) :
probe_isect.object;
/* Test whether the ray hit the appropriate object at its intended location. */
if (hit_object != v.object || fabsf(probe_ray.tmax - probe_isect.t) > MNEE_MIN_DISTANCE)
if (hit_object != v.object || fabsf(probe_ray.t - probe_isect.t) > MNEE_MIN_DISTANCE)
return false;
}
probe_ray.self.object = v.object;
@@ -959,16 +952,15 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg,
probe_ray.self.light_object = ls->object;
probe_ray.self.light_prim = ls->prim;
probe_ray.P = sd->P;
probe_ray.tmin = 0.0f;
if (ls->t == FLT_MAX) {
/* Distant / env light. */
probe_ray.D = ls->D;
probe_ray.tmax = ls->t;
probe_ray.t = ls->t;
}
else {
/* Other lights, avoid self-intersection. */
probe_ray.D = ls->P - probe_ray.P;
probe_ray.D = normalize_len(probe_ray.D, &probe_ray.tmax);
probe_ray.D = normalize_len(probe_ray.D, &probe_ray.t);
}
probe_ray.dP = differential_make_compact(sd->dP);
probe_ray.dD = differential_zero_compact();
@@ -1050,7 +1042,9 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg,
probe_ray.self.object = probe_isect.object;
probe_ray.self.prim = probe_isect.prim;
probe_ray.tmin = intersection_t_offset(probe_isect.t);
probe_ray.P += probe_isect.t * probe_ray.D;
if (ls->t != FLT_MAX)
probe_ray.t -= probe_isect.t;
};
/* Mark the manifold walk invalid to keep mollification on by default. */

View File

@@ -52,6 +52,7 @@ ccl_device_inline void path_state_init_integrator(KernelGlobals kg,
INTEGRATOR_STATE_WRITE(state, path, flag) = PATH_RAY_CAMERA | PATH_RAY_MIS_SKIP |
PATH_RAY_TRANSPARENT_BACKGROUND;
INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = 0.0f;
INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f;
INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = FLT_MAX;
INTEGRATOR_STATE_WRITE(state, path, continuation_probability) = 1.0f;
INTEGRATOR_STATE_WRITE(state, path, throughput) = make_float3(1.0f, 1.0f, 1.0f);

View File

@@ -62,10 +62,11 @@ ccl_device float3 integrator_eval_background_shader(KernelGlobals kg,
const float3 ray_P = INTEGRATOR_STATE(state, ray, P);
const float3 ray_D = INTEGRATOR_STATE(state, ray, D);
const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t);
/* multiple importance sampling, get background light pdf for ray
* direction, and compute weight with respect to BSDF pdf */
const float pdf = background_light_pdf(kg, ray_P, ray_D);
const float pdf = background_light_pdf(kg, ray_P - ray_D * mis_ray_t, ray_D);
const float mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, pdf);
L *= mis_weight;
}
@@ -212,7 +213,7 @@ ccl_device void integrator_shade_background(KernelGlobals kg,
}
#endif
integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
}
CCL_NAMESPACE_END
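
Note: without a persistent tmin, the path state again carries mis_ray_t: the distance consumed by transparent bounces and pass-through light hits since the last real scatter. Background and light MIS then reconstruct the original scatter position and the total travelled distance from it, as in the pdf call above. A small sketch of that bookkeeping; light_pdf_from() is a placeholder, and the balance heuristic stands in for whichever heuristic Cycles actually applies:

typedef struct {
  float P[3], D[3];  /* Current ray origin and (normalized) direction. */
  float mis_ray_pdf; /* BSDF pdf at the last real scatter point. */
  float mis_ray_t;   /* Distance travelled since that scatter point. */
} SketchPathState;

/* Placeholder light pdf as seen from a position at a given distance. */
static float light_pdf_from(const float scatter_P[3], const float D[3], float t_total)
{
  (void)scatter_P;
  (void)D;
  (void)t_total;
  return 1.0f / (4.0f * 3.14159265f); /* Pretend the light is uniform over directions. */
}

/* MIS weight for a forward (BSDF-sampled) hit at distance hit_t from the current origin. */
static float sketch_mis_weight_forward(const SketchPathState *s, float hit_t)
{
  float scatter_P[3];
  for (int k = 0; k < 3; k++) {
    scatter_P[k] = s->P[k] - s->D[k] * s->mis_ray_t; /* Undo transparent/light-pass advances. */
  }
  const float t_total = hit_t + s->mis_ray_t; /* Full distance from the real scatter point. */
  const float light_pdf = light_pdf_from(scatter_P, s->D, t_total);
  return s->mis_ray_pdf / (s->mis_ray_pdf + light_pdf); /* Balance heuristic. */
}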

View File

@@ -22,8 +22,19 @@ ccl_device_inline void integrate_light(KernelGlobals kg,
const float3 ray_D = INTEGRATOR_STATE(state, ray, D);
const float ray_time = INTEGRATOR_STATE(state, ray, time);
/* Advance ray to new start distance. */
INTEGRATOR_STATE_WRITE(state, ray, tmin) = intersection_t_offset(isect.t);
/* Advance ray beyond light. */
/* TODO: can we make this more numerically robust to avoid reintersecting the
* same light in some cases? Ray should not intersect surface anymore as the
* object and prim ids will prevent self intersection. */
const float3 new_ray_P = ray_P + ray_D * isect.t;
INTEGRATOR_STATE_WRITE(state, ray, P) = new_ray_P;
INTEGRATOR_STATE_WRITE(state, ray, t) -= isect.t;
/* Set position to where the BSDF was sampled, for correct MIS PDF. */
const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t);
ray_P -= ray_D * mis_ray_t;
isect.t += mis_ray_t;
INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = isect.t;
LightSample ls ccl_optional_struct_init;
const bool use_light_sample = light_sample_from_intersection(kg, &isect, ray_P, ray_D, &ls);
@@ -88,13 +99,11 @@ ccl_device void integrator_shade_light(KernelGlobals kg,
INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = transparent_bounce;
if (transparent_bounce >= kernel_data.integrator.transparent_max_bounce) {
integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
return;
}
else {
integrator_path_next(kg,
state,
DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT,
INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT,
DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
return;
}

View File

@@ -75,9 +75,13 @@ ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg,
ray.self.light_object = OBJECT_NONE;
ray.self.light_prim = PRIM_NONE;
/* Modify ray position and length to match current segment. */
ray.tmin = (hit == 0) ? ray.tmin : INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit - 1, t);
ray.tmax = (hit < num_recorded_hits) ? INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit, t) :
ray.tmax;
const float start_t = (hit == 0) ? 0.0f :
INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit - 1, t);
const float end_t = (hit < num_recorded_hits) ?
INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit, t) :
ray.t;
ray.P += start_t * ray.D;
ray.t = end_t - start_t;
shader_setup_from_volume(kg, shadow_sd, &ray);
@@ -133,7 +137,10 @@ ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg,
/* There are more hits that we could not record due to memory usage,
* adjust ray to intersect again from the last hit. */
const float last_hit_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, num_recorded_hits - 1, t);
INTEGRATOR_STATE_WRITE(state, shadow_ray, tmin) = intersection_t_offset(last_hit_t);
const float3 ray_P = INTEGRATOR_STATE(state, shadow_ray, P);
const float3 ray_D = INTEGRATOR_STATE(state, shadow_ray, D);
INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray_P + last_hit_t * ray_D;
INTEGRATOR_STATE_WRITE(state, shadow_ray, t) -= last_hit_t;
}
return false;
@@ -151,22 +158,20 @@ ccl_device void integrator_shade_shadow(KernelGlobals kg,
/* Evaluate transparent shadows. */
const bool opaque = integrate_transparent_shadow(kg, state, num_hits);
if (opaque) {
integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
return;
}
#endif
if (shadow_intersections_has_remaining(num_hits)) {
/* More intersections to find, continue shadow ray. */
integrator_shadow_path_next(kg,
state,
DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW,
INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW,
DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW);
return;
}
else {
kernel_accum_light(kg, state, render_buffer);
integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
return;
}
}
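
Note: integrate_transparent_volume_shadow() rebuilds each volume segment from the recorded hit distances: segment i runs from the previous hit's t to the next hit's t (or to the full ray length for the last segment), and the ray is rebased onto it by advancing P and shortening t. A runnable scalar sketch of just the segment bookkeeping, assuming the recorded hits are sorted by distance:

#include <stdio.h>

/* Print the sub-ray for each transparent-shadow segment, given the recorded hit
 * distances and the full shadow ray length ray_t. Segment i becomes the ray
 * P' = P + start_t * D with length end_t - start_t. */
static void sketch_shadow_segments(const float *hits, int num_recorded_hits, float ray_t)
{
  for (int hit = 0; hit <= num_recorded_hits; hit++) {
    const float start_t = (hit == 0) ? 0.0f : hits[hit - 1];
    const float end_t = (hit < num_recorded_hits) ? hits[hit] : ray_t;
    printf("segment %d: offset origin by %.3f, length %.3f\n", hit, start_t, end_t - start_t);
  }
}

int main(void)
{
  const float hits[] = {0.4f, 1.1f, 2.0f};
  sketch_shadow_segments(hits, 3, 5.0f); /* 4 segments: [0,0.4), [0.4,1.1), [1.1,2.0), [2.0,5.0) */
  return 0;
}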

View File

@@ -77,7 +77,7 @@ ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg,
# endif
{
const float bsdf_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
const float t = sd->ray_length;
const float t = sd->ray_length + INTEGRATOR_STATE(state, path, mis_ray_t);
/* Multiple importance sampling, get triangle light pdf,
* and compute weight with respect to BSDF pdf. */
@@ -190,8 +190,8 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
const bool is_light = light_sample_is_light(&ls);
/* Branch off shadow kernel. */
IntegratorShadowState shadow_state = integrator_shadow_path_init(
kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, false);
INTEGRATOR_SHADOW_PATH_INIT(
shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, shadow);
/* Copy volume stack and enter/exit volume. */
integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state);
@@ -323,21 +323,16 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
return LABEL_NONE;
}
if (label & LABEL_TRANSPARENT) {
/* Only need to modify start distance for transparent. */
INTEGRATOR_STATE_WRITE(state, ray, tmin) = intersection_t_offset(sd->ray_length);
}
else {
/* Setup ray with changed origin and direction. */
INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(bsdf_omega_in);
INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f;
INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX;
/* Setup ray. Note that clipping works through transparent bounces. */
INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(bsdf_omega_in);
INTEGRATOR_STATE_WRITE(state, ray, t) = (label & LABEL_TRANSPARENT) ?
INTEGRATOR_STATE(state, ray, t) - sd->ray_length :
FLT_MAX;
#ifdef __RAY_DIFFERENTIALS__
INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(bsdf_domega_in);
INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(bsdf_domega_in);
#endif
}
/* Update throughput. */
float3 throughput = INTEGRATOR_STATE(state, path, throughput);
@@ -354,8 +349,12 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
}
/* Update path state */
if (!(label & LABEL_TRANSPARENT)) {
if (label & LABEL_TRANSPARENT) {
INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length;
}
else {
INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = bsdf_pdf;
INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f;
INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf(
bsdf_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf));
}
@@ -372,8 +371,17 @@ ccl_device_forceinline int integrate_surface_volume_only_bounce(IntegratorState
return LABEL_NONE;
}
/* Only modify start distance. */
INTEGRATOR_STATE_WRITE(state, ray, tmin) = intersection_t_offset(sd->ray_length);
/* Setup ray position, direction stays unchanged. */
INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
/* Clipping works through transparent. */
INTEGRATOR_STATE_WRITE(state, ray, t) -= sd->ray_length;
# ifdef __RAY_DIFFERENTIALS__
INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
# endif
INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length;
return LABEL_TRANSMIT | LABEL_TRANSPARENT;
}
@@ -424,8 +432,7 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg,
Ray ray ccl_optional_struct_init;
ray.P = shadow_ray_offset(kg, sd, ao_D, &skip_self);
ray.D = ao_D;
ray.tmin = 0.0f;
ray.tmax = kernel_data.integrator.ao_bounces_distance;
ray.t = kernel_data.integrator.ao_bounces_distance;
ray.time = sd->time;
ray.self.object = (skip_self) ? sd->object : OBJECT_NONE;
ray.self.prim = (skip_self) ? sd->prim : PRIM_NONE;
@@ -435,8 +442,7 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg,
ray.dD = differential_zero_compact();
/* Branch off shadow kernel. */
IntegratorShadowState shadow_state = integrator_shadow_path_init(
kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, true);
INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, ao);
/* Copy volume stack and enter/exit volume. */
integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state);
@@ -598,23 +604,22 @@ ccl_device bool integrate_surface(KernelGlobals kg,
}
template<uint node_feature_mask = KERNEL_FEATURE_NODE_MASK_SURFACE & ~KERNEL_FEATURE_NODE_RAYTRACE,
DeviceKernel current_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE>
int current_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE>
ccl_device_forceinline void integrator_shade_surface(KernelGlobals kg,
IntegratorState state,
ccl_global float *ccl_restrict render_buffer)
{
if (integrate_surface<node_feature_mask>(kg, state, render_buffer)) {
if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SUBSURFACE) {
integrator_path_next(
kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
}
else {
kernel_assert(INTEGRATOR_STATE(state, ray, tmax) != 0.0f);
integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
kernel_assert(INTEGRATOR_STATE(state, ray, t) != 0.0f);
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
}
}
else {
integrator_path_terminate(kg, state, current_kernel);
INTEGRATOR_PATH_TERMINATE(current_kernel);
}
}
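
Note: after this change a bounce always restarts the ray at the shading position; a transparent bounce keeps clipping working by carrying over the remaining length (t - ray_length) and accumulating mis_ray_t, while a real scatter resets t to FLT_MAX and clears mis_ray_t. A condensed sketch of that branch; the struct is illustrative, only the field updates mirror the state writes above:

#include <float.h>
#include <stdbool.h>

typedef struct {
  float P[3], D[3];
  float t; /* Remaining ray length. */
  float mis_ray_pdf;
  float mis_ray_t;
} SketchRayState;

static void sketch_bounce(SketchRayState *s, bool transparent, const float hit_P[3],
                          const float new_D[3], float ray_length, float bsdf_pdf)
{
  /* Restart the ray at the hit point in both cases; clipping works through
   * transparent bounces because only the remaining length is carried over. */
  for (int k = 0; k < 3; k++) {
    s->P[k] = hit_P[k];
    s->D[k] = new_D[k];
  }
  s->t = transparent ? s->t - ray_length : FLT_MAX;

  if (transparent) {
    s->mis_ray_t += ray_length; /* Accumulate distance for later MIS pdfs. */
  }
  else {
    s->mis_ray_pdf = bsdf_pdf; /* New scatter point: reset MIS bookkeeping. */
    s->mis_ray_t = 0.0f;
  }
}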

View File

@@ -114,8 +114,7 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals kg,
ccl_device_forceinline void volume_step_init(KernelGlobals kg,
ccl_private const RNGState *rng_state,
const float object_step_size,
const float tmin,
const float tmax,
float t,
ccl_private float *step_size,
ccl_private float *step_shade_offset,
ccl_private float *steps_offset,
@@ -123,7 +122,7 @@ ccl_device_forceinline void volume_step_init(KernelGlobals kg,
{
if (object_step_size == FLT_MAX) {
/* Homogeneous volume. */
*step_size = tmax - tmin;
*step_size = t;
*step_shade_offset = 0.0f;
*steps_offset = 1.0f;
*max_steps = 1;
@@ -131,7 +130,6 @@ ccl_device_forceinline void volume_step_init(KernelGlobals kg,
else {
/* Heterogeneous volume. */
*max_steps = kernel_data.integrator.volume_max_steps;
const float t = tmax - tmin;
float step = min(object_step_size, t);
/* compute exact steps in advance for malloc */
@@ -167,7 +165,7 @@ ccl_device void volume_shadow_homogeneous(KernelGlobals kg, IntegratorState stat
float3 sigma_t = zero_float3();
if (shadow_volume_shader_sample(kg, state, sd, &sigma_t)) {
*throughput *= volume_color_transmittance(sigma_t, ray->tmax - ray->tmin);
*throughput *= volume_color_transmittance(sigma_t, ray->t);
}
}
# endif
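
Note: volume_step_init() now receives the segment length directly: a homogeneous volume is shaded as a single step over the whole segment, while a heterogeneous one is marched with the object's step size, capped at the global maximum step count. A sketch of that split; the exact rounding and offset handling in Cycles may differ:

#include <float.h>
#include <math.h>

/* Choose a raymarch step size and step count for a volume segment of length t. */
static void sketch_volume_step_init(float t, float object_step_size, int volume_max_steps,
                                    float *step_size, int *max_steps)
{
  if (t <= 0.0f) {
    *step_size = 0.0f;
    *max_steps = 0;
    return;
  }
  if (object_step_size == FLT_MAX) {
    /* Homogeneous volume: a single step covers the segment. */
    *step_size = t;
    *max_steps = 1;
  }
  else {
    /* Heterogeneous volume: march with the object's step size, capped in count. */
    float step = fminf(object_step_size, t);
    int steps = (int)ceilf(t / step);
    if (steps > volume_max_steps) {
      steps = volume_max_steps;
      step = t / (float)steps;
    }
    *step_size = step;
    *max_steps = steps;
  }
}
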
@@ -196,8 +194,7 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
volume_step_init(kg,
&rng_state,
object_step_size,
ray->tmin,
ray->tmax,
ray->t,
&step_size,
&step_shade_offset,
&unused,
@@ -205,13 +202,13 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
const float steps_offset = 1.0f;
/* compute extinction at the start */
float t = ray->tmin;
float t = 0.0f;
float3 sum = zero_float3();
for (int i = 0; i < max_steps; i++) {
/* advance to new position */
float new_t = min(ray->tmax, ray->tmin + (i + steps_offset) * step_size);
float new_t = min(ray->t, (i + steps_offset) * step_size);
float dt = new_t - t;
float3 new_P = ray->P + ray->D * (t + dt * step_shade_offset);
@@ -236,7 +233,7 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
/* stop if at the end of the volume */
t = new_t;
if (t == ray->tmax) {
if (t == ray->t) {
/* Update throughput in case we haven't done it above */
tp = *throughput * exp(sum);
break;
@@ -260,16 +257,15 @@ ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict r
const float xi,
ccl_private float *pdf)
{
const float tmin = ray->tmin;
const float tmax = ray->tmax;
const float t = ray->t;
const float delta = dot((light_P - ray->P), ray->D);
const float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
if (UNLIKELY(D == 0.0f)) {
*pdf = 0.0f;
return 0.0f;
}
const float theta_a = atan2f(tmin - delta, D);
const float theta_b = atan2f(tmax - delta, D);
const float theta_a = -atan2f(delta, D);
const float theta_b = atan2f(t - delta, D);
const float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
if (UNLIKELY(theta_b == theta_a)) {
*pdf = 0.0f;
@@ -277,7 +273,7 @@ ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict r
}
*pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
return clamp(delta + t_, tmin, tmax); /* clamp is only for float precision errors */
return min(t, delta + t_); /* min is only for float precision errors */
}
ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray,
@@ -290,12 +286,11 @@ ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray,
return 0.0f;
}
const float tmin = ray->tmin;
const float tmax = ray->tmax;
const float t = ray->t;
const float t_ = sample_t - delta;
const float theta_a = atan2f(tmin - delta, D);
const float theta_b = atan2f(tmax - delta, D);
const float theta_a = -atan2f(delta, D);
const float theta_b = atan2f(t - delta, D);
if (UNLIKELY(theta_b == theta_a)) {
return 0.0f;
}
@@ -315,12 +310,11 @@ ccl_device float volume_equiangular_cdf(ccl_private const Ray *ccl_restrict ray,
return 0.0f;
}
const float tmin = ray->tmin;
const float tmax = ray->tmax;
const float t = ray->t;
const float t_ = sample_t - delta;
const float theta_a = atan2f(tmin - delta, D);
const float theta_b = atan2f(tmax - delta, D);
const float theta_a = -atan2f(delta, D);
const float theta_b = atan2f(t - delta, D);
if (UNLIKELY(theta_b == theta_a)) {
return 0.0f;
}
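
Note: the equiangular routines above reduce to a one-dimensional change of variables once the light is projected onto the ray: with delta the ray parameter of the point closest to the light and D the perpendicular distance (assumed > 0), distances are sampled proportional to 1 / (D^2 + (t - delta)^2) over [0, ray_t]. A scalar sketch of the matching sample/pdf pair (names are illustrative):

#include <math.h>

/* Sample a distance on [0, ray_t] with density proportional to 1 / (D^2 + (t - delta)^2). */
static float sketch_equiangular_sample(float ray_t, float delta, float D, float xi, float *pdf)
{
  const float theta_a = -atan2f(delta, D);        /* Angle subtended by the ray origin. */
  const float theta_b = atan2f(ray_t - delta, D); /* Angle subtended by the ray end. */
  const float t_ = D * tanf(xi * theta_b + (1.0f - xi) * theta_a);
  if (theta_b == theta_a) {
    *pdf = 0.0f;
    return 0.0f;
  }
  *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
  return fminf(ray_t, delta + t_); /* min only guards against float precision. */
}

/* Pdf of a previously chosen distance sample_t, matching the sampling routine above. */
static float sketch_equiangular_pdf(float ray_t, float delta, float D, float sample_t)
{
  const float theta_a = -atan2f(delta, D);
  const float theta_b = atan2f(ray_t - delta, D);
  const float t_ = sample_t - delta;
  return (theta_b == theta_a) ? 0.0f : D / ((theta_b - theta_a) * (D * D + t_ * t_));
}
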
@@ -396,8 +390,8 @@ ccl_device float3 volume_emission_integrate(ccl_private VolumeShaderCoefficients
typedef struct VolumeIntegrateState {
/* Volume segment extents. */
float tmin;
float tmax;
float start_t;
float end_t;
/* If volume is absorption-only up to this point, and no probabilistic
* scattering or termination has been used yet. */
@@ -432,9 +426,9 @@ ccl_device_forceinline void volume_integrate_step_scattering(
/* Equiangular sampling for direct lighting. */
if (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR && !result.direct_scatter) {
if (result.direct_t >= vstate.tmin && result.direct_t <= vstate.tmax &&
if (result.direct_t >= vstate.start_t && result.direct_t <= vstate.end_t &&
vstate.equiangular_pdf > VOLUME_SAMPLE_PDF_CUTOFF) {
const float new_dt = result.direct_t - vstate.tmin;
const float new_dt = result.direct_t - vstate.start_t;
const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
result.direct_scatter = true;
@@ -464,7 +458,7 @@ ccl_device_forceinline void volume_integrate_step_scattering(
/* compute sampling distance */
const float sample_sigma_t = volume_channel_get(coeff.sigma_t, channel);
const float new_dt = -logf(1.0f - vstate.rscatter) / sample_sigma_t;
const float new_t = vstate.tmin + new_dt;
const float new_t = vstate.start_t + new_dt;
/* transmittance and pdf */
const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
@@ -534,8 +528,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
volume_step_init(kg,
rng_state,
object_step_size,
ray->tmin,
ray->tmax,
ray->t,
&step_size,
&step_shade_offset,
&steps_offset,
@@ -543,8 +536,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
/* Initialize volume integration state. */
VolumeIntegrateState vstate ccl_optional_struct_init;
vstate.tmin = ray->tmin;
vstate.tmax = ray->tmin;
vstate.start_t = 0.0f;
vstate.end_t = 0.0f;
vstate.absorption_only = true;
vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_SCATTER_DISTANCE);
vstate.rphase = path_state_rng_1D(kg, rng_state, PRNG_PHASE_CHANNEL);
@@ -585,8 +578,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
for (int i = 0; i < max_steps; i++) {
/* Advance to new position */
vstate.tmax = min(ray->tmax, ray->tmin + (i + steps_offset) * step_size);
const float shade_t = vstate.tmin + (vstate.tmax - vstate.tmin) * step_shade_offset;
vstate.end_t = min(ray->t, (i + steps_offset) * step_size);
const float shade_t = vstate.start_t + (vstate.end_t - vstate.start_t) * step_shade_offset;
sd->P = ray->P + ray->D * shade_t;
/* compute segment */
@@ -595,7 +588,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
const int closure_flag = sd->flag;
/* Evaluate transmittance over segment. */
const float dt = (vstate.tmax - vstate.tmin);
const float dt = (vstate.end_t - vstate.start_t);
const float3 transmittance = (closure_flag & SD_EXTINCTION) ?
volume_color_transmittance(coeff.sigma_t, dt) :
one_float3();
@@ -652,8 +645,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
}
/* Stop if at the end of the volume. */
vstate.tmin = vstate.tmax;
if (vstate.tmin == ray->tmax) {
vstate.start_t = vstate.end_t;
if (vstate.start_t == ray->t) {
break;
}
}
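
Note: the marching loop tracks the current step as a [start_t, end_t] pair measured from the segment start, with end_t = min(t, (i + offset) * step) each iteration. A scalar sketch of the same traversal accumulating optical depth for a made-up extinction function; sigma_t() here is a stand-in, not a Cycles call:

#include <math.h>

/* Toy extinction coefficient along the ray; any positive function works for the sketch. */
static float sigma_t(float t)
{
  return 0.5f + 0.25f * sinf(t);
}

/* March a volume segment of length t and return the transmittance exp(-integral of sigma_t). */
static float sketch_march_segment(float t, float step_size, int max_steps, float steps_offset)
{
  float optical_depth = 0.0f;
  float start_t = 0.0f;
  for (int i = 0; i < max_steps; i++) {
    const float end_t = fminf(t, (i + steps_offset) * step_size);
    const float dt = end_t - start_t;
    optical_depth += sigma_t(start_t + 0.5f * dt) * dt; /* Midpoint rule per step. */
    start_t = end_t;
    if (start_t == t) {
      break; /* Reached the end of the segment. */
    }
  }
  return expf(-optical_depth);
}
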
@@ -781,8 +774,8 @@ ccl_device_forceinline void integrate_volume_direct_light(
const bool is_light = light_sample_is_light(ls);
/* Branch off shadow kernel. */
IntegratorShadowState shadow_state = integrator_shadow_path_init(
kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, false);
INTEGRATOR_SHADOW_PATH_INIT(
shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, shadow);
/* Write shadow ray and associated state to global memory. */
integrator_state_write_shadow_ray(kg, shadow_state, &ray);
@@ -887,8 +880,7 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
/* Setup ray. */
INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(phase_omega_in);
INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f;
INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX;
INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX;
# ifdef __RAY_DIFFERENTIALS__
INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(phase_domega_in);
@@ -909,6 +901,7 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
/* Update path state */
INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = phase_pdf;
INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f;
INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf(
phase_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf));
@@ -1028,7 +1021,7 @@ ccl_device void integrator_shade_volume(KernelGlobals kg,
integrator_state_read_isect(kg, state, &isect);
/* Set ray length to current segment. */
ray.tmax = (isect.prim != PRIM_NONE) ? isect.t : FLT_MAX;
ray.t = (isect.prim != PRIM_NONE) ? isect.t : FLT_MAX;
/* Clean volume stack for background rays. */
if (isect.prim == PRIM_NONE) {
@@ -1039,15 +1032,13 @@ ccl_device void integrator_shade_volume(KernelGlobals kg,
if (event == VOLUME_PATH_SCATTERED) {
/* Queue intersect_closest kernel. */
integrator_path_next(kg,
state,
DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME,
INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME,
DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
return;
}
else if (event == VOLUME_PATH_MISSED) {
/* End path. */
integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
return;
}
else {

View File

@@ -50,7 +50,7 @@ ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(KernelGlobals
ccl_device_inline bool kernel_shadow_catcher_path_can_split(KernelGlobals kg,
ConstIntegratorState state)
{
if (integrator_path_is_terminated(state)) {
if (INTEGRATOR_PATH_IS_TERMINATED) {
return false;
}

View File

@@ -47,8 +47,7 @@ KERNEL_STRUCT_END(shadow_path)
KERNEL_STRUCT_BEGIN(shadow_ray)
KERNEL_STRUCT_MEMBER(shadow_ray, packed_float3, P, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(shadow_ray, packed_float3, D, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(shadow_ray, float, tmin, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(shadow_ray, float, tmax, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(shadow_ray, float, t, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(shadow_ray, float, time, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(shadow_ray, float, dP, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(shadow_ray, int, object, KERNEL_FEATURE_PATH_TRACING)

View File

@@ -127,9 +127,6 @@ typedef struct IntegratorStateGPU {
/* Index of main path which will be used by a next shadow catcher split. */
ccl_global int *next_main_path_index;
/* Divisor used to partition active indices by locality when sorting by material. */
uint sort_partition_divisor;
} IntegratorStateGPU;
/* Abstraction

View File

@@ -10,196 +10,125 @@ CCL_NAMESPACE_BEGIN
/* Control Flow
*
* Utilities for control flow between kernels. The implementation is different between CPU and
* GPU devices. For the latter, part of the logic is handled on the host side with wavefronts.
* Utilities for control flow between kernels. The implementation may differ per device
* or even be handled on the host side. To abstract such differences, experiment with
* different implementations and for debugging, this is abstracted using macros.
*
* There is a main path for regular path tracing of camera rays. Shadows for next
* event estimation branch off from this into their own path, that may be computed in
* parallel while the main path continues. Additionally, shading kernels are sorted using
* a key for coherence.
* parallel while the main path continues.
*
* Each kernel on the main path must call one of these functions. These may not be called
* multiple times from the same kernel.
*
* integrator_path_init(kg, state, next_kernel)
* integrator_path_next(kg, state, current_kernel, next_kernel)
* integrator_path_terminate(kg, state, current_kernel)
* INTEGRATOR_PATH_INIT(next_kernel)
* INTEGRATOR_PATH_NEXT(current_kernel, next_kernel)
* INTEGRATOR_PATH_TERMINATE(current_kernel)
*
* For the shadow path similar functions are used, and again each shadow kernel must call
* one of them, and only once.
*/
ccl_device_forceinline bool integrator_path_is_terminated(ConstIntegratorState state)
{
return INTEGRATOR_STATE(state, path, queued_kernel) == 0;
}
ccl_device_forceinline bool integrator_shadow_path_is_terminated(ConstIntegratorShadowState state)
{
return INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0;
}
#define INTEGRATOR_PATH_IS_TERMINATED (INTEGRATOR_STATE(state, path, queued_kernel) == 0)
#define INTEGRATOR_SHADOW_PATH_IS_TERMINATED \
(INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0)
#ifdef __KERNEL_GPU__
ccl_device_forceinline void integrator_path_init(KernelGlobals kg,
IntegratorState state,
const DeviceKernel next_kernel)
{
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
}
# define INTEGRATOR_PATH_INIT(next_kernel) \
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
1); \
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \
atomic_fetch_and_sub_uint32( \
&kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
1); \
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
# define INTEGRATOR_PATH_TERMINATE(current_kernel) \
atomic_fetch_and_sub_uint32( \
&kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
ccl_device_forceinline void integrator_path_next(KernelGlobals kg,
IntegratorState state,
const DeviceKernel current_kernel,
const DeviceKernel next_kernel)
{
atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
1);
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
}
# define INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, next_kernel, shadow_type) \
IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32( \
&kernel_integrator_state.next_shadow_path_index[0], 1); \
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
1); \
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \
atomic_fetch_and_sub_uint32( \
&kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
1); \
INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \
atomic_fetch_and_sub_uint32( \
&kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg,
IntegratorState state,
const DeviceKernel current_kernel)
{
atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
1);
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
}
ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init(
KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao)
{
IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32(
&kernel_integrator_state.next_shadow_path_index[0], 1);
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
return shadow_state;
}
ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg,
IntegratorShadowState state,
const DeviceKernel current_kernel,
const DeviceKernel next_kernel)
{
atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
1);
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
}
ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg,
IntegratorShadowState state,
const DeviceKernel current_kernel)
{
atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
1);
INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
}
/* Sort first by truncated state index (for good locality), then by key (for good coherence). */
# define INTEGRATOR_SORT_KEY(key, state) \
(key + kernel_data.max_shaders * (state / kernel_integrator_state.sort_partition_divisor))
ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg,
IntegratorState state,
const DeviceKernel next_kernel,
const uint32_t key)
{
const int key_ = INTEGRATOR_SORT_KEY(key, state);
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_;
atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1);
}
ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg,
IntegratorState state,
const DeviceKernel current_kernel,
const DeviceKernel next_kernel,
const uint32_t key)
{
const int key_ = INTEGRATOR_SORT_KEY(key, state);
atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
1);
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_;
atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1);
}
# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \
{ \
const int key_ = key; \
atomic_fetch_and_add_uint32( \
&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \
atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \
1); \
}
# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \
{ \
const int key_ = key; \
atomic_fetch_and_sub_uint32( \
&kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
atomic_fetch_and_add_uint32( \
&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \
atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \
1); \
}
#else
ccl_device_forceinline void integrator_path_init(KernelGlobals kg,
IntegratorState state,
const DeviceKernel next_kernel)
{
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
}
# define INTEGRATOR_PATH_INIT(next_kernel) \
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \
{ \
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
(void)key; \
}
# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \
{ \
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
(void)current_kernel; \
}
# define INTEGRATOR_PATH_TERMINATE(current_kernel) \
{ \
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; \
(void)current_kernel; \
}
# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \
{ \
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
(void)key; \
(void)current_kernel; \
}
ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg,
IntegratorState state,
const DeviceKernel next_kernel,
const uint32_t key)
{
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
(void)key;
}
ccl_device_forceinline void integrator_path_next(KernelGlobals kg,
IntegratorState state,
const DeviceKernel current_kernel,
const DeviceKernel next_kernel)
{
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
(void)current_kernel;
}
ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg,
IntegratorState state,
const DeviceKernel current_kernel)
{
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
(void)current_kernel;
}
ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg,
IntegratorState state,
const DeviceKernel current_kernel,
const DeviceKernel next_kernel,
const uint32_t key)
{
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
(void)key;
(void)current_kernel;
}
ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init(
KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao)
{
IntegratorShadowState shadow_state = (is_ao) ? &state->ao : &state->shadow;
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
return shadow_state;
}
ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg,
IntegratorShadowState state,
const DeviceKernel current_kernel,
const DeviceKernel next_kernel)
{
INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
(void)current_kernel;
}
ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg,
IntegratorShadowState state,
const DeviceKernel current_kernel)
{
INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
(void)current_kernel;
}
# define INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, next_kernel, shadow_type) \
IntegratorShadowState shadow_state = &state->shadow_type; \
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \
{ \
INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; \
(void)current_kernel; \
}
# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \
{ \
INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; \
(void)current_kernel; \
}
#endif
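
Note: on the GPU these macros only bump per-kernel queue counters and tag each path with its queued kernel; the host side then keeps launching whichever kernel has the most queued paths until every queue is empty. A much-simplified sketch of that outer loop, purely to illustrate counter-driven scheduling; launch_kernel(), NUM_KERNELS and the stub behaviour are placeholders:

#include <stdint.h>

#define NUM_KERNELS 16

/* Toy stand-in: pretend the launch consumed every path queued for this kernel.
 * A real launch would run the kernel over the tagged paths, which then requeue
 * themselves via the macros above. */
static int launch_kernel(int kernel, uint32_t *num_queued)
{
  const int n = (int)num_queued[kernel];
  num_queued[kernel] = 0;
  return n;
}

static void sketch_schedule_wavefronts(uint32_t num_queued[NUM_KERNELS])
{
  for (;;) {
    /* Pick the kernel with the most queued paths to keep wavefronts large and coherent. */
    int best = -1;
    uint32_t best_count = 0;
    for (int k = 0; k < NUM_KERNELS; k++) {
      if (num_queued[k] > best_count) {
        best = k;
        best_count = num_queued[k];
      }
    }
    if (best == -1) {
      break; /* All queues empty: every path has terminated. */
    }
    launch_kernel(best, num_queued);
  }
}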

View File

@@ -37,10 +37,11 @@ KERNEL_STRUCT_MEMBER(path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING)
/* enum PathRayMNEE */
KERNEL_STRUCT_MEMBER(path, uint8_t, mnee, KERNEL_FEATURE_PATH_TRACING)
/* Multiple importance sampling
* The PDF of BSDF sampling at the last scatter point, which is at ray distance
* zero and distance. Note that transparency and volume attenuation increase
* the ray tmin but keep P unmodified so that this works. */
* The PDF of BSDF sampling at the last scatter point, and distance to the
* last scatter point minus the last ray segment. This distance lets us
* compute the complete distance through transparent surfaces and volumes. */
KERNEL_STRUCT_MEMBER(path, float, mis_ray_pdf, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(path, float, mis_ray_t, KERNEL_FEATURE_PATH_TRACING)
/* Filter glossy. */
KERNEL_STRUCT_MEMBER(path, float, min_ray_pdf, KERNEL_FEATURE_PATH_TRACING)
/* Continuation probability for path termination. */
@@ -62,8 +63,7 @@ KERNEL_STRUCT_END(path)
KERNEL_STRUCT_BEGIN(ray)
KERNEL_STRUCT_MEMBER(ray, packed_float3, P, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(ray, packed_float3, D, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(ray, float, tmin, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(ray, float, tmax, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(ray, float, t, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(ray, float, time, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(ray, float, dP, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(ray, float, dD, KERNEL_FEATURE_PATH_TRACING)

View File

@@ -17,8 +17,7 @@ ccl_device_forceinline void integrator_state_write_ray(KernelGlobals kg,
{
INTEGRATOR_STATE_WRITE(state, ray, P) = ray->P;
INTEGRATOR_STATE_WRITE(state, ray, D) = ray->D;
INTEGRATOR_STATE_WRITE(state, ray, tmin) = ray->tmin;
INTEGRATOR_STATE_WRITE(state, ray, tmax) = ray->tmax;
INTEGRATOR_STATE_WRITE(state, ray, t) = ray->t;
INTEGRATOR_STATE_WRITE(state, ray, time) = ray->time;
INTEGRATOR_STATE_WRITE(state, ray, dP) = ray->dP;
INTEGRATOR_STATE_WRITE(state, ray, dD) = ray->dD;
@@ -30,8 +29,7 @@ ccl_device_forceinline void integrator_state_read_ray(KernelGlobals kg,
{
ray->P = INTEGRATOR_STATE(state, ray, P);
ray->D = INTEGRATOR_STATE(state, ray, D);
ray->tmin = INTEGRATOR_STATE(state, ray, tmin);
ray->tmax = INTEGRATOR_STATE(state, ray, tmax);
ray->t = INTEGRATOR_STATE(state, ray, t);
ray->time = INTEGRATOR_STATE(state, ray, time);
ray->dP = INTEGRATOR_STATE(state, ray, dP);
ray->dD = INTEGRATOR_STATE(state, ray, dD);
@@ -44,8 +42,7 @@ ccl_device_forceinline void integrator_state_write_shadow_ray(
{
INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray->P;
INTEGRATOR_STATE_WRITE(state, shadow_ray, D) = ray->D;
INTEGRATOR_STATE_WRITE(state, shadow_ray, tmin) = ray->tmin;
INTEGRATOR_STATE_WRITE(state, shadow_ray, tmax) = ray->tmax;
INTEGRATOR_STATE_WRITE(state, shadow_ray, t) = ray->t;
INTEGRATOR_STATE_WRITE(state, shadow_ray, time) = ray->time;
INTEGRATOR_STATE_WRITE(state, shadow_ray, dP) = ray->dP;
}
@@ -56,8 +53,7 @@ ccl_device_forceinline void integrator_state_read_shadow_ray(KernelGlobals kg,
{
ray->P = INTEGRATOR_STATE(state, shadow_ray, P);
ray->D = INTEGRATOR_STATE(state, shadow_ray, D);
ray->tmin = INTEGRATOR_STATE(state, shadow_ray, tmin);
ray->tmax = INTEGRATOR_STATE(state, shadow_ray, tmax);
ray->t = INTEGRATOR_STATE(state, shadow_ray, t);
ray->time = INTEGRATOR_STATE(state, shadow_ray, time);
ray->dP = INTEGRATOR_STATE(state, shadow_ray, dP);
ray->dD = differential_zero_compact();

View File

@@ -38,8 +38,7 @@ ccl_device int subsurface_bounce(KernelGlobals kg,
/* Setup ray into surface. */
INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
INTEGRATOR_STATE_WRITE(state, ray, D) = bssrdf->N;
INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f;
INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX;
INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX;
INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_zero_compact();
@@ -161,7 +160,7 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat
/* Pretend ray is coming from the outside towards the exit point. This ensures
* correct front/back facing normals.
* TODO: find a more elegant solution? */
ray.P += ray.D * ray.tmax * 2.0f;
ray.P += ray.D * ray.t * 2.0f;
ray.D = -ray.D;
integrator_state_write_isect(kg, state, &ss_isect.hits[0]);
@@ -178,23 +177,17 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat
const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE);
if (use_caustics) {
integrator_path_next_sorted(kg,
state,
DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE,
shader);
}
else if (use_raytrace_kernel) {
integrator_path_next_sorted(kg,
state,
DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE,
shader);
}
else {
integrator_path_next_sorted(kg,
state,
DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE,
shader);
}
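The "pretend the ray is coming from the outside" step above mirrors the ray about its exit point: with origin moved to P + 2t*D and direction -D, parameter t lands on the same exit hit but approached from the other side. A tiny self-contained illustration (a minimal vector type, not kernel code):

#include <cassert>

// Minimal 3-vector with only what the demo needs (not the kernel's float3).
struct V3 { float x, y, z; };
static V3 add(V3 a, V3 b) { return {a.x + b.x, a.y + b.y, a.z + b.z}; }
static V3 mul(V3 a, float s) { return {a.x * s, a.y * s, a.z * s}; }
static V3 neg(V3 a) { return {-a.x, -a.y, -a.z}; }

int main()
{
  V3 P{0, 0, 0}, D{0, 0, 1};
  float t = 2.5f;                   // distance to the exit hit
  V3 exit_hit = add(P, mul(D, t));  // original exit point

  // The flip used above: move the origin past the exit point and reverse D.
  V3 P2 = add(P, mul(D, 2.0f * t));
  V3 D2 = neg(D);

  // Travelling distance t along the flipped ray lands on the same exit point,
  // but the surface is now approached from the outside.
  V3 hit2 = add(P2, mul(D2, t));
  assert(hit2.z == exit_hit.z);
  return 0;
}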

View File

@@ -82,8 +82,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
/* Create ray. */
ray.P = P + disk_N * disk_height + disk_P;
ray.D = -disk_N;
ray.tmin = 0.0f;
ray.tmax = 2.0f * disk_height;
ray.t = 2.0f * disk_height;
ray.dP = ray_dP;
ray.dD = differential_zero_compact();
ray.time = time;
@@ -189,8 +188,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
ray.P = ray.P + ray.D * ss_isect.hits[hit].t;
ray.D = ss_isect.Ng[hit];
ray.tmin = 0.0f;
ray.tmax = 1.0f;
ray.t = 1.0f;
return true;
}

View File

@@ -195,8 +195,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
/* Setup ray. */
ray.P = P;
ray.D = D;
ray.tmin = 0.0f;
ray.tmax = FLT_MAX;
ray.t = FLT_MAX;
ray.time = time;
ray.dP = ray_dP;
ray.dD = differential_zero_compact();
@@ -371,10 +370,10 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
* chance of connecting to it.
* TODO: Maybe use less than 10 times the mean free path? */
if (bounce == 0) {
ray.tmax = max(t, 10.0f / (reduce_min(sigma_t)));
ray.t = max(t, 10.0f / (reduce_min(sigma_t)));
}
else {
ray.tmax = t;
ray.t = t;
/* After the first bounce the object can intersect the same surface again */
ray.self.object = OBJECT_NONE;
ray.self.prim = PRIM_NONE;
@@ -385,12 +384,12 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
if (hit) {
#ifdef __KERNEL_GPU_RAYTRACING__
/* t is always in world space with OptiX and MetalRT. */
ray.tmax = ss_isect.hits[0].t;
ray.t = ss_isect.hits[0].t;
#else
/* Compute world space distance to surface hit. */
float3 D = transform_direction(&ob_itfm, ray.D);
D = normalize(D) * ss_isect.hits[0].t;
ray.tmax = len(transform_direction(&ob_tfm, D));
ray.t = len(transform_direction(&ob_tfm, D));
#endif
}
@@ -398,16 +397,16 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
/* Check if we hit the opposite side. */
if (hit) {
have_opposite_interface = true;
opposite_distance = dot(ray.P + ray.tmax * ray.D - P, -N);
opposite_distance = dot(ray.P + ray.t * ray.D - P, -N);
}
/* Apart from the opposite side check, we were supposed to only trace up to distance t,
* so check if there would have been a hit in that case. */
hit = ray.tmax < t;
hit = ray.t < t;
}
/* Use the distance to the exit point for the throughput update if we found one. */
if (hit) {
t = ray.tmax;
t = ray.t;
}
/* Advance to new scatter location. */
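The comment above explains why the first random-walk bounce probes further than the sampled distance t: extending the ray to at least ten mean free paths gives a chance of finding the opposite interface. A hedged scalar sketch of that clamp (sigma_t_min stands in for reduce_min(sigma_t); names are simplified, not the kernel's):

#include <algorithm>

// Illustrative only: the first bounce probes beyond the sampled distance t so a
// nearby back side ("opposite interface") can be detected; later bounces do not.
static float probe_distance(float t, float sigma_t_min, int bounce)
{
  const float mean_free_path = 1.0f / sigma_t_min;
  return (bounce == 0) ? std::max(t, 10.0f * mean_free_path) : t;
}

A hit closer than t then terminates the step early, which is exactly what the hit = ray.t < t check above expresses.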

View File

@@ -270,26 +270,31 @@ ccl_device bool lights_intersect(KernelGlobals kg,
if (type == LIGHT_SPOT) {
/* Spot/Disk light. */
const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t);
const float3 ray_P = ray->P - ray->D * mis_ray_t;
const float3 lightP = make_float3(klight->co[0], klight->co[1], klight->co[2]);
const float radius = klight->spot.radius;
if (radius == 0.0f) {
continue;
}
/* disk oriented normal */
const float3 lightN = normalize(ray->P - lightP);
const float3 lightN = normalize(ray_P - lightP);
/* One sided. */
if (dot(ray->D, lightN) >= 0.0f) {
continue;
}
float3 P;
if (!ray_disk_intersect(
ray->P, ray->D, ray->tmin, ray->tmax, lightP, lightN, radius, &P, &t)) {
if (!ray_disk_intersect(ray->P, ray->D, ray->t, lightP, lightN, radius, &P, &t)) {
continue;
}
}
else if (type == LIGHT_POINT) {
/* Sphere light (aka, aligned disk light). */
const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t);
const float3 ray_P = ray->P - ray->D * mis_ray_t;
const float3 lightP = make_float3(klight->co[0], klight->co[1], klight->co[2]);
const float radius = klight->spot.radius;
if (radius == 0.0f) {
@@ -297,10 +302,9 @@ ccl_device bool lights_intersect(KernelGlobals kg,
}
/* disk oriented normal */
const float3 lightN = normalize(ray->P - lightP);
const float3 lightN = normalize(ray_P - lightP);
float3 P;
if (!ray_disk_intersect(
ray->P, ray->D, ray->tmin, ray->tmax, lightP, lightN, radius, &P, &t)) {
if (!ray_disk_intersect(ray->P, ray->D, ray->t, lightP, lightN, radius, &P, &t)) {
continue;
}
}
@@ -326,19 +330,8 @@ ccl_device bool lights_intersect(KernelGlobals kg,
const float3 light_P = make_float3(klight->co[0], klight->co[1], klight->co[2]);
float3 P;
if (!ray_quad_intersect(ray->P,
ray->D,
ray->tmin,
ray->tmax,
light_P,
axisu,
axisv,
Ng,
&P,
&t,
&u,
&v,
is_round)) {
if (!ray_quad_intersect(
ray->P, ray->D, 0.0f, ray->t, light_P, axisu, axisv, Ng, &P, &t, &u, &v, is_round)) {
continue;
}
}
@@ -782,8 +775,7 @@ ccl_device_forceinline void triangle_light_sample(KernelGlobals kg,
ls->D = z * B + safe_sqrtf(1.0f - z * z) * safe_normalize(C_ - dot(C_, B) * B);
/* calculate intersection with the planar triangle */
if (!ray_triangle_intersect(
P, ls->D, 0.0f, FLT_MAX, V[0], V[1], V[2], &ls->u, &ls->v, &ls->t)) {
if (!ray_triangle_intersect(P, ls->D, FLT_MAX, V[0], V[1], V[2], &ls->u, &ls->v, &ls->t)) {
ls->pdf = 0.0f;
return;
}

View File

@@ -227,24 +227,23 @@ ccl_device_inline void shadow_ray_setup(ccl_private const ShaderData *ccl_restri
if (ls->shader & SHADER_CAST_SHADOW) {
/* setup ray */
ray->P = P;
ray->tmin = 0.0f;
if (ls->t == FLT_MAX) {
/* distant light */
ray->D = ls->D;
ray->tmax = ls->t;
ray->t = ls->t;
}
else {
/* other lights, avoid self-intersection */
ray->D = ls->P - P;
ray->D = normalize_len(ray->D, &ray->tmax);
ray->D = normalize_len(ray->D, &ray->t);
}
}
else {
/* signal to not cast shadow ray */
ray->P = zero_float3();
ray->D = zero_float3();
ray->tmax = 0.0f;
ray->t = 0.0f;
}
ray->dP = differential_make_compact(sd->dP);
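The branch above distinguishes the two light cases: a distant light keeps the sampled direction and an unbounded length, while any other light aims at the sampled point and derives the ray length from that vector. A hedged sketch of the same decision with simplified stand-in types (not the kernel's float3 or LightSample):

#include <cfloat>
#include <cmath>

// Simplified stand-ins, for illustration only.
struct V3 { float x, y, z; };
static V3 sub(V3 a, V3 b) { return {a.x - b.x, a.y - b.y, a.z - b.z}; }
static V3 mul(V3 a, float s) { return {a.x * s, a.y * s, a.z * s}; }
static float len(V3 a) { return std::sqrt(a.x * a.x + a.y * a.y + a.z * a.z); }

// Returns the shadow-ray direction and writes its length, mirroring the
// distant-vs-local branch above. FLT_MAX-length samples are treated as distant.
static V3 shadow_ray_dir(V3 P, V3 light_P, V3 light_D, float light_t, float *r_t)
{
  if (light_t == FLT_MAX) {  // distant light: fixed direction, unbounded length
    *r_t = light_t;
    return light_D;
  }
  V3 d = sub(light_P, P);    // local light: aim at the sampled light point
  *r_t = len(d);
  return mul(d, 1.0f / *r_t);
}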

View File

@@ -1094,8 +1094,10 @@ bool OSLRenderServices::get_background_attribute(const KernelGlobalsCPU *kg,
ndc[0] = camera_world_to_ndc(kg, sd, sd->ray_P);
if (derivatives) {
ndc[1] = zero_float3();
ndc[2] = zero_float3();
ndc[1] = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(sd->ray_dP, 0.0f, 0.0f)) -
ndc[0];
ndc[2] = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(0.0f, sd->ray_dP, 0.0f)) -
ndc[0];
}
}
else {
@@ -1669,8 +1671,7 @@ bool OSLRenderServices::trace(TraceOpt &options,
ray.P = TO_FLOAT3(P);
ray.D = TO_FLOAT3(R);
ray.tmin = 0.0f;
ray.tmax = (options.maxdist == 1.0e30f) ? FLT_MAX : options.maxdist - options.mindist;
ray.t = (options.maxdist == 1.0e30f) ? FLT_MAX : options.maxdist - options.mindist;
ray.time = sd->time;
ray.self.object = OBJECT_NONE;
ray.self.prim = PRIM_NONE;

View File

@@ -59,8 +59,7 @@ ccl_device float svm_ao(
Ray ray;
ray.P = sd->P;
ray.D = D.x * T + D.y * B + D.z * N;
ray.tmin = 0.0f;
ray.tmax = max_dist;
ray.t = max_dist;
ray.time = sd->time;
ray.self.object = sd->object;
ray.self.prim = sd->prim;

View File

@@ -179,8 +179,7 @@ ccl_device float3 svm_bevel(
Ray ray ccl_optional_struct_init;
ray.P = sd->P + disk_N * disk_height + disk_P;
ray.D = -disk_N;
ray.tmin = 0.0f;
ray.tmax = 2.0f * disk_height;
ray.t = 2.0f * disk_height;
ray.dP = differential_zero_compact();
ray.dD = differential_zero_compact();
ray.time = sd->time;

View File

@@ -1,110 +0,0 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#ifndef SHADER_NODE_TYPE
# define SHADER_NODE_TYPE(name)
#endif
/* NOTE: for best OpenCL performance, item definition in the enum must
* match the switch case order in `svm.h`. */
SHADER_NODE_TYPE(NODE_END)
SHADER_NODE_TYPE(NODE_SHADER_JUMP)
SHADER_NODE_TYPE(NODE_CLOSURE_BSDF)
SHADER_NODE_TYPE(NODE_CLOSURE_EMISSION)
SHADER_NODE_TYPE(NODE_CLOSURE_BACKGROUND)
SHADER_NODE_TYPE(NODE_CLOSURE_SET_WEIGHT)
SHADER_NODE_TYPE(NODE_CLOSURE_WEIGHT)
SHADER_NODE_TYPE(NODE_EMISSION_WEIGHT)
SHADER_NODE_TYPE(NODE_MIX_CLOSURE)
SHADER_NODE_TYPE(NODE_JUMP_IF_ZERO)
SHADER_NODE_TYPE(NODE_JUMP_IF_ONE)
SHADER_NODE_TYPE(NODE_GEOMETRY)
SHADER_NODE_TYPE(NODE_CONVERT)
SHADER_NODE_TYPE(NODE_TEX_COORD)
SHADER_NODE_TYPE(NODE_VALUE_F)
SHADER_NODE_TYPE(NODE_VALUE_V)
SHADER_NODE_TYPE(NODE_ATTR)
SHADER_NODE_TYPE(NODE_VERTEX_COLOR)
SHADER_NODE_TYPE(NODE_GEOMETRY_BUMP_DX)
SHADER_NODE_TYPE(NODE_GEOMETRY_BUMP_DY)
SHADER_NODE_TYPE(NODE_SET_DISPLACEMENT)
SHADER_NODE_TYPE(NODE_DISPLACEMENT)
SHADER_NODE_TYPE(NODE_VECTOR_DISPLACEMENT)
SHADER_NODE_TYPE(NODE_TEX_IMAGE)
SHADER_NODE_TYPE(NODE_TEX_IMAGE_BOX)
SHADER_NODE_TYPE(NODE_TEX_NOISE)
SHADER_NODE_TYPE(NODE_SET_BUMP)
SHADER_NODE_TYPE(NODE_ATTR_BUMP_DX)
SHADER_NODE_TYPE(NODE_ATTR_BUMP_DY)
SHADER_NODE_TYPE(NODE_VERTEX_COLOR_BUMP_DX)
SHADER_NODE_TYPE(NODE_VERTEX_COLOR_BUMP_DY)
SHADER_NODE_TYPE(NODE_TEX_COORD_BUMP_DX)
SHADER_NODE_TYPE(NODE_TEX_COORD_BUMP_DY)
SHADER_NODE_TYPE(NODE_CLOSURE_SET_NORMAL)
SHADER_NODE_TYPE(NODE_ENTER_BUMP_EVAL)
SHADER_NODE_TYPE(NODE_LEAVE_BUMP_EVAL)
SHADER_NODE_TYPE(NODE_HSV)
SHADER_NODE_TYPE(NODE_CLOSURE_HOLDOUT)
SHADER_NODE_TYPE(NODE_FRESNEL)
SHADER_NODE_TYPE(NODE_LAYER_WEIGHT)
SHADER_NODE_TYPE(NODE_CLOSURE_VOLUME)
SHADER_NODE_TYPE(NODE_PRINCIPLED_VOLUME)
SHADER_NODE_TYPE(NODE_MATH)
SHADER_NODE_TYPE(NODE_VECTOR_MATH)
SHADER_NODE_TYPE(NODE_RGB_RAMP)
SHADER_NODE_TYPE(NODE_GAMMA)
SHADER_NODE_TYPE(NODE_BRIGHTCONTRAST)
SHADER_NODE_TYPE(NODE_LIGHT_PATH)
SHADER_NODE_TYPE(NODE_OBJECT_INFO)
SHADER_NODE_TYPE(NODE_PARTICLE_INFO)
SHADER_NODE_TYPE(NODE_HAIR_INFO)
SHADER_NODE_TYPE(NODE_POINT_INFO)
SHADER_NODE_TYPE(NODE_TEXTURE_MAPPING)
SHADER_NODE_TYPE(NODE_MAPPING)
SHADER_NODE_TYPE(NODE_MIN_MAX)
SHADER_NODE_TYPE(NODE_CAMERA)
SHADER_NODE_TYPE(NODE_TEX_ENVIRONMENT)
SHADER_NODE_TYPE(NODE_TEX_SKY)
SHADER_NODE_TYPE(NODE_TEX_GRADIENT)
SHADER_NODE_TYPE(NODE_TEX_VORONOI)
SHADER_NODE_TYPE(NODE_TEX_MUSGRAVE)
SHADER_NODE_TYPE(NODE_TEX_WAVE)
SHADER_NODE_TYPE(NODE_TEX_MAGIC)
SHADER_NODE_TYPE(NODE_TEX_CHECKER)
SHADER_NODE_TYPE(NODE_TEX_BRICK)
SHADER_NODE_TYPE(NODE_TEX_WHITE_NOISE)
SHADER_NODE_TYPE(NODE_NORMAL)
SHADER_NODE_TYPE(NODE_LIGHT_FALLOFF)
SHADER_NODE_TYPE(NODE_IES)
SHADER_NODE_TYPE(NODE_CURVES)
SHADER_NODE_TYPE(NODE_TANGENT)
SHADER_NODE_TYPE(NODE_NORMAL_MAP)
SHADER_NODE_TYPE(NODE_INVERT)
SHADER_NODE_TYPE(NODE_MIX)
SHADER_NODE_TYPE(NODE_SEPARATE_COLOR)
SHADER_NODE_TYPE(NODE_COMBINE_COLOR)
SHADER_NODE_TYPE(NODE_SEPARATE_VECTOR)
SHADER_NODE_TYPE(NODE_COMBINE_VECTOR)
SHADER_NODE_TYPE(NODE_SEPARATE_HSV)
SHADER_NODE_TYPE(NODE_COMBINE_HSV)
SHADER_NODE_TYPE(NODE_VECTOR_ROTATE)
SHADER_NODE_TYPE(NODE_VECTOR_TRANSFORM)
SHADER_NODE_TYPE(NODE_WIREFRAME)
SHADER_NODE_TYPE(NODE_WAVELENGTH)
SHADER_NODE_TYPE(NODE_BLACKBODY)
SHADER_NODE_TYPE(NODE_MAP_RANGE)
SHADER_NODE_TYPE(NODE_VECTOR_MAP_RANGE)
SHADER_NODE_TYPE(NODE_CLAMP)
SHADER_NODE_TYPE(NODE_BEVEL)
SHADER_NODE_TYPE(NODE_AMBIENT_OCCLUSION)
SHADER_NODE_TYPE(NODE_TEX_VOXEL)
SHADER_NODE_TYPE(NODE_AOV_START)
SHADER_NODE_TYPE(NODE_AOV_COLOR)
SHADER_NODE_TYPE(NODE_AOV_VALUE)
SHADER_NODE_TYPE(NODE_FLOAT_CURVE)
/* Padding for struct alignment. */
SHADER_NODE_TYPE(NODE_PAD1)
#undef SHADER_NODE_TYPE
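The removed header above is an X-macro list: each consumer defines SHADER_NODE_TYPE before including it, the guarded empty definition makes a bare include harmless, and the trailing #undef keeps the macro from leaking. A minimal sketch of the same pattern with hypothetical names (compiles when saved as the two files indicated, not part of the Cycles build):

/* colors_template.h -- hypothetical X-macro list, same shape as the file above. */
#ifndef COLOR_TYPE
#  define COLOR_TYPE(name)
#endif
COLOR_TYPE(COLOR_RED)
COLOR_TYPE(COLOR_GREEN)
COLOR_TYPE(COLOR_BLUE)
#undef COLOR_TYPE

/* consumer.cpp -- one include expands the list into an enum... */
enum ColorType {
#define COLOR_TYPE(name) name,
#include "colors_template.h"
  COLOR_NUM
};

/* ...another expands the same list into strings, so both stay in sync. */
static const char *color_names[] = {
#define COLOR_TYPE(name) #name,
#include "colors_template.h"
};

In Cycles the template fed both the ShaderNodeType enum and the per-node usage bookkeeping, which is why the note about keeping the enum order matched to the switch cases in svm.h survives in both versions.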

View File

@@ -204,15 +204,6 @@ CCL_NAMESPACE_END
CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_USE_DATA_CONSTANTS__
# define SVM_CASE(node) \
case node: \
if (!kernel_data_svm_usage_##node) \
break;
#else
# define SVM_CASE(node) case node:
#endif
/* Main Interpreter Loop */
template<uint node_feature_mask, ShaderType type, typename ConstIntegratorGenericState>
ccl_device void svm_eval_nodes(KernelGlobals kg,
@@ -228,10 +219,9 @@ ccl_device void svm_eval_nodes(KernelGlobals kg,
uint4 node = read_node(kg, &offset);
switch (node.x) {
SVM_CASE(NODE_END)
return;
SVM_CASE(NODE_SHADER_JUMP)
{
case NODE_END:
return;
case NODE_SHADER_JUMP: {
if (type == SHADER_TYPE_SURFACE)
offset = node.y;
else if (type == SHADER_TYPE_VOLUME)
@@ -242,349 +232,351 @@ ccl_device void svm_eval_nodes(KernelGlobals kg,
return;
break;
}
SVM_CASE(NODE_CLOSURE_BSDF)
offset = svm_node_closure_bsdf<node_feature_mask, type>(
kg, sd, stack, node, path_flag, offset);
break;
SVM_CASE(NODE_CLOSURE_EMISSION)
IF_KERNEL_NODES_FEATURE(EMISSION)
{
svm_node_closure_emission(sd, stack, node);
}
break;
SVM_CASE(NODE_CLOSURE_BACKGROUND)
IF_KERNEL_NODES_FEATURE(EMISSION)
{
svm_node_closure_background(sd, stack, node);
}
break;
SVM_CASE(NODE_CLOSURE_SET_WEIGHT)
svm_node_closure_set_weight(sd, node.y, node.z, node.w);
break;
SVM_CASE(NODE_CLOSURE_WEIGHT)
svm_node_closure_weight(sd, stack, node.y);
break;
SVM_CASE(NODE_EMISSION_WEIGHT)
IF_KERNEL_NODES_FEATURE(EMISSION)
{
svm_node_emission_weight(kg, sd, stack, node);
}
break;
SVM_CASE(NODE_MIX_CLOSURE)
svm_node_mix_closure(sd, stack, node);
break;
SVM_CASE(NODE_JUMP_IF_ZERO)
if (stack_load_float(stack, node.z) <= 0.0f)
offset += node.y;
break;
SVM_CASE(NODE_JUMP_IF_ONE)
if (stack_load_float(stack, node.z) >= 1.0f)
offset += node.y;
break;
SVM_CASE(NODE_GEOMETRY)
svm_node_geometry(kg, sd, stack, node.y, node.z);
break;
SVM_CASE(NODE_CONVERT)
svm_node_convert(kg, sd, stack, node.y, node.z, node.w);
break;
SVM_CASE(NODE_TEX_COORD)
offset = svm_node_tex_coord(kg, sd, path_flag, stack, node, offset);
break;
SVM_CASE(NODE_VALUE_F)
svm_node_value_f(kg, sd, stack, node.y, node.z);
break;
SVM_CASE(NODE_VALUE_V)
offset = svm_node_value_v(kg, sd, stack, node.y, offset);
break;
SVM_CASE(NODE_ATTR)
svm_node_attr<node_feature_mask>(kg, sd, stack, node);
break;
SVM_CASE(NODE_VERTEX_COLOR)
svm_node_vertex_color(kg, sd, stack, node.y, node.z, node.w);
break;
SVM_CASE(NODE_GEOMETRY_BUMP_DX)
IF_KERNEL_NODES_FEATURE(BUMP)
{
svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z);
}
break;
SVM_CASE(NODE_GEOMETRY_BUMP_DY)
IF_KERNEL_NODES_FEATURE(BUMP)
{
svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z);
}
break;
SVM_CASE(NODE_SET_DISPLACEMENT)
svm_node_set_displacement<node_feature_mask>(kg, sd, stack, node.y);
break;
SVM_CASE(NODE_DISPLACEMENT)
svm_node_displacement<node_feature_mask>(kg, sd, stack, node);
break;
SVM_CASE(NODE_VECTOR_DISPLACEMENT)
offset = svm_node_vector_displacement<node_feature_mask>(kg, sd, stack, node, offset);
break;
SVM_CASE(NODE_TEX_IMAGE)
offset = svm_node_tex_image(kg, sd, stack, node, offset);
break;
SVM_CASE(NODE_TEX_IMAGE_BOX)
svm_node_tex_image_box(kg, sd, stack, node);
break;
SVM_CASE(NODE_TEX_NOISE)
offset = svm_node_tex_noise(kg, sd, stack, node.y, node.z, node.w, offset);
break;
SVM_CASE(NODE_SET_BUMP)
svm_node_set_bump<node_feature_mask>(kg, sd, stack, node);
break;
SVM_CASE(NODE_ATTR_BUMP_DX)
IF_KERNEL_NODES_FEATURE(BUMP)
{
svm_node_attr_bump_dx(kg, sd, stack, node);
}
break;
SVM_CASE(NODE_ATTR_BUMP_DY)
IF_KERNEL_NODES_FEATURE(BUMP)
{
svm_node_attr_bump_dy(kg, sd, stack, node);
}
break;
SVM_CASE(NODE_VERTEX_COLOR_BUMP_DX)
IF_KERNEL_NODES_FEATURE(BUMP)
{
svm_node_vertex_color_bump_dx(kg, sd, stack, node.y, node.z, node.w);
}
break;
SVM_CASE(NODE_VERTEX_COLOR_BUMP_DY)
IF_KERNEL_NODES_FEATURE(BUMP)
{
svm_node_vertex_color_bump_dy(kg, sd, stack, node.y, node.z, node.w);
}
break;
SVM_CASE(NODE_TEX_COORD_BUMP_DX)
IF_KERNEL_NODES_FEATURE(BUMP)
{
offset = svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, offset);
}
break;
SVM_CASE(NODE_TEX_COORD_BUMP_DY)
IF_KERNEL_NODES_FEATURE(BUMP)
{
offset = svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, offset);
}
break;
SVM_CASE(NODE_CLOSURE_SET_NORMAL)
IF_KERNEL_NODES_FEATURE(BUMP)
{
svm_node_set_normal(kg, sd, stack, node.y, node.z);
}
break;
SVM_CASE(NODE_ENTER_BUMP_EVAL)
IF_KERNEL_NODES_FEATURE(BUMP_STATE)
{
svm_node_enter_bump_eval(kg, sd, stack, node.y);
}
break;
SVM_CASE(NODE_LEAVE_BUMP_EVAL)
IF_KERNEL_NODES_FEATURE(BUMP_STATE)
{
svm_node_leave_bump_eval(kg, sd, stack, node.y);
}
break;
SVM_CASE(NODE_HSV)
svm_node_hsv(kg, sd, stack, node);
break;
SVM_CASE(NODE_CLOSURE_HOLDOUT)
svm_node_closure_holdout(sd, stack, node);
break;
SVM_CASE(NODE_FRESNEL)
svm_node_fresnel(sd, stack, node.y, node.z, node.w);
break;
SVM_CASE(NODE_LAYER_WEIGHT)
svm_node_layer_weight(sd, stack, node);
break;
SVM_CASE(NODE_CLOSURE_VOLUME)
IF_KERNEL_NODES_FEATURE(VOLUME)
{
svm_node_closure_volume<type>(kg, sd, stack, node);
}
break;
SVM_CASE(NODE_PRINCIPLED_VOLUME)
IF_KERNEL_NODES_FEATURE(VOLUME)
{
offset = svm_node_principled_volume<type>(kg, sd, stack, node, path_flag, offset);
}
break;
SVM_CASE(NODE_MATH)
svm_node_math(kg, sd, stack, node.y, node.z, node.w);
break;
SVM_CASE(NODE_VECTOR_MATH)
offset = svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, offset);
break;
SVM_CASE(NODE_RGB_RAMP)
offset = svm_node_rgb_ramp(kg, sd, stack, node, offset);
break;
SVM_CASE(NODE_GAMMA)
svm_node_gamma(sd, stack, node.y, node.z, node.w);
break;
SVM_CASE(NODE_BRIGHTCONTRAST)
svm_node_brightness(sd, stack, node.y, node.z, node.w);
break;
SVM_CASE(NODE_LIGHT_PATH)
svm_node_light_path<node_feature_mask>(kg, state, sd, stack, node.y, node.z, path_flag);
break;
SVM_CASE(NODE_OBJECT_INFO)
svm_node_object_info(kg, sd, stack, node.y, node.z);
break;
SVM_CASE(NODE_PARTICLE_INFO)
svm_node_particle_info(kg, sd, stack, node.y, node.z);
break;
case NODE_CLOSURE_BSDF:
offset = svm_node_closure_bsdf<node_feature_mask, type>(
kg, sd, stack, node, path_flag, offset);
break;
case NODE_CLOSURE_EMISSION:
IF_KERNEL_NODES_FEATURE(EMISSION)
{
svm_node_closure_emission(sd, stack, node);
}
break;
case NODE_CLOSURE_BACKGROUND:
IF_KERNEL_NODES_FEATURE(EMISSION)
{
svm_node_closure_background(sd, stack, node);
}
break;
case NODE_CLOSURE_SET_WEIGHT:
svm_node_closure_set_weight(sd, node.y, node.z, node.w);
break;
case NODE_CLOSURE_WEIGHT:
svm_node_closure_weight(sd, stack, node.y);
break;
case NODE_EMISSION_WEIGHT:
IF_KERNEL_NODES_FEATURE(EMISSION)
{
svm_node_emission_weight(kg, sd, stack, node);
}
break;
case NODE_MIX_CLOSURE:
svm_node_mix_closure(sd, stack, node);
break;
case NODE_JUMP_IF_ZERO:
if (stack_load_float(stack, node.z) <= 0.0f)
offset += node.y;
break;
case NODE_JUMP_IF_ONE:
if (stack_load_float(stack, node.z) >= 1.0f)
offset += node.y;
break;
case NODE_GEOMETRY:
svm_node_geometry(kg, sd, stack, node.y, node.z);
break;
case NODE_CONVERT:
svm_node_convert(kg, sd, stack, node.y, node.z, node.w);
break;
case NODE_TEX_COORD:
offset = svm_node_tex_coord(kg, sd, path_flag, stack, node, offset);
break;
case NODE_VALUE_F:
svm_node_value_f(kg, sd, stack, node.y, node.z);
break;
case NODE_VALUE_V:
offset = svm_node_value_v(kg, sd, stack, node.y, offset);
break;
case NODE_ATTR:
svm_node_attr<node_feature_mask>(kg, sd, stack, node);
break;
case NODE_VERTEX_COLOR:
svm_node_vertex_color(kg, sd, stack, node.y, node.z, node.w);
break;
case NODE_GEOMETRY_BUMP_DX:
IF_KERNEL_NODES_FEATURE(BUMP)
{
svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z);
}
break;
case NODE_GEOMETRY_BUMP_DY:
IF_KERNEL_NODES_FEATURE(BUMP)
{
svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z);
}
break;
case NODE_SET_DISPLACEMENT:
svm_node_set_displacement<node_feature_mask>(kg, sd, stack, node.y);
break;
case NODE_DISPLACEMENT:
svm_node_displacement<node_feature_mask>(kg, sd, stack, node);
break;
case NODE_VECTOR_DISPLACEMENT:
offset = svm_node_vector_displacement<node_feature_mask>(kg, sd, stack, node, offset);
break;
case NODE_TEX_IMAGE:
offset = svm_node_tex_image(kg, sd, stack, node, offset);
break;
case NODE_TEX_IMAGE_BOX:
svm_node_tex_image_box(kg, sd, stack, node);
break;
case NODE_TEX_NOISE:
offset = svm_node_tex_noise(kg, sd, stack, node.y, node.z, node.w, offset);
break;
case NODE_SET_BUMP:
svm_node_set_bump<node_feature_mask>(kg, sd, stack, node);
break;
case NODE_ATTR_BUMP_DX:
IF_KERNEL_NODES_FEATURE(BUMP)
{
svm_node_attr_bump_dx(kg, sd, stack, node);
}
break;
case NODE_ATTR_BUMP_DY:
IF_KERNEL_NODES_FEATURE(BUMP)
{
svm_node_attr_bump_dy(kg, sd, stack, node);
}
break;
case NODE_VERTEX_COLOR_BUMP_DX:
IF_KERNEL_NODES_FEATURE(BUMP)
{
svm_node_vertex_color_bump_dx(kg, sd, stack, node.y, node.z, node.w);
}
break;
case NODE_VERTEX_COLOR_BUMP_DY:
IF_KERNEL_NODES_FEATURE(BUMP)
{
svm_node_vertex_color_bump_dy(kg, sd, stack, node.y, node.z, node.w);
}
break;
case NODE_TEX_COORD_BUMP_DX:
IF_KERNEL_NODES_FEATURE(BUMP)
{
offset = svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, offset);
}
break;
case NODE_TEX_COORD_BUMP_DY:
IF_KERNEL_NODES_FEATURE(BUMP)
{
offset = svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, offset);
}
break;
case NODE_CLOSURE_SET_NORMAL:
IF_KERNEL_NODES_FEATURE(BUMP)
{
svm_node_set_normal(kg, sd, stack, node.y, node.z);
}
break;
case NODE_ENTER_BUMP_EVAL:
IF_KERNEL_NODES_FEATURE(BUMP_STATE)
{
svm_node_enter_bump_eval(kg, sd, stack, node.y);
}
break;
case NODE_LEAVE_BUMP_EVAL:
IF_KERNEL_NODES_FEATURE(BUMP_STATE)
{
svm_node_leave_bump_eval(kg, sd, stack, node.y);
}
break;
case NODE_HSV:
svm_node_hsv(kg, sd, stack, node);
break;
case NODE_CLOSURE_HOLDOUT:
svm_node_closure_holdout(sd, stack, node);
break;
case NODE_FRESNEL:
svm_node_fresnel(sd, stack, node.y, node.z, node.w);
break;
case NODE_LAYER_WEIGHT:
svm_node_layer_weight(sd, stack, node);
break;
case NODE_CLOSURE_VOLUME:
IF_KERNEL_NODES_FEATURE(VOLUME)
{
svm_node_closure_volume<type>(kg, sd, stack, node);
}
break;
case NODE_PRINCIPLED_VOLUME:
IF_KERNEL_NODES_FEATURE(VOLUME)
{
offset = svm_node_principled_volume<type>(kg, sd, stack, node, path_flag, offset);
}
break;
case NODE_MATH:
svm_node_math(kg, sd, stack, node.y, node.z, node.w);
break;
case NODE_VECTOR_MATH:
offset = svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, offset);
break;
case NODE_RGB_RAMP:
offset = svm_node_rgb_ramp(kg, sd, stack, node, offset);
break;
case NODE_GAMMA:
svm_node_gamma(sd, stack, node.y, node.z, node.w);
break;
case NODE_BRIGHTCONTRAST:
svm_node_brightness(sd, stack, node.y, node.z, node.w);
break;
case NODE_LIGHT_PATH:
svm_node_light_path<node_feature_mask>(kg, state, sd, stack, node.y, node.z, path_flag);
break;
case NODE_OBJECT_INFO:
svm_node_object_info(kg, sd, stack, node.y, node.z);
break;
case NODE_PARTICLE_INFO:
svm_node_particle_info(kg, sd, stack, node.y, node.z);
break;
#if defined(__HAIR__)
SVM_CASE(NODE_HAIR_INFO)
svm_node_hair_info(kg, sd, stack, node.y, node.z);
break;
case NODE_HAIR_INFO:
svm_node_hair_info(kg, sd, stack, node.y, node.z);
break;
#endif
#if defined(__POINTCLOUD__)
SVM_CASE(NODE_POINT_INFO)
svm_node_point_info(kg, sd, stack, node.y, node.z);
break;
case NODE_POINT_INFO:
svm_node_point_info(kg, sd, stack, node.y, node.z);
break;
#endif
SVM_CASE(NODE_TEXTURE_MAPPING)
offset = svm_node_texture_mapping(kg, sd, stack, node.y, node.z, offset);
break;
SVM_CASE(NODE_MAPPING)
svm_node_mapping(kg, sd, stack, node.y, node.z, node.w);
break;
SVM_CASE(NODE_MIN_MAX)
offset = svm_node_min_max(kg, sd, stack, node.y, node.z, offset);
break;
SVM_CASE(NODE_CAMERA)
svm_node_camera(kg, sd, stack, node.y, node.z, node.w);
break;
SVM_CASE(NODE_TEX_ENVIRONMENT)
svm_node_tex_environment(kg, sd, stack, node);
break;
SVM_CASE(NODE_TEX_SKY)
offset = svm_node_tex_sky(kg, sd, stack, node, offset);
break;
SVM_CASE(NODE_TEX_GRADIENT)
svm_node_tex_gradient(sd, stack, node);
break;
SVM_CASE(NODE_TEX_VORONOI)
offset = svm_node_tex_voronoi<node_feature_mask>(
kg, sd, stack, node.y, node.z, node.w, offset);
break;
SVM_CASE(NODE_TEX_MUSGRAVE)
offset = svm_node_tex_musgrave(kg, sd, stack, node.y, node.z, node.w, offset);
break;
SVM_CASE(NODE_TEX_WAVE)
offset = svm_node_tex_wave(kg, sd, stack, node, offset);
break;
SVM_CASE(NODE_TEX_MAGIC)
offset = svm_node_tex_magic(kg, sd, stack, node, offset);
break;
SVM_CASE(NODE_TEX_CHECKER)
svm_node_tex_checker(kg, sd, stack, node);
break;
SVM_CASE(NODE_TEX_BRICK)
offset = svm_node_tex_brick(kg, sd, stack, node, offset);
break;
SVM_CASE(NODE_TEX_WHITE_NOISE)
svm_node_tex_white_noise(kg, sd, stack, node.y, node.z, node.w);
break;
SVM_CASE(NODE_NORMAL)
offset = svm_node_normal(kg, sd, stack, node.y, node.z, node.w, offset);
break;
SVM_CASE(NODE_LIGHT_FALLOFF)
svm_node_light_falloff(sd, stack, node);
break;
SVM_CASE(NODE_IES)
svm_node_ies(kg, sd, stack, node);
break;
SVM_CASE(NODE_CURVES)
offset = svm_node_curves(kg, sd, stack, node, offset);
break;
SVM_CASE(NODE_FLOAT_CURVE)
offset = svm_node_curve(kg, sd, stack, node, offset);
break;
SVM_CASE(NODE_TANGENT)
svm_node_tangent(kg, sd, stack, node);
break;
SVM_CASE(NODE_NORMAL_MAP)
svm_node_normal_map(kg, sd, stack, node);
break;
SVM_CASE(NODE_INVERT)
svm_node_invert(sd, stack, node.y, node.z, node.w);
break;
SVM_CASE(NODE_MIX)
offset = svm_node_mix(kg, sd, stack, node.y, node.z, node.w, offset);
break;
SVM_CASE(NODE_SEPARATE_COLOR)
svm_node_separate_color(kg, sd, stack, node.y, node.z, node.w);
break;
SVM_CASE(NODE_COMBINE_COLOR)
svm_node_combine_color(kg, sd, stack, node.y, node.z, node.w);
break;
SVM_CASE(NODE_SEPARATE_VECTOR)
svm_node_separate_vector(sd, stack, node.y, node.z, node.w);
break;
SVM_CASE(NODE_COMBINE_VECTOR)
svm_node_combine_vector(sd, stack, node.y, node.z, node.w);
break;
SVM_CASE(NODE_SEPARATE_HSV)
offset = svm_node_separate_hsv(kg, sd, stack, node.y, node.z, node.w, offset);
break;
SVM_CASE(NODE_COMBINE_HSV)
offset = svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, offset);
break;
SVM_CASE(NODE_VECTOR_ROTATE)
svm_node_vector_rotate(sd, stack, node.y, node.z, node.w);
break;
SVM_CASE(NODE_VECTOR_TRANSFORM)
svm_node_vector_transform(kg, sd, stack, node);
break;
SVM_CASE(NODE_WIREFRAME)
svm_node_wireframe(kg, sd, stack, node);
break;
SVM_CASE(NODE_WAVELENGTH)
svm_node_wavelength(kg, sd, stack, node.y, node.z);
break;
SVM_CASE(NODE_BLACKBODY)
svm_node_blackbody(kg, sd, stack, node.y, node.z);
break;
SVM_CASE(NODE_MAP_RANGE)
offset = svm_node_map_range(kg, sd, stack, node.y, node.z, node.w, offset);
break;
SVM_CASE(NODE_VECTOR_MAP_RANGE)
offset = svm_node_vector_map_range(kg, sd, stack, node.y, node.z, node.w, offset);
break;
SVM_CASE(NODE_CLAMP)
offset = svm_node_clamp(kg, sd, stack, node.y, node.z, node.w, offset);
break;
case NODE_TEXTURE_MAPPING:
offset = svm_node_texture_mapping(kg, sd, stack, node.y, node.z, offset);
break;
case NODE_MAPPING:
svm_node_mapping(kg, sd, stack, node.y, node.z, node.w);
break;
case NODE_MIN_MAX:
offset = svm_node_min_max(kg, sd, stack, node.y, node.z, offset);
break;
case NODE_CAMERA:
svm_node_camera(kg, sd, stack, node.y, node.z, node.w);
break;
case NODE_TEX_ENVIRONMENT:
svm_node_tex_environment(kg, sd, stack, node);
break;
case NODE_TEX_SKY:
offset = svm_node_tex_sky(kg, sd, stack, node, offset);
break;
case NODE_TEX_GRADIENT:
svm_node_tex_gradient(sd, stack, node);
break;
case NODE_TEX_VORONOI:
offset = svm_node_tex_voronoi<node_feature_mask>(
kg, sd, stack, node.y, node.z, node.w, offset);
break;
case NODE_TEX_MUSGRAVE:
offset = svm_node_tex_musgrave(kg, sd, stack, node.y, node.z, node.w, offset);
break;
case NODE_TEX_WAVE:
offset = svm_node_tex_wave(kg, sd, stack, node, offset);
break;
case NODE_TEX_MAGIC:
offset = svm_node_tex_magic(kg, sd, stack, node, offset);
break;
case NODE_TEX_CHECKER:
svm_node_tex_checker(kg, sd, stack, node);
break;
case NODE_TEX_BRICK:
offset = svm_node_tex_brick(kg, sd, stack, node, offset);
break;
case NODE_TEX_WHITE_NOISE:
svm_node_tex_white_noise(kg, sd, stack, node.y, node.z, node.w);
break;
case NODE_NORMAL:
offset = svm_node_normal(kg, sd, stack, node.y, node.z, node.w, offset);
break;
case NODE_LIGHT_FALLOFF:
svm_node_light_falloff(sd, stack, node);
break;
case NODE_IES:
svm_node_ies(kg, sd, stack, node);
break;
case NODE_RGB_CURVES:
case NODE_VECTOR_CURVES:
offset = svm_node_curves(kg, sd, stack, node, offset);
break;
case NODE_FLOAT_CURVE:
offset = svm_node_curve(kg, sd, stack, node, offset);
break;
case NODE_TANGENT:
svm_node_tangent(kg, sd, stack, node);
break;
case NODE_NORMAL_MAP:
svm_node_normal_map(kg, sd, stack, node);
break;
case NODE_INVERT:
svm_node_invert(sd, stack, node.y, node.z, node.w);
break;
case NODE_MIX:
offset = svm_node_mix(kg, sd, stack, node.y, node.z, node.w, offset);
break;
case NODE_SEPARATE_COLOR:
svm_node_separate_color(kg, sd, stack, node.y, node.z, node.w);
break;
case NODE_COMBINE_COLOR:
svm_node_combine_color(kg, sd, stack, node.y, node.z, node.w);
break;
case NODE_SEPARATE_VECTOR:
svm_node_separate_vector(sd, stack, node.y, node.z, node.w);
break;
case NODE_COMBINE_VECTOR:
svm_node_combine_vector(sd, stack, node.y, node.z, node.w);
break;
case NODE_SEPARATE_HSV:
offset = svm_node_separate_hsv(kg, sd, stack, node.y, node.z, node.w, offset);
break;
case NODE_COMBINE_HSV:
offset = svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, offset);
break;
case NODE_VECTOR_ROTATE:
svm_node_vector_rotate(sd, stack, node.y, node.z, node.w);
break;
case NODE_VECTOR_TRANSFORM:
svm_node_vector_transform(kg, sd, stack, node);
break;
case NODE_WIREFRAME:
svm_node_wireframe(kg, sd, stack, node);
break;
case NODE_WAVELENGTH:
svm_node_wavelength(kg, sd, stack, node.y, node.z);
break;
case NODE_BLACKBODY:
svm_node_blackbody(kg, sd, stack, node.y, node.z);
break;
case NODE_MAP_RANGE:
offset = svm_node_map_range(kg, sd, stack, node.y, node.z, node.w, offset);
break;
case NODE_VECTOR_MAP_RANGE:
offset = svm_node_vector_map_range(kg, sd, stack, node.y, node.z, node.w, offset);
break;
case NODE_CLAMP:
offset = svm_node_clamp(kg, sd, stack, node.y, node.z, node.w, offset);
break;
#ifdef __SHADER_RAYTRACE__
SVM_CASE(NODE_BEVEL)
svm_node_bevel<node_feature_mask>(kg, state, sd, stack, node);
break;
SVM_CASE(NODE_AMBIENT_OCCLUSION)
svm_node_ao<node_feature_mask>(kg, state, sd, stack, node);
break;
case NODE_BEVEL:
svm_node_bevel<node_feature_mask>(kg, state, sd, stack, node);
break;
case NODE_AMBIENT_OCCLUSION:
svm_node_ao<node_feature_mask>(kg, state, sd, stack, node);
break;
#endif
SVM_CASE(NODE_TEX_VOXEL)
IF_KERNEL_NODES_FEATURE(VOLUME)
{
offset = svm_node_tex_voxel(kg, sd, stack, node, offset);
}
break;
SVM_CASE(NODE_AOV_START)
if (!svm_node_aov_check(path_flag, render_buffer)) {
return;
}
break;
SVM_CASE(NODE_AOV_COLOR)
svm_node_aov_color<node_feature_mask>(kg, state, sd, stack, node, render_buffer);
break;
SVM_CASE(NODE_AOV_VALUE)
svm_node_aov_value<node_feature_mask>(kg, state, sd, stack, node, render_buffer);
break;
case NODE_TEX_VOXEL:
IF_KERNEL_NODES_FEATURE(VOLUME)
{
offset = svm_node_tex_voxel(kg, sd, stack, node, offset);
}
break;
case NODE_AOV_START:
if (!svm_node_aov_check(path_flag, render_buffer)) {
return;
}
break;
case NODE_AOV_COLOR:
svm_node_aov_color<node_feature_mask>(kg, state, sd, stack, node, render_buffer);
break;
case NODE_AOV_VALUE:
svm_node_aov_value<node_feature_mask>(kg, state, sd, stack, node, render_buffer);
break;
default:
kernel_assert(!"Unknown node type was passed to the SVM machine");
return;
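The SVM_CASE macro removed above lets a specialized kernel compile out switch cases for node types a scene never uses: when __KERNEL_USE_DATA_CONSTANTS__ is defined, each case first checks a per-node usage constant (kernel_data_svm_usage_##node) and breaks immediately if it is zero. A compact sketch of the same idea with made-up names, not the kernel's:

// Hedged sketch of the SVM_CASE technique. When the usage flags are
// compile-time constants, the compiler folds unused cases away entirely.
#define USE_NODE_ADD 1
#define USE_NODE_MUL 0  // scene never uses this node type

#define OP_CASE(node) \
  case node: \
    if (!USE_##node) \
      break;

enum Op { NODE_ADD, NODE_MUL };

static float eval(Op op, float a, float b)
{
  switch (op) {
    OP_CASE(NODE_ADD)
      return a + b;
    OP_CASE(NODE_MUL)  // becomes an empty case when USE_NODE_MUL is 0
      return a * b;
  }
  return 0.0f;
}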

View File

@@ -138,7 +138,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dx(KernelGlobals kg,
case NODE_TEXCO_WINDOW: {
if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE &&
kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
data = camera_world_to_ndc(kg, sd, sd->ray_P);
data = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(sd->ray_dP, 0.0f, 0.0f));
else
data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx);
data.z = 0.0f;
@@ -223,7 +223,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dy(KernelGlobals kg,
case NODE_TEXCO_WINDOW: {
if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE &&
kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
data = camera_world_to_ndc(kg, sd, sd->ray_P);
data = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(0.0f, sd->ray_dP, 0.0f));
else
data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy);
data.z = 0.0f;

View File

@@ -17,9 +17,104 @@ CCL_NAMESPACE_BEGIN
/* Nodes */
typedef enum ShaderNodeType {
#define SHADER_NODE_TYPE(name) name,
#include "node_types_template.h"
NODE_NUM
NODE_END = 0,
NODE_SHADER_JUMP,
NODE_CLOSURE_BSDF,
NODE_CLOSURE_EMISSION,
NODE_CLOSURE_BACKGROUND,
NODE_CLOSURE_SET_WEIGHT,
NODE_CLOSURE_WEIGHT,
NODE_EMISSION_WEIGHT,
NODE_MIX_CLOSURE,
NODE_JUMP_IF_ZERO,
NODE_JUMP_IF_ONE,
NODE_GEOMETRY,
NODE_CONVERT,
NODE_TEX_COORD,
NODE_VALUE_F,
NODE_VALUE_V,
NODE_ATTR,
NODE_VERTEX_COLOR,
NODE_GEOMETRY_BUMP_DX,
NODE_GEOMETRY_BUMP_DY,
NODE_SET_DISPLACEMENT,
NODE_DISPLACEMENT,
NODE_VECTOR_DISPLACEMENT,
NODE_TEX_IMAGE,
NODE_TEX_IMAGE_BOX,
NODE_TEX_NOISE,
NODE_SET_BUMP,
NODE_ATTR_BUMP_DX,
NODE_ATTR_BUMP_DY,
NODE_VERTEX_COLOR_BUMP_DX,
NODE_VERTEX_COLOR_BUMP_DY,
NODE_TEX_COORD_BUMP_DX,
NODE_TEX_COORD_BUMP_DY,
NODE_CLOSURE_SET_NORMAL,
NODE_ENTER_BUMP_EVAL,
NODE_LEAVE_BUMP_EVAL,
NODE_HSV,
NODE_CLOSURE_HOLDOUT,
NODE_FRESNEL,
NODE_LAYER_WEIGHT,
NODE_CLOSURE_VOLUME,
NODE_PRINCIPLED_VOLUME,
NODE_MATH,
NODE_VECTOR_MATH,
NODE_RGB_RAMP,
NODE_GAMMA,
NODE_BRIGHTCONTRAST,
NODE_LIGHT_PATH,
NODE_OBJECT_INFO,
NODE_PARTICLE_INFO,
NODE_HAIR_INFO,
NODE_POINT_INFO,
NODE_TEXTURE_MAPPING,
NODE_MAPPING,
NODE_MIN_MAX,
NODE_CAMERA,
NODE_TEX_ENVIRONMENT,
NODE_TEX_SKY,
NODE_TEX_GRADIENT,
NODE_TEX_VORONOI,
NODE_TEX_MUSGRAVE,
NODE_TEX_WAVE,
NODE_TEX_MAGIC,
NODE_TEX_CHECKER,
NODE_TEX_BRICK,
NODE_TEX_WHITE_NOISE,
NODE_NORMAL,
NODE_LIGHT_FALLOFF,
NODE_IES,
NODE_RGB_CURVES,
NODE_VECTOR_CURVES,
NODE_TANGENT,
NODE_NORMAL_MAP,
NODE_INVERT,
NODE_MIX,
NODE_SEPARATE_COLOR,
NODE_COMBINE_COLOR,
NODE_SEPARATE_VECTOR,
NODE_COMBINE_VECTOR,
NODE_SEPARATE_HSV,
NODE_COMBINE_HSV,
NODE_VECTOR_ROTATE,
NODE_VECTOR_TRANSFORM,
NODE_WIREFRAME,
NODE_WAVELENGTH,
NODE_BLACKBODY,
NODE_MAP_RANGE,
NODE_VECTOR_MAP_RANGE,
NODE_CLAMP,
NODE_BEVEL,
NODE_AMBIENT_OCCLUSION,
NODE_TEX_VOXEL,
NODE_AOV_START,
NODE_AOV_COLOR,
NODE_AOV_VALUE,
NODE_FLOAT_CURVE,
/* NOTE: for best OpenCL performance, item definition in the enum must
* match the switch case order in `svm.h`. */
} ShaderNodeType;
typedef enum NodeAttributeOutputType {

View File

@@ -535,8 +535,7 @@ typedef struct RaySelfPrimitives {
typedef struct Ray {
float3 P; /* origin */
float3 D; /* direction */
float tmin; /* start distance */
float tmax; /* end distance */
float t; /* length of the ray */
float time; /* time (for motion blur) */
RaySelfPrimitives self;
@@ -1073,6 +1072,94 @@ typedef struct KernelCamera {
} KernelCamera;
static_assert_align(KernelCamera, 16);
typedef struct KernelFilm {
float exposure;
int pass_flag;
int light_pass_flag;
int pass_stride;
int pass_combined;
int pass_depth;
int pass_position;
int pass_normal;
int pass_roughness;
int pass_motion;
int pass_motion_weight;
int pass_uv;
int pass_object_id;
int pass_material_id;
int pass_diffuse_color;
int pass_glossy_color;
int pass_transmission_color;
int pass_diffuse_indirect;
int pass_glossy_indirect;
int pass_transmission_indirect;
int pass_volume_indirect;
int pass_diffuse_direct;
int pass_glossy_direct;
int pass_transmission_direct;
int pass_volume_direct;
int pass_emission;
int pass_background;
int pass_ao;
float pass_alpha_threshold;
int pass_shadow;
float pass_shadow_scale;
int pass_shadow_catcher;
int pass_shadow_catcher_sample_count;
int pass_shadow_catcher_matte;
int filter_table_offset;
int cryptomatte_passes;
int cryptomatte_depth;
int pass_cryptomatte;
int pass_adaptive_aux_buffer;
int pass_sample_count;
int pass_mist;
float mist_start;
float mist_inv_depth;
float mist_falloff;
int pass_denoising_normal;
int pass_denoising_albedo;
int pass_denoising_depth;
int pass_aov_color;
int pass_aov_value;
int pass_lightgroup;
/* XYZ to rendering color space transform. float4 instead of float3 to
* ensure consistent padding/alignment across devices. */
float4 xyz_to_r;
float4 xyz_to_g;
float4 xyz_to_b;
float4 rgb_to_y;
/* Rec709 to rendering color space. */
float4 rec709_to_r;
float4 rec709_to_g;
float4 rec709_to_b;
int is_rec709;
int pass_bake_primitive;
int pass_bake_differential;
int use_approximate_shadow_catcher;
int pad1;
} KernelFilm;
static_assert_align(KernelFilm, 16);
typedef struct KernelFilmConvert {
int pass_offset;
int pass_stride;
@@ -1114,6 +1201,108 @@ typedef struct KernelFilmConvert {
} KernelFilmConvert;
static_assert_align(KernelFilmConvert, 16);
typedef struct KernelBackground {
/* only shader index */
int surface_shader;
int volume_shader;
float volume_step_size;
int transparent;
float transparent_roughness_squared_threshold;
/* portal sampling */
float portal_weight;
int num_portals;
int portal_offset;
/* sun sampling */
float sun_weight;
/* xyz store direction, w the angle. float4 instead of float3 is used
* to ensure consistent padding/alignment across devices. */
float4 sun;
/* map sampling */
float map_weight;
int map_res_x;
int map_res_y;
int use_mis;
int lightgroup;
/* Padding */
int pad1, pad2;
} KernelBackground;
static_assert_align(KernelBackground, 16);
typedef struct KernelIntegrator {
/* emission */
int use_direct_light;
int num_distribution;
int num_all_lights;
float pdf_triangles;
float pdf_lights;
float light_inv_rr_threshold;
/* bounces */
int min_bounce;
int max_bounce;
int max_diffuse_bounce;
int max_glossy_bounce;
int max_transmission_bounce;
int max_volume_bounce;
/* AO bounces */
int ao_bounces;
float ao_bounces_distance;
float ao_bounces_factor;
float ao_additive_factor;
/* transparent */
int transparent_min_bounce;
int transparent_max_bounce;
int transparent_shadows;
/* caustics */
int caustics_reflective;
int caustics_refractive;
float filter_glossy;
/* seed */
int seed;
/* clamp */
float sample_clamp_direct;
float sample_clamp_indirect;
/* mis */
int use_lamp_mis;
/* caustics */
int use_caustics;
/* sampler */
int sampling_pattern;
/* volume render */
int use_volumes;
int volume_max_steps;
float volume_step_rate;
int has_shadow_catcher;
float scrambling_distance;
/* Closure filter. */
int filter_closures;
/* MIS debugging. */
int direct_light_sampling_type;
/* padding */
int pad1;
} KernelIntegrator;
static_assert_align(KernelIntegrator, 16);
typedef enum KernelBVHLayout {
BVH_LAYOUT_NONE = 0,
@@ -1131,25 +1320,36 @@ typedef enum KernelBVHLayout {
BVH_LAYOUT_ALL = BVH_LAYOUT_BVH2 | BVH_LAYOUT_EMBREE | BVH_LAYOUT_OPTIX | BVH_LAYOUT_METAL,
} KernelBVHLayout;
/* Specialized struct that can become constants in dynamic compilation. */
#define KERNEL_STRUCT_BEGIN(name, parent) struct name {
#define KERNEL_STRUCT_END(name) \
} \
; \
static_assert_align(name, 16);
typedef struct KernelBVH {
/* Own BVH */
int root;
int have_motion;
int have_curves;
int bvh_layout;
int use_bvh_steps;
int curve_subdivisions;
#ifdef __KERNEL_USE_DATA_CONSTANTS__
# define KERNEL_STRUCT_MEMBER(parent, type, name) type __unused_##name;
/* Custom BVH */
#ifdef __KERNEL_OPTIX__
OptixTraversableHandle scene;
#elif defined __METALRT__
metalrt_as_type scene;
#else
# define KERNEL_STRUCT_MEMBER(parent, type, name) type name;
# ifdef __EMBREE__
RTCScene scene;
# ifndef __KERNEL_64_BIT__
int pad2;
# endif
# else
int scene, pad2;
# endif
#endif
#include "kernel/data_template.h"
} KernelBVH;
static_assert_align(KernelBVH, 16);
typedef struct KernelTables {
int beckmann_offset;
int filter_table_offset;
int pad1, pad2;
int pad1, pad2, pad3;
} KernelTables;
static_assert_align(KernelTables, 16);
@@ -1162,37 +1362,18 @@ typedef struct KernelBake {
static_assert_align(KernelBake, 16);
typedef struct KernelData {
/* Features and limits. */
uint kernel_features;
uint max_closures;
uint max_shaders;
uint volume_stack_size;
/* Always dynamic data members. */
KernelCamera cam;
KernelBake bake;
KernelFilm film;
KernelBackground background;
KernelIntegrator integrator;
KernelBVH bvh;
KernelTables tables;
/* Potentially specialized data members. */
#define KERNEL_STRUCT_BEGIN(name, parent) name parent;
#include "kernel/data_template.h"
/* Device specific BVH. */
#ifdef __KERNEL_OPTIX__
OptixTraversableHandle device_bvh;
#elif defined __METALRT__
metalrt_as_type device_bvh;
#else
# ifdef __EMBREE__
RTCScene device_bvh;
# ifndef __KERNEL_64_BIT__
int pad1;
# endif
# else
int device_bvh, pad1;
# endif
#endif
int pad2, pad3;
KernelBake bake;
} KernelData;
static_assert_align(KernelData, 16);
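Throughout these structs the size is kept a multiple of 16 bytes by hand (trailing pad ints, float4 in place of float3) and then verified with static_assert_align so the layout matches on every device. A tiny stand-alone version of that check, using a simplified macro rather than the kernel's:

// Simplified version of the kernel's static_assert_align macro.
#define my_static_assert_align(st, alignment) \
  static_assert((sizeof(st) % (alignment)) == 0, #st " is not padded to " #alignment " bytes")

struct KernelTablesSketch {
  int beckmann_offset;
  int filter_table_offset;
  int pad1, pad2;  // explicit padding keeps sizeof() a multiple of 16
};
my_static_assert_align(KernelTablesSketch, 16);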

View File

@@ -394,7 +394,7 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
vector<float> table = filter_table(filter_type, filter_width);
scene->lookup_tables->remove_table(&filter_table_offset_);
filter_table_offset_ = scene->lookup_tables->add_table(dscene, table);
dscene->data.tables.filter_table_offset = (int)filter_table_offset_;
kfilm->filter_table_offset = (int)filter_table_offset_;
/* mist pass parameters */
kfilm->mist_start = mist_start;

View File

@@ -1362,7 +1362,7 @@ void GeometryManager::device_update_bvh(Device *device,
dscene->data.bvh.use_bvh_steps = (scene->params.num_bvh_time_steps != 0);
dscene->data.bvh.curve_subdivisions = scene->params.curve_subdivisions();
/* The scene handle is set in 'CPUDevice::const_copy_to' and 'OptiXDevice::const_copy_to' */
dscene->data.device_bvh = 0;
dscene->data.bvh.scene = 0;
}
/* Set of flags used to help determining what data has been modified or needs reallocation, so we

View File

@@ -369,8 +369,6 @@ void Scene::device_update(Device *device_, Progress &progress)
device->const_copy_to("data", &dscene.data, sizeof(dscene.data));
}
device->optimize_for_scene(this);
if (print_stats) {
size_t mem_used = util_guarded_get_mem_used();
size_t mem_peak = util_guarded_get_mem_peak();

View File

@@ -82,7 +82,7 @@ class DeviceScene {
device_vector<uint> patches;
/* point-cloud */
/* pointcloud */
device_vector<float4> points;
device_vector<uint> points_shader;
@@ -124,7 +124,7 @@ class DeviceScene {
/* integrator */
device_vector<float> sample_pattern_lut;
/* IES lights */
/* ies lights */
device_vector<float> ies_lights;
KernelData data;

View File

@@ -6671,7 +6671,7 @@ void CurvesNode::compile(SVMCompiler &compiler,
ShaderInput *fac_in = input("Fac");
compiler.add_node(ShaderNodeType(type),
compiler.add_node(type,
compiler.encode_uchar4(compiler.stack_assign(fac_in),
compiler.stack_assign(value_in),
compiler.stack_assign(value_out),
@@ -6736,7 +6736,7 @@ void RGBCurvesNode::constant_fold(const ConstantFolder &folder)
void RGBCurvesNode::compile(SVMCompiler &compiler)
{
CurvesNode::compile(compiler, NODE_CURVES, input("Color"), output("Color"));
CurvesNode::compile(compiler, NODE_RGB_CURVES, input("Color"), output("Color"));
}
void RGBCurvesNode::compile(OSLCompiler &compiler)
@@ -6774,7 +6774,7 @@ void VectorCurvesNode::constant_fold(const ConstantFolder &folder)
void VectorCurvesNode::compile(SVMCompiler &compiler)
{
CurvesNode::compile(compiler, NODE_CURVES, input("Vector"), output("Vector"));
CurvesNode::compile(compiler, NODE_VECTOR_CURVES, input("Vector"), output("Vector"));
}
void VectorCurvesNode::compile(OSLCompiler &compiler)

View File

@@ -44,6 +44,8 @@ void SVMShaderManager::device_update_shader(Scene *scene,
}
assert(shader->graph);
svm_nodes->push_back_slow(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
SVMCompiler::Summary summary;
SVMCompiler compiler(scene);
compiler.background = (shader == scene->background->get_shader(scene));
@@ -168,9 +170,6 @@ SVMCompiler::SVMCompiler(Scene *scene) : scene(scene)
background = false;
mix_weight_offset = SVM_STACK_INVALID;
compile_failed = false;
/* This struct has one entry for every node, in order of ShaderNodeType definition. */
svm_node_types_used = (std::atomic_int *)&scene->dscene.data.svm_usage;
}
int SVMCompiler::stack_size(SocketType::Type type)
@@ -379,13 +378,11 @@ void SVMCompiler::add_node(int a, int b, int c, int d)
void SVMCompiler::add_node(ShaderNodeType type, int a, int b, int c)
{
svm_node_types_used[type] = true;
current_svm_nodes.push_back_slow(make_int4(type, a, b, c));
}
void SVMCompiler::add_node(ShaderNodeType type, const float3 &f)
{
svm_node_types_used[type] = true;
current_svm_nodes.push_back_slow(
make_int4(type, __float_as_int(f.x), __float_as_int(f.y), __float_as_int(f.z)));
}
@@ -666,7 +663,6 @@ void SVMCompiler::generate_multi_closure(ShaderNode *root_node,
/* Add instruction to skip closure and its dependencies if mix
* weight is zero.
*/
svm_node_types_used[NODE_JUMP_IF_ONE] = true;
current_svm_nodes.push_back_slow(make_int4(NODE_JUMP_IF_ONE, 0, stack_assign(facin), 0));
int node_jump_skip_index = current_svm_nodes.size() - 1;
@@ -682,7 +678,6 @@ void SVMCompiler::generate_multi_closure(ShaderNode *root_node,
/* Add instruction to skip closure and its dependencies if mix
* weight is zero.
*/
svm_node_types_used[NODE_JUMP_IF_ZERO] = true;
current_svm_nodes.push_back_slow(make_int4(NODE_JUMP_IF_ZERO, 0, stack_assign(facin), 0));
int node_jump_skip_index = current_svm_nodes.size() - 1;
@@ -849,9 +844,6 @@ void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType ty
void SVMCompiler::compile(Shader *shader, array<int4> &svm_nodes, int index, Summary *summary)
{
svm_node_types_used[NODE_SHADER_JUMP] = true;
svm_nodes.push_back_slow(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
/* copy graph for shader with bump mapping */
ShaderNode *output = shader->graph->output();
int start_num_svm_nodes = svm_nodes.size();

View File

@@ -211,7 +211,6 @@ class SVMCompiler {
/* compile */
void compile_type(Shader *shader, ShaderGraph *graph, ShaderType type);
std::atomic_int *svm_node_types_used;
array<int4> current_svm_nodes;
ShaderType current_type;
Shader *current_shader;

View File

@@ -10,8 +10,7 @@ CCL_NAMESPACE_BEGIN
ccl_device bool ray_sphere_intersect(float3 ray_P,
float3 ray_D,
float ray_tmin,
float ray_tmax,
float ray_t,
float3 sphere_P,
float sphere_radius,
ccl_private float3 *isect_P,
@@ -34,7 +33,7 @@ ccl_device bool ray_sphere_intersect(float3 ray_P,
return false;
}
const float t = tp - sqrtf(radiussq - dsq); /* pythagoras */
if (t > ray_tmin && t < ray_tmax) {
if (t < ray_t) {
*isect_t = t;
*isect_P = ray_P + ray_D * t;
return true;
@@ -45,8 +44,7 @@ ccl_device bool ray_sphere_intersect(float3 ray_P,
ccl_device bool ray_aligned_disk_intersect(float3 ray_P,
float3 ray_D,
float ray_tmin,
float ray_tmax,
float ray_t,
float3 disk_P,
float disk_radius,
ccl_private float3 *isect_P,
@@ -61,7 +59,7 @@ ccl_device bool ray_aligned_disk_intersect(float3 ray_P,
}
/* Compute t to intersection point. */
const float t = -disk_t / div;
if (!(t > ray_tmin && t < ray_tmax)) {
if (t < 0.0f || t > ray_t) {
return false;
}
/* Test if within radius. */
@@ -76,8 +74,7 @@ ccl_device bool ray_aligned_disk_intersect(float3 ray_P,
ccl_device bool ray_disk_intersect(float3 ray_P,
float3 ray_D,
float ray_tmin,
float ray_tmax,
float ray_t,
float3 disk_P,
float3 disk_N,
float disk_radius,
@@ -95,8 +92,7 @@ ccl_device bool ray_disk_intersect(float3 ray_P,
}
float3 P = ray_P + t * ray_D;
float3 T = P - disk_P;
if (dot(T, T) < sqr(disk_radius) && (t > ray_tmin && t < ray_tmax)) {
if (dot(T, T) < sqr(disk_radius) /*&& t > 0.f*/ && t <= ray_t) {
*isect_P = ray_P + t * ray_D;
*isect_t = t;
return true;
@@ -107,8 +103,7 @@ ccl_device bool ray_disk_intersect(float3 ray_P,
ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P,
float3 ray_dir,
float ray_tmin,
float ray_tmax,
float ray_t,
const float3 tri_a,
const float3 tri_b,
const float3 tri_c,
@@ -154,14 +149,16 @@ ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P,
/* Perform depth test. */
const float T = dot3(v0, Ng);
const float t = T / den;
if (!(t >= ray_tmin && t <= ray_tmax)) {
const int sign_den = (__float_as_int(den) & 0x80000000);
const float sign_T = xor_signmask(T, sign_den);
if ((sign_T < 0.0f) || (sign_T > ray_t * xor_signmask(den, sign_den))) {
return false;
}
*isect_u = U / den;
*isect_v = V / den;
*isect_t = t;
const float inv_den = 1.0f / den;
*isect_u = U * inv_den;
*isect_v = V * inv_den;
*isect_t = T * inv_den;
return true;
#undef dot3
@@ -174,8 +171,8 @@ ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P,
*/
ccl_device bool ray_quad_intersect(float3 ray_P,
float3 ray_D,
float ray_tmin,
float ray_tmax,
float ray_mint,
float ray_maxt,
float3 quad_P,
float3 quad_u,
float3 quad_v,
@@ -188,7 +185,7 @@ ccl_device bool ray_quad_intersect(float3 ray_P,
{
/* Perform intersection test. */
float t = -(dot(ray_P, quad_n) - dot(quad_P, quad_n)) / dot(ray_D, quad_n);
if (!(t > ray_tmin && t < ray_tmax)) {
if (t < ray_mint || t > ray_maxt) {
return false;
}
const float3 hit = ray_P + t * ray_D;

View File

@@ -136,19 +136,6 @@ void string_replace(string &haystack, const string &needle, const string &other)
}
}
void string_replace_same_length(string &haystack, const string &needle, const string &other)
{
assert(needle.size() == other.size());
size_t pos = 0;
while (pos != string::npos) {
pos = haystack.find(needle, pos);
if (pos != string::npos) {
memcpy(haystack.data() + pos, other.data(), other.size());
pos += other.size();
}
}
}
string string_remove_trademark(const string &s)
{
string result = s;
@@ -177,11 +164,6 @@ string to_string(const char *str)
return string(str);
}
string to_string(const float4 &v)
{
return string_printf("%f,%f,%f,%f", v.x, v.y, v.z, v.w);
}
string string_to_lower(const string &s)
{
string r = s;

View File

@@ -38,14 +38,12 @@ void string_split(vector<string> &tokens,
const string &separators = "\t ",
bool skip_empty_tokens = true);
void string_replace(string &haystack, const string &needle, const string &other);
void string_replace_same_length(string &haystack, const string &needle, const string &other);
bool string_startswith(string_view s, string_view start);
bool string_endswith(string_view s, string_view end);
string string_strip(const string &s);
string string_remove_trademark(const string &s);
string string_from_bool(const bool var);
string to_string(const char *str);
string to_string(const float4 &v);
string string_to_lower(const string &s);
/* Wide char strings are only used on Windows to deal with non-ASCII

View File

@@ -3,7 +3,6 @@
set(INC
.
../clog
../glew-mx
../../source/blender/imbuf
../../source/blender/makesdna
@@ -66,8 +65,6 @@ set(SRC
intern/GHOST_Util.h
intern/GHOST_Window.h
intern/GHOST_WindowManager.h
intern/GHOST_utildefines.h
intern/GHOST_utildefines_variadic.h
)
set(LIB

View File

@@ -547,15 +547,21 @@ typedef struct {
/** The key code. */
GHOST_TKey key;
/* ascii / utf8: both should always be set when possible,
* - ascii may be '\0' however if the user presses a non ascii key
* - unicode may not be set if the system has no unicode support
*
* These values are intended to be used as follows.
* For text input use unicode when available, fallback to ascii.
* For areas where unicode is not needed, number input for example, always
* use ascii, unicode is ignored - campbell.
*/
/** The ascii code for the key event ('\0' if none). */
char ascii;
/** The unicode character. if the length is 6, not NULL terminated if all 6 are set. */
char utf8_buf[6];
/**
* Enabled when the key is held (auto-repeat).
* In this case press events are sent without a corresponding release/up event.
*
* All back-ends must set this variable for correct behavior regarding repeatable keys.
*/
/** Generated by auto-repeat. */
char is_repeat;
} GHOST_TEventKeyData;
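The restored comment prescribes how consumers should read the event: text input prefers the UTF-8 buffer and falls back to the ASCII code, while purely numeric input uses ASCII only. A hedged sketch of that consumption pattern, with a simplified stand-in struct and a hypothetical helper (not GHOST API):

#include <cstddef>
#include <string>

// Simplified stand-in for GHOST_TEventKeyData, only the fields the demo reads.
struct KeyDataSketch {
  char ascii;        // '\0' when the key has no ASCII representation
  char utf8_buf[6];  // not NUL-terminated when all 6 bytes are used
};

// Text input: prefer the UTF-8 buffer, fall back to the ASCII code.
static std::string text_from_key(const KeyDataSketch &k)
{
  if (k.utf8_buf[0] != '\0') {
    std::size_t n = 0;
    while (n < 6 && k.utf8_buf[n] != '\0')
      n++;  // length is at most 6 and may not be terminated
    return std::string(k.utf8_buf, n);
  }
  return k.ascii ? std::string(1, k.ascii) : std::string();
}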

Some files were not shown because too many files have changed in this diff.