From 378d37ed3d8c99e9c2656dea77736bf859ab6478 Mon Sep 17 00:00:00 2001
From: Jun Mizutani <jmztn@noreply.localhost>
Date: Mon, 25 Mar 2024 09:25:24 +0100
Subject: [PATCH 01/36] Fix #119812: 'Add Child Collection' adds a brother bone
 collection

Change label from "Add Child Collection" to "Add Bone Collection", as the
newly added collection becomes a sibling, not a child, of the active one.

Pull Request: https://projects.blender.org/blender/blender/pulls/119845
---
 scripts/startup/bl_ui/properties_data_armature.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/startup/bl_ui/properties_data_armature.py b/scripts/startup/bl_ui/properties_data_armature.py
index 7f14f4bd75b..a8582708dd4 100644
--- a/scripts/startup/bl_ui/properties_data_armature.py
+++ b/scripts/startup/bl_ui/properties_data_armature.py
@@ -158,7 +158,7 @@ class ARMATURE_MT_collection_tree_context_menu(Menu):
         # editable or not. That means this menu has to do the disabling for it.
         sub = layout.column()
         sub.enabled = not active_bcoll_is_locked
-        sub.operator("armature.collection_add", text="Add Child Collection")
+        sub.operator("armature.collection_add", text="Add Bone Collection")
         sub.operator("armature.collection_remove")
         sub.operator("armature.collection_remove_unused", text="Remove Unused Collections")
 
-- 
2.30.2


From 8f015d3bfca5237c81dd01d2ce2e486ff68f83c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sybren=20A=2E=20St=C3=BCvel?= <sybren@blender.org>
Date: Mon, 25 Mar 2024 10:18:13 +0100
Subject: [PATCH 02/36] Modeling: Merge Vertices, correct spelling of report

Fix the "Merge Vertices" report, replacing "vertice(s)" with either
"vertex" or "vertices". The singular "vertice" is not a word in English,
and thus the regular "append (s)" approach is incorrect.

Pull Request: https://projects.blender.org/blender/blender/pulls/119863
---
 source/blender/editors/mesh/editmesh_tools.cc | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/source/blender/editors/mesh/editmesh_tools.cc b/source/blender/editors/mesh/editmesh_tools.cc
index 37c7c5c9628..673c69d1cd1 100644
--- a/source/blender/editors/mesh/editmesh_tools.cc
+++ b/source/blender/editors/mesh/editmesh_tools.cc
@@ -3584,7 +3584,10 @@ static int edbm_remove_doubles_exec(bContext *C, wmOperator *op)
     }
   }
 
-  BKE_reportf(op->reports, RPT_INFO, "Removed %d vertice(s)", count_multi);
+  BKE_reportf(op->reports,
+              RPT_INFO,
+              count_multi == 1 ? "Removed %d vertex" : "Removed %d vertices",
+              count_multi);
 
   return OPERATOR_FINISHED;
 }
-- 
2.30.2


From cc580136c687cf07c5500ae6165a923eb14ba32c Mon Sep 17 00:00:00 2001
From: Jun Mizutani <jmztn@noreply.localhost>
Date: Mon, 25 Mar 2024 09:25:24 +0100
Subject: [PATCH 03/36] Fix #119812: 'Add Child Collection' adds a brother bone
 collection

Change label from "Add Child Collection" to "Add Bone Collection", as the
newly added collection becomes a sibling, not a child, of the active one.

Pull Request: https://projects.blender.org/blender/blender/pulls/119845
---
 scripts/startup/bl_ui/properties_data_armature.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/startup/bl_ui/properties_data_armature.py b/scripts/startup/bl_ui/properties_data_armature.py
index d565c0cb7b7..2ba371ef0af 100644
--- a/scripts/startup/bl_ui/properties_data_armature.py
+++ b/scripts/startup/bl_ui/properties_data_armature.py
@@ -165,7 +165,7 @@ class ARMATURE_MT_collection_tree_context_menu(Menu):
         # editable or not. That means this menu has to do the disabling for it.
         sub = layout.column()
         sub.enabled = not active_bcoll_is_locked
-        sub.operator("armature.collection_add", text="Add Child Collection")
+        sub.operator("armature.collection_add", text="Add Bone Collection")
         sub.operator("armature.collection_remove")
         sub.operator("armature.collection_remove_unused", text="Remove Unused Collections")
 
-- 
2.30.2


From 7e2d54e786de74eb2e96d8e32cf2db53e5f7112f Mon Sep 17 00:00:00 2001
From: Sergey Sharybin <sergey@blender.org>
Date: Mon, 25 Mar 2024 11:28:28 +0100
Subject: [PATCH 04/36] Fix: Assert when exiting Metal rendered viewport

This commit fixes the following assert:
  mtl_command_buffer.mm:165, submit(), at 'MTLBackend::get()->is_inside_render_boundary()'

It happens when toggling rendered state of viewport on macOS, and is
caused by incorrect order of setting active GPU context to null and
calling GPU_render_end.

This change makes the flow of GPU_render_{begin, end} and GPU context
activation closer to what it is in the draw manager's functions
DRW_render_context_{enable, disable}.

Pull Request: https://projects.blender.org/blender/blender/pulls/119868
---
 source/blender/render/intern/engine.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/blender/render/intern/engine.cc b/source/blender/render/intern/engine.cc
index 5c68fc0ee98..d079fc1b70a 100644
--- a/source/blender/render/intern/engine.cc
+++ b/source/blender/render/intern/engine.cc
@@ -1314,8 +1314,8 @@ bool RE_engine_gpu_context_enable(RenderEngine *engine)
     /* Activate RenderEngine System and Blender GPU Context. */
     WM_system_gpu_context_activate(engine->system_gpu_context);
     if (engine->blender_gpu_context) {
-      GPU_context_active_set(engine->blender_gpu_context);
       GPU_render_begin();
+      GPU_context_active_set(engine->blender_gpu_context);
     }
     return true;
   }
@@ -1330,8 +1330,8 @@ void RE_engine_gpu_context_disable(RenderEngine *engine)
   else {
     if (engine->system_gpu_context) {
       if (engine->blender_gpu_context) {
-        GPU_render_end();
         GPU_context_active_set(nullptr);
+        GPU_render_end();
       }
       WM_system_gpu_context_release(engine->system_gpu_context);
       /* Restore DRW state context if previously active. */
-- 
2.30.2


From bffcb000e80b5d1eede74fb0a34c4f09732999ed Mon Sep 17 00:00:00 2001
From: Sergey Sharybin <sergey@blender.org>
Date: Mon, 25 Mar 2024 11:36:15 +0100
Subject: [PATCH 05/36] Fix: Cycles crash on Metal GPU with ASAN builds

Running a very simple file when Blender is built with the
WITH_COMPILER_ASAN=ON and WITH_CYCLES_KERNEL_ASAN=ON CMake options
leads to ASAN reporting an unknown-crash at the line where the worker
pool is being filled in.

It is not entirely clear if it is a real issue in the code, since
placing debug prints with `this` address report proper addresses.
However, there is no harm in capturing the `this` pointer by value,
and it does solve the ASAN reporting issues.

It is possible to reproduce the ASAN crash with the following steps:
- Start with --factory-startup
- Enable Metal device in User Preferences
- Switch render device to GPU Compute
- Switch viewport mode to Rendered

Pull Request: https://projects.blender.org/blender/blender/pulls/119867
---
 intern/cycles/device/metal/kernel.mm | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm
index b9da74e2ff3..125c7129de0 100644
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -106,7 +106,7 @@ struct ShaderCache {
 
   friend ShaderCache *get_shader_cache(id<MTLDevice> mtlDevice);
 
-  void compile_thread_func(int thread_index);
+  void compile_thread_func();
 
   using PipelineCollection = std::vector<unique_ptr<MetalKernelPipeline>>;
 
@@ -174,7 +174,7 @@ void ShaderCache::wait_for_all()
   }
 }
 
-void ShaderCache::compile_thread_func(int /*thread_index*/)
+void ShaderCache::compile_thread_func()
 {
   while (running) {
 
@@ -309,7 +309,7 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
 
       metal_printf("Spawning %d Cycles kernel compilation threads\n", max_mtlcompiler_threads);
       for (int i = 0; i < max_mtlcompiler_threads; i++) {
-        compile_threads.push_back(std::thread([&] { compile_thread_func(i); }));
+        compile_threads.push_back(std::thread([this] { this->compile_thread_func(); }));
       }
     }
   }
-- 
2.30.2


From 26337b9fb4fad0ef70656a8ed084eef9e6302ca9 Mon Sep 17 00:00:00 2001
From: Aras Pranckevicius <aras@nesnausk.org>
Date: Mon, 25 Mar 2024 11:40:20 +0100
Subject: [PATCH 06/36] Metal: implement support for compressed textures

Noticed lack of it via #119793. Now DDS images using BC1/BC2/BC3
(aka DXT1/DXT3/DXT5) formats can keep on being GPU compressed
on Metal too, just like e.g. on OpenGL.

Pull Request: https://projects.blender.org/blender/blender/pulls/119835
---
 source/blender/gpu/metal/mtl_texture.mm      | 31 +++++++++++++++++---
 source/blender/gpu/metal/mtl_texture_util.mm | 22 ++++++++++++--
 2 files changed, 47 insertions(+), 6 deletions(-)

diff --git a/source/blender/gpu/metal/mtl_texture.mm b/source/blender/gpu/metal/mtl_texture.mm
index ca73c206e38..f9d46627ada 100644
--- a/source/blender/gpu/metal/mtl_texture.mm
+++ b/source/blender/gpu/metal/mtl_texture.mm
@@ -527,6 +527,8 @@ void gpu::MTLTexture::update_sub(
     }
   }
 
+  const bool is_compressed = (format_flag_ & GPU_FORMAT_COMPRESSED);
+
   @autoreleasepool {
     /* Determine totalsize of INPUT Data. */
     int num_channels = to_component_len(format_);
@@ -593,10 +595,12 @@ void gpu::MTLTexture::update_sub(
         false /* Not a clear. */
     };
 
-    /* Determine whether we can do direct BLIT or not. */
+    /* Determine whether we can do direct BLIT or not. For compressed textures,
+     * always assume a direct blit (input data pretends to be float, but it is
+     * not). */
     bool can_use_direct_blit = true;
-    if (expected_dst_bytes_per_pixel != input_bytes_per_pixel ||
-        num_channels != destination_num_channels)
+    if (!is_compressed && (expected_dst_bytes_per_pixel != input_bytes_per_pixel ||
+                           num_channels != destination_num_channels))
     {
       can_use_direct_blit = false;
     }
@@ -620,7 +624,7 @@ void gpu::MTLTexture::update_sub(
 
     /* Safety Checks. */
     if (type == GPU_DATA_UINT_24_8 || type == GPU_DATA_10_11_11_REV ||
-        type == GPU_DATA_2_10_10_10_REV)
+        type == GPU_DATA_2_10_10_10_REV || is_compressed)
     {
       BLI_assert(can_use_direct_blit &&
                  "Special input data type must be a 1-1 mapping with destination texture as it "
@@ -755,6 +759,12 @@ void gpu::MTLTexture::update_sub(
                                       extent[0] :
                                       ctx->pipeline_state.unpack_row_length);
           size_t bytes_per_image = bytes_per_row;
+          if (is_compressed) {
+            size_t block_size = to_block_size(format_);
+            size_t blocks_x = divide_ceil_u(extent[0], 4);
+            bytes_per_row = blocks_x * block_size;
+            bytes_per_image = bytes_per_row;
+          }
           int max_array_index = ((type_ == GPU_TEXTURE_1D_ARRAY) ? extent[1] : 1);
           for (int array_index = 0; array_index < max_array_index; array_index++) {
 
@@ -827,6 +837,13 @@ void gpu::MTLTexture::update_sub(
                                       extent[0] :
                                       ctx->pipeline_state.unpack_row_length);
           size_t bytes_per_image = bytes_per_row * extent[1];
+          if (is_compressed) {
+            size_t block_size = to_block_size(format_);
+            size_t blocks_x = divide_ceil_u(extent[0], 4);
+            size_t blocks_y = divide_ceil_u(extent[1], 4);
+            bytes_per_row = blocks_x * block_size;
+            bytes_per_image = bytes_per_row * blocks_y;
+          }
 
           size_t texture_array_relative_offset = 0;
           int base_slice = (type_ == GPU_TEXTURE_2D_ARRAY) ? offset[2] : 0;
@@ -1218,6 +1235,12 @@ void gpu::MTLTexture::ensure_mipmaps(int miplvl)
 
 void gpu::MTLTexture::generate_mipmap()
 {
+  /* Compressed textures allow users to provide their own custom mipmaps. And
+   * we can't generate them at runtime anyway. */
+  if (format_flag_ & GPU_FORMAT_COMPRESSED) {
+    return;
+  }
+
   /* Fetch Active Context. */
   MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
   BLI_assert(ctx);
diff --git a/source/blender/gpu/metal/mtl_texture_util.mm b/source/blender/gpu/metal/mtl_texture_util.mm
index 86773f048b7..8b29a582833 100644
--- a/source/blender/gpu/metal/mtl_texture_util.mm
+++ b/source/blender/gpu/metal/mtl_texture_util.mm
@@ -160,13 +160,17 @@ MTLPixelFormat gpu_texture_format_to_metal(eGPUTextureFormat tex_format)
       return MTLPixelFormatR8Snorm;
     /* Special formats, texture only. */
     case GPU_SRGB8_A8_DXT1:
+      return MTLPixelFormatBC1_RGBA_sRGB;
     case GPU_SRGB8_A8_DXT3:
+      return MTLPixelFormatBC2_RGBA_sRGB;
     case GPU_SRGB8_A8_DXT5:
+      return MTLPixelFormatBC3_RGBA_sRGB;
     case GPU_RGBA8_DXT1:
+      return MTLPixelFormatBC1_RGBA;
     case GPU_RGBA8_DXT3:
+      return MTLPixelFormatBC2_RGBA;
     case GPU_RGBA8_DXT5:
-      BLI_assert_msg(false, "Compressed texture not implemented yet!\n");
-      return MTLPixelFormatRGBA8Unorm;
+      return MTLPixelFormatBC3_RGBA;
     case GPU_SRGB8:
       /* 24-Bit pixel format are not supported. Emulate using a padded type with alpha. */
       return MTLPixelFormatRGBA8Unorm_sRGB;
@@ -247,6 +251,14 @@ size_t get_mtl_format_bytesize(MTLPixelFormat tex_format)
       return 4;
     case MTLPixelFormatDepth16Unorm:
       return 2;
+    case MTLPixelFormatBC1_RGBA:
+    case MTLPixelFormatBC1_RGBA_sRGB:
+      return 1; /* Note: not quite correct (BC1 is 0.5 bpp). */
+    case MTLPixelFormatBC2_RGBA:
+    case MTLPixelFormatBC2_RGBA_sRGB:
+    case MTLPixelFormatBC3_RGBA:
+    case MTLPixelFormatBC3_RGBA_sRGB:
+      return 1;
 
     default:
       BLI_assert_msg(false, "Unrecognised GPU pixel format!\n");
@@ -272,6 +284,12 @@ int get_mtl_format_num_components(MTLPixelFormat tex_format)
     case MTLPixelFormatRGBA8Unorm_sRGB:
     case MTLPixelFormatRGB10A2Uint:
     case MTLPixelFormatRGB10A2Unorm:
+    case MTLPixelFormatBC1_RGBA_sRGB:
+    case MTLPixelFormatBC2_RGBA_sRGB:
+    case MTLPixelFormatBC3_RGBA_sRGB:
+    case MTLPixelFormatBC1_RGBA:
+    case MTLPixelFormatBC2_RGBA:
+    case MTLPixelFormatBC3_RGBA:
       return 4;
 
     case MTLPixelFormatRG11B10Float:
-- 
2.30.2


From 7371b11bb5a5eed0205fba2c8400546073a628d6 Mon Sep 17 00:00:00 2001
From: Campbell Barton <campbell@blender.org>
Date: Mon, 25 Mar 2024 22:04:32 +1100
Subject: [PATCH 07/36] Fix #119871: Window contents invalid on startup under
 Wayland & GNOME

Resolve a glitch where the window contents didn't fit the window
on startup under GNOME. This also avoids flickering whenever the
window manager changed the window size from the requested size.

See code-comments for details.
---
 intern/ghost/intern/GHOST_WindowWayland.cc | 24 ++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/intern/ghost/intern/GHOST_WindowWayland.cc b/intern/ghost/intern/GHOST_WindowWayland.cc
index 93b40b941e3..0b397732744 100644
--- a/intern/ghost/intern/GHOST_WindowWayland.cc
+++ b/intern/ghost/intern/GHOST_WindowWayland.cc
@@ -1886,6 +1886,30 @@ GHOST_WindowWayland::GHOST_WindowWayland(GHOST_SystemWayland *system,
     gwl_window_state_set(window_, state);
   }
 
+  /* NOTE(@ideasman42): Round trips are important before committing.
+   * This is needed because setting the state is likely to resize the window
+   * (in the case of maximized & full-screen), "normal" windows may still be resized when
+   * they are too large or with tiling window-managers.
+   *
+   * The additional updates allow for the actual size to be configured by the window manager
+   * which is read back before committing the surface. This avoids displaying the buffer
+   * before it's resized (avoiding flickering).
+   *
+   * Without the round-trip here:
+   * - The window will be created and this function will return using the requested buffer size,
+   *   instead of the window size which ends up being used (causing a visible flicker).
+   *   This has the down side that Blender's internal window state has the outdated size
+   *   which then gets immediately resized, causing a noticeable glitch.
+   * - The window decorations will be displayed at the wrong size before refreshing
+   *   at the new size.
+   * - GNOME-Shell 46 shows the previous buffer-size under some conditions, see #119871.
+   * - 2x updates are needed for RIVER & HYPRLAND.
+   */
+  for (int i = 0; i < 2; i++) {
+    wl_display_flush(system->wl_display_get());
+    wl_display_dispatch(system->wl_display_get());
+  }
+
   /* Commit after setting the buffer.
    * While postponing until after the buffer drawing is context is set
    * isn't essential, it reduces flickering. */
-- 
2.30.2


From 2e8259e4d4bb0eb05d4134b0c56f85411914fc03 Mon Sep 17 00:00:00 2001
From: Pratik Borhade <pratikborhade302@gmail.com>
Date: Mon, 25 Mar 2024 12:17:03 +0100
Subject: [PATCH 08/36] GPv3: Clean loose points in draw mode menu

Include the `clean loose` operator in the draw mode menu,
and move this operator inside the `Cleanup` menu (same as GPv2).

Pull Request: https://projects.blender.org/blender/blender/pulls/119782
---
 scripts/startup/bl_ui/space_view3d.py                 | 11 ++++++++++-
 .../grease_pencil/intern/grease_pencil_edit.cc        |  2 +-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/scripts/startup/bl_ui/space_view3d.py b/scripts/startup/bl_ui/space_view3d.py
index ef75e104829..a69b8ec453d 100644
--- a/scripts/startup/bl_ui/space_view3d.py
+++ b/scripts/startup/bl_ui/space_view3d.py
@@ -2187,6 +2187,7 @@ class VIEW3D_MT_paint_grease_pencil(Menu):
         layout.separator()
 
         layout.menu("VIEW3D_MT_edit_greasepencil_showhide")
+        layout.menu("VIEW3D_MT_edit_greasepencil_cleanup")
 
         layout.separator()
 
@@ -5802,6 +5803,13 @@ class VIEW3D_MT_edit_greasepencil_showhide(Menu):
         layout.operator("grease_pencil.layer_hide", text="Hide Active Layer").unselected = False
         layout.operator("grease_pencil.layer_hide", text="Hide Inactive Layers").unselected = True
 
+class VIEW3D_MT_edit_greasepencil_cleanup(Menu):
+    bl_label = "Cleanup"
+
+    def draw(self, _context):
+        layout = self.layout
+
+        layout.operator("grease_pencil.clean_loose")
 
 class VIEW3D_MT_edit_greasepencil(Menu):
     bl_label = "Grease Pencil"
@@ -5828,7 +5836,7 @@ class VIEW3D_MT_edit_greasepencil(Menu):
 
         layout.menu("VIEW3D_MT_edit_greasepencil_showhide")
         layout.operator_menu_enum("grease_pencil.separate", "mode", text="Separate")
-        layout.operator("grease_pencil.clean_loose")
+        layout.menu("VIEW3D_MT_edit_greasepencil_cleanup")
 
         layout.separator()
 
@@ -8988,6 +8996,7 @@ classes = (
     VIEW3D_MT_edit_gpencil_delete,
     VIEW3D_MT_edit_gpencil_showhide,
     VIEW3D_MT_edit_greasepencil_showhide,
+    VIEW3D_MT_edit_greasepencil_cleanup,
     VIEW3D_MT_weight_gpencil,
     VIEW3D_MT_gpencil_animation,
     VIEW3D_MT_gpencil_simplify,
diff --git a/source/blender/editors/grease_pencil/intern/grease_pencil_edit.cc b/source/blender/editors/grease_pencil/intern/grease_pencil_edit.cc
index 57f1e1696d8..3524274ef81 100644
--- a/source/blender/editors/grease_pencil/intern/grease_pencil_edit.cc
+++ b/source/blender/editors/grease_pencil/intern/grease_pencil_edit.cc
@@ -1406,7 +1406,7 @@ static void GREASE_PENCIL_OT_clean_loose(wmOperatorType *ot)
 
   ot->invoke = WM_operator_props_popup_confirm;
   ot->exec = grease_pencil_clean_loose_exec;
-  ot->poll = editable_grease_pencil_poll;
+  ot->poll = active_grease_pencil_layer_poll;
 
   ot->flag = OPTYPE_REGISTER | OPTYPE_UNDO;
 
-- 
2.30.2


From ab93a426a0c316f263c7f40d7956e7bf680361f3 Mon Sep 17 00:00:00 2001
From: Campbell Barton <campbell@blender.org>
Date: Mon, 25 Mar 2024 22:26:42 +1100
Subject: [PATCH 09/36] Fix assert from recent cleanup

Change from [0] didn't account for edit-meshes without faces.

[0]: 3805974b6fa8e15ae4409f597747bc7beee51fa3
---
 source/blender/blenkernel/intern/editmesh.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/blender/blenkernel/intern/editmesh.cc b/source/blender/blenkernel/intern/editmesh.cc
index 750be28c146..cc1c8cf16ea 100644
--- a/source/blender/blenkernel/intern/editmesh.cc
+++ b/source/blender/blenkernel/intern/editmesh.cc
@@ -91,7 +91,7 @@ void BKE_editmesh_looptris_calc_with_partial_ex(BMEditMesh *em,
                                                 const BMeshCalcTessellation_Params *params)
 {
   BLI_assert(em->looptris.size() == poly_to_tri_count(em->bm->totface, em->bm->totloop));
-  BLI_assert(!em->looptris.is_empty());
+  BLI_assert(!(em->bm->totface && em->looptris.is_empty()));
 
   BM_mesh_calc_tessellation_with_partial_ex(em->bm, em->looptris, bmpinfo, params);
 }
-- 
2.30.2


From 082b68fcb9905e2e290e433ee67f1646146bbad8 Mon Sep 17 00:00:00 2001
From: Weizhen Huang <weizhen@blender.org>
Date: Mon, 25 Mar 2024 13:02:02 +0100
Subject: [PATCH 10/36] Cycles: improve equiangular sampling in volume

By restricting the sample range along the ray to the valid segment.

Supports

**Mesh Light**
- [x] restrict the ray segment to the side with MIS

**Area Light**
- [x] when the spread is zero, find the intersection of the ray and the bounding box/cylinder of the rectangle/ellipse area light beam
- [x] when the spread is non-zero, find the intersection of the ray and the minimal enclosing cone of the area light beam
*note the result is also unbiased when we just consider the cone from the sampled point in the volume segment. Far away from the light source it's less noisy than the current solution, but near the light source it's much noisier. We would then have to restrict the sample region on the area light to the part that lights the ray; I haven't tried yet to see if it would be less noisy.*

**Point Light**
- [x] the complete ray segment should be valid.

**Spot Light**
- [x] intersect the ray with the spot light cone
- [x] support non-zero radius

Pull Request: https://projects.blender.org/blender/blender/pulls/119438
---
 .../cycles/kernel/integrator/shade_volume.h   |  80 ++++++++---
 intern/cycles/kernel/light/area.h             |  58 +++++++-
 intern/cycles/kernel/light/spot.h             |  18 +++
 intern/cycles/kernel/light/triangle.h         |  19 +++
 intern/cycles/kernel/types.h                  |   2 +
 intern/cycles/scene/light.cpp                 |   3 +
 intern/cycles/util/math.h                     |  40 ++++++
 intern/cycles/util/math_intersect.h           | 134 ++++++++++++++++++
 intern/cycles/util/transform.h                |  11 ++
 tests/data                                    |   2 +-
 10 files changed, 343 insertions(+), 24 deletions(-)

diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h
index d94a29b7f49..594396d987e 100644
--- a/intern/cycles/kernel/integrator/shade_volume.h
+++ b/intern/cycles/kernel/integrator/shade_volume.h
@@ -64,6 +64,11 @@ typedef struct VolumeShaderCoefficients {
   Spectrum emission;
 } VolumeShaderCoefficients;
 
+typedef struct EquiangularCoefficients {
+  float3 P;
+  float2 t_range;
+} EquiangularCoefficients;
+
 /* Evaluate shader to get extinction coefficient at P. */
 ccl_device_inline bool shadow_volume_shader_sample(KernelGlobals kg,
                                                    IntegratorShadowState state,
@@ -264,18 +269,18 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
 #  define VOLUME_SAMPLE_PDF_CUTOFF 1e-8f
 
 ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict ray,
-                                           const float3 light_P,
+                                           ccl_private const EquiangularCoefficients &coeffs,
                                            const float xi,
                                            ccl_private float *pdf)
 {
-  const float tmin = ray->tmin;
-  const float tmax = ray->tmax;
-  const float delta = dot((light_P - ray->P), ray->D);
-  const float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
+  const float delta = dot((coeffs.P - ray->P), ray->D);
+  const float D = safe_sqrtf(len_squared(coeffs.P - ray->P) - delta * delta);
   if (UNLIKELY(D == 0.0f)) {
     *pdf = 0.0f;
     return 0.0f;
   }
+  const float tmin = coeffs.t_range.x;
+  const float tmax = coeffs.t_range.y;
   const float theta_a = atan2f(tmin - delta, D);
   const float theta_b = atan2f(tmax - delta, D);
   const float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
@@ -289,17 +294,17 @@ ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict r
 }
 
 ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray,
-                                        const float3 light_P,
+                                        ccl_private const EquiangularCoefficients &coeffs,
                                         const float sample_t)
 {
-  const float delta = dot((light_P - ray->P), ray->D);
-  const float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
+  const float delta = dot((coeffs.P - ray->P), ray->D);
+  const float D = safe_sqrtf(len_squared(coeffs.P - ray->P) - delta * delta);
   if (UNLIKELY(D == 0.0f)) {
     return 0.0f;
   }
 
-  const float tmin = ray->tmin;
-  const float tmax = ray->tmax;
+  const float tmin = coeffs.t_range.x;
+  const float tmax = coeffs.t_range.y;
   const float t_ = sample_t - delta;
 
   const float theta_a = atan2f(tmin - delta, D);
@@ -313,6 +318,36 @@ ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray,
   return pdf;
 }
 
+ccl_device_inline bool volume_equiangular_valid_ray_segment(KernelGlobals kg,
+                                                            const float3 ray_P,
+                                                            const float3 ray_D,
+                                                            ccl_private float2 *t_range,
+                                                            const ccl_private LightSample *ls)
+{
+#  ifdef __LIGHT_TREE__
+  /* Do not compute ray segment until #119389 is landed. */
+  if (kernel_data.integrator.use_light_tree) {
+    return true;
+  }
+#  endif
+
+  if (ls->type == LIGHT_SPOT) {
+    ccl_global const KernelLight *klight = &kernel_data_fetch(lights, ls->lamp);
+    return spot_light_valid_ray_segment(klight, ray_P, ray_D, t_range);
+  }
+  if (ls->type == LIGHT_AREA) {
+    ccl_global const KernelLight *klight = &kernel_data_fetch(lights, ls->lamp);
+    return area_light_valid_ray_segment(&klight->area, ray_P - klight->co, ray_D, t_range);
+  }
+  if (ls->type == LIGHT_TRIANGLE) {
+    return triangle_light_valid_ray_segment(kg, ray_P - ls->P, ray_D, t_range, ls);
+  }
+
+  /* Point light, the whole range of the ray is visible. */
+  kernel_assert(ls->type == LIGHT_POINT);
+  return true;
+}
+
 /* Distance sampling */
 
 ccl_device float volume_distance_sample(float max_t,
@@ -403,7 +438,7 @@ typedef struct VolumeIntegrateState {
 ccl_device_forceinline void volume_integrate_step_scattering(
     ccl_private const ShaderData *sd,
     ccl_private const Ray *ray,
-    const float3 equiangular_light_P,
+    ccl_private const EquiangularCoefficients &equiangular_coeffs,
     ccl_private const VolumeShaderCoefficients &ccl_restrict coeff,
     const Spectrum transmittance,
     ccl_private VolumeIntegrateState &ccl_restrict vstate,
@@ -474,7 +509,7 @@ ccl_device_forceinline void volume_integrate_step_scattering(
 
           /* Multiple importance sampling. */
           if (vstate.use_mis) {
-            const float equiangular_pdf = volume_equiangular_pdf(ray, equiangular_light_P, new_t);
+            const float equiangular_pdf = volume_equiangular_pdf(ray, equiangular_coeffs, new_t);
             const float mis_weight = power_heuristic(vstate.distance_pdf * distance_pdf,
                                                      equiangular_pdf);
             result.direct_throughput *= 2.0f * mis_weight;
@@ -509,7 +544,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
     ccl_global float *ccl_restrict render_buffer,
     const float object_step_size,
     const VolumeSampleMethod direct_sample_method,
-    const float3 equiangular_light_P,
+    ccl_private const EquiangularCoefficients &equiangular_coeffs,
     ccl_private VolumeIntegrateResult &result)
 {
   PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INTEGRATE);
@@ -560,7 +595,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
   /* Equiangular sampling: compute distance and PDF in advance. */
   if (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR) {
     result.direct_t = volume_equiangular_sample(
-        ray, equiangular_light_P, vstate.rscatter, &vstate.equiangular_pdf);
+        ray, equiangular_coeffs, vstate.rscatter, &vstate.equiangular_pdf);
   }
 #  ifdef __PATH_GUIDING__
   result.direct_sample_method = vstate.direct_sample_method;
@@ -614,7 +649,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
 
           /* Scattering and absorption. */
           volume_integrate_step_scattering(
-              sd, ray, equiangular_light_P, coeff, transmittance, vstate, result);
+              sd, ray, equiangular_coeffs, coeff, transmittance, vstate, result);
         }
         else {
           /* Absorption only. */
@@ -673,7 +708,7 @@ ccl_device_forceinline bool integrate_volume_equiangular_sample_light(
     ccl_private const Ray *ccl_restrict ray,
     ccl_private const ShaderData *ccl_restrict sd,
     ccl_private const RNGState *ccl_restrict rng_state,
-    ccl_private float3 *ccl_restrict P)
+    ccl_private EquiangularCoefficients *ccl_restrict equiangular_coeffs)
 {
   /* Test if there is a light or BSDF that needs direct light. */
   if (!kernel_data.integrator.use_direct_light) {
@@ -708,9 +743,10 @@ ccl_device_forceinline bool integrate_volume_equiangular_sample_light(
     return false;
   }
 
-  *P = ls.P;
+  equiangular_coeffs->P = ls.P;
 
-  return true;
+  return volume_equiangular_valid_ray_segment(
+      kg, ray->P, ray->D, &equiangular_coeffs->t_range, &ls);
 }
 
 /* Path tracing: sample point on light and evaluate light shader, then
@@ -990,10 +1026,12 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
   /* Sample light ahead of volume stepping, for equiangular sampling. */
   /* TODO: distant lights are ignored now, but could instead use even distribution. */
   const bool need_light_sample = !(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_TERMINATE);
-  float3 equiangular_P = zero_float3();
+
+  EquiangularCoefficients equiangular_coeffs = {zero_float3(), make_float2(ray->tmin, ray->tmax)};
+
   const bool have_equiangular_sample = need_light_sample &&
                                        integrate_volume_equiangular_sample_light(
-                                           kg, state, ray, &sd, &rng_state, &equiangular_P);
+                                           kg, state, ray, &sd, &rng_state, &equiangular_coeffs);
 
   VolumeSampleMethod direct_sample_method = (have_equiangular_sample) ?
                                                 volume_stack_sample_method(kg, state) :
@@ -1023,7 +1061,7 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
                                  render_buffer,
                                  step_size,
                                  direct_sample_method,
-                                 equiangular_P,
+                                 equiangular_coeffs,
                                  result);
 
   /* Perform path termination. The intersect_closest will have already marked this path
diff --git a/intern/cycles/kernel/light/area.h b/intern/cycles/kernel/light/area.h
index eb03ca866ef..51a66265546 100644
--- a/intern/cycles/kernel/light/area.h
+++ b/intern/cycles/kernel/light/area.h
@@ -233,6 +233,11 @@ ccl_device bool area_light_spread_clamp_light(const float3 P,
   return true;
 }
 
+ccl_device_forceinline bool area_light_is_ellipse(const ccl_global KernelAreaLight *light)
+{
+  return light->invarea < 0.0f;
+}
+
 /* Common API. */
 /* Compute `eval_fac` and `pdf`. Also sample a new position on the light if `sample_coord`. */
 template<bool in_volume_segment>
@@ -338,7 +343,7 @@ ccl_device_inline bool area_light_sample(const ccl_global KernelLight *klight,
   const float light_v = dot(inplane, klight->area.axis_v) / klight->area.len_v;
 
   if (!in_volume_segment) {
-    const bool is_ellipse = (klight->area.invarea < 0.0f);
+    const bool is_ellipse = area_light_is_ellipse(&klight->area);
 
     /* Sampled point lies outside of the area light. */
     if (is_ellipse && (sqr(light_u) + sqr(light_v) > 0.25f)) {
@@ -380,7 +385,7 @@ ccl_device_inline bool area_light_intersect(const ccl_global KernelLight *klight
 {
   /* Area light. */
   const float invarea = fabsf(klight->area.invarea);
-  const bool is_ellipse = (klight->area.invarea < 0.0f);
+  const bool is_ellipse = area_light_is_ellipse(&klight->area);
   if (invarea == 0.0f) {
     return false;
   }
@@ -428,6 +433,55 @@ ccl_device_inline bool area_light_sample_from_intersection(
   return area_light_eval<false>(klight, ray_P, &light_P, ls, zero_float2(), false);
 }
 
+/* Returns the maximal distance between the light center and the boundary. */
+ccl_device_forceinline float area_light_max_extent(const ccl_global KernelAreaLight *light)
+{
+  return 0.5f * (area_light_is_ellipse(light) ? fmaxf(light->len_u, light->len_v) :
+                                                len(make_float2(light->len_u, light->len_v)));
+}
+
+/* Find the ray segment lit by the area light. */
+ccl_device_inline bool area_light_valid_ray_segment(const ccl_global KernelAreaLight *light,
+                                                    float3 P,
+                                                    float3 D,
+                                                    ccl_private float2 *t_range)
+{
+  bool valid;
+  const float tan_half_spread = light->tan_half_spread;
+  float3 axis = light->dir;
+
+  const bool angle_almost_zero = (tan_half_spread < 1e-5f);
+  if (angle_almost_zero) {
+    /* Map to local coordinates of the light. Do not use `itfm` in `KernelLight` as there might be
+     * additional scaling in the light size. */
+    const Transform tfm = make_transform(light->axis_u, light->axis_v, axis);
+    P = transform_point(&tfm, P);
+    D = transform_direction(&tfm, D);
+    axis = make_float3(0.0f, 0.0f, 1.0f);
+
+    const float half_len_u = 0.5f * light->len_u;
+    const float half_len_v = 0.5f * light->len_v;
+    if (area_light_is_ellipse(light)) {
+      valid = ray_infinite_cylinder_intersect(P, D, half_len_u, half_len_v, t_range);
+    }
+    else {
+      const float3 bbox_min = make_float3(-half_len_u, -half_len_v, 0.0f);
+      const float3 bbox_max = make_float3(half_len_u, half_len_v, FLT_MAX);
+      valid = ray_aabb_intersect(bbox_min, bbox_max, P, D, t_range);
+    }
+  }
+  else {
+    /* Conservative estimation with the smallest possible cone covering the whole spread. */
+    const float3 apex_to_point = P + area_light_max_extent(light) / tan_half_spread * axis;
+    const float cos_angle_sq = 1.0f / (1.0f + sqr(tan_half_spread));
+
+    valid = ray_cone_intersect(axis, apex_to_point, D, cos_angle_sq, t_range);
+  }
+
+  /* Limit the range to the positive side of the area light. */
+  return valid && ray_plane_intersect(axis, P, D, t_range);
+}
+
 template<bool in_volume_segment>
 ccl_device_forceinline bool area_light_tree_parameters(const ccl_global KernelLight *klight,
                                                        const float3 centroid,
diff --git a/intern/cycles/kernel/light/spot.h b/intern/cycles/kernel/light/spot.h
index c5090573d4d..56989933ded 100644
--- a/intern/cycles/kernel/light/spot.h
+++ b/intern/cycles/kernel/light/spot.h
@@ -265,6 +265,24 @@ ccl_device_inline bool spot_light_sample_from_intersection(
   return true;
 }
 
+/* Find the ray segment lit by the spot light. */
+ccl_device_inline bool spot_light_valid_ray_segment(const ccl_global KernelLight *klight,
+                                                    const float3 P,
+                                                    const float3 D,
+                                                    ccl_private float2 *t_range)
+{
+  /* Convert to local space of the spot light. */
+  const Transform itfm = klight->itfm;
+  float3 local_P = P + klight->spot.dir * klight->spot.ray_segment_dp;
+  local_P = transform_point(&itfm, local_P);
+  const float3 local_D = transform_direction(&itfm, D);
+  const float3 axis = make_float3(0.0f, 0.0f, -1.0f);
+
+  /* Intersect the ray with the smallest enclosing cone of the light spread. */
+  return ray_cone_intersect(
+      axis, local_P, local_D, sqr(klight->spot.cos_half_spot_angle), t_range);
+}
+
 template<bool in_volume_segment>
 ccl_device_forceinline bool spot_light_tree_parameters(const ccl_global KernelLight *klight,
                                                        const float3 centroid,
diff --git a/intern/cycles/kernel/light/triangle.h b/intern/cycles/kernel/light/triangle.h
index 58fc8ea1d92..16834555f1a 100644
--- a/intern/cycles/kernel/light/triangle.h
+++ b/intern/cycles/kernel/light/triangle.h
@@ -269,6 +269,25 @@ ccl_device_forceinline bool triangle_light_sample(KernelGlobals kg,
   return (ls->pdf > 0.0f);
 }
 
+/* Find the ray segment lit by the triangle light. */
+ccl_device_inline bool triangle_light_valid_ray_segment(KernelGlobals kg,
+                                                        const float3 P,
+                                                        const float3 D,
+                                                        ccl_private float2 *t_range,
+                                                        const ccl_private LightSample *ls)
+{
+  const int shader_flag = kernel_data_fetch(shaders, ls->shader & SHADER_MASK).flags;
+  const int SD_MIS_BOTH = SD_MIS_BACK | SD_MIS_FRONT;
+  if ((shader_flag & SD_MIS_BOTH) == SD_MIS_BOTH) {
+    /* Both sides are sampled, the complete ray segment is visible. */
+    return true;
+  }
+
+  /* Only one side is sampled, intersect the ray and the triangle light plane to find the visible
+   * ray segment. Flip normal if Emission Sampling is set to back. */
+  return ray_plane_intersect((shader_flag & SD_MIS_BACK) ? -ls->Ng : ls->Ng, P, D, t_range);
+}
+
 template<bool in_volume_segment>
 ccl_device_forceinline bool triangle_light_tree_parameters(
     KernelGlobals kg,
diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h
index 2ed5a790199..b4978744cc7 100644
--- a/intern/cycles/kernel/types.h
+++ b/intern/cycles/kernel/types.h
@@ -1376,6 +1376,8 @@ typedef struct KernelSpotLight {
   int is_sphere;
   /* For non-uniform object scaling, the actual spread might be different. */
   float cos_half_larger_spread;
+  /* Distance from the apex of the smallest enclosing cone of the light spread to light center. */
+  float ray_segment_dp;
 } KernelSpotLight;
 
 /* PointLight is SpotLight with only radius and invarea being used. */
diff --git a/intern/cycles/scene/light.cpp b/intern/cycles/scene/light.cpp
index 78d237bcd8c..fb424a8fadf 100644
--- a/intern/cycles/scene/light.cpp
+++ b/intern/cycles/scene/light.cpp
@@ -1362,6 +1362,9 @@ void LightManager::device_update_lights(Device *device, DeviceScene *dscene, Sce
       /* Choose the angle which spans a larger cone. */
       klights[light_index].spot.cos_half_larger_spread = inversesqrtf(
           1.0f + tan_sq * fmaxf(len_u_sq, len_v_sq) / len_w_sq);
+      /* radius / sin(half_angle_small) */
+      klights[light_index].spot.ray_segment_dp =
+          light->size * sqrtf(1.0f + len_w_sq / (tan_sq * fminf(len_u_sq, len_v_sq)));
     }
 
     klights[light_index].shader_id = shader_id;
diff --git a/intern/cycles/util/math.h b/intern/cycles/util/math.h
index 7d5cab7e30c..cdea258c916 100644
--- a/intern/cycles/util/math.h
+++ b/intern/cycles/util/math.h
@@ -1030,6 +1030,46 @@ ccl_device_inline uint32_t reverse_integer_bits(uint32_t x)
 #endif
 }
 
+/* Check if intervals (first->x, first->y) and (second.x, second.y) intersect, and replace the
+ * first interval with their intersection. */
+ccl_device_inline bool intervals_intersect(ccl_private float2 *first, const float2 second)
+{
+  first->x = fmaxf(first->x, second.x);
+  first->y = fminf(first->y, second.y);
+
+  return first->x < first->y;
+}
+
+/* Solve quadratic equation a*x^2 + b*x + c = 0, adapted from Mitsuba 3.
+ * The solutions are ordered so that x1 <= x2.
+ * Returns true if at least one solution is found. */
+ccl_device_inline bool solve_quadratic(
+    const float a, const float b, const float c, ccl_private float &x1, ccl_private float &x2)
+{
+  /* If the equation is linear, the solution is -c/b, but b has to be non-zero. */
+  const bool valid_linear = (a == 0.0f) && (b != 0.0f);
+  x1 = x2 = -c / b;
+
+  const float discriminant = sqr(b) - 4.0f * a * c;
+  /* Allow slightly negative discriminant in case of numerical precision issues. */
+  const bool valid_quadratic = (a != 0.0f) && (discriminant > -1e-5f);
+
+  if (valid_quadratic) {
+    /* Numerically stable version of (-b ± sqrt(discriminant)) / (2 * a), avoiding catastrophic
+     * cancellation when `b` is very close to `sqrt(discriminant)`, by finding the solution of
+     * greater magnitude which does not suffer from loss of precision, then using the identity
+     * x1 * x2 = c / a. */
+    const float temp = -0.5f * (b + copysignf(safe_sqrtf(discriminant), b));
+    const float r1 = temp / a;
+    const float r2 = c / temp;
+
+    x1 = fminf(r1, r2);
+    x2 = fmaxf(r1, r2);
+  }
+
+  return (valid_linear || valid_quadratic);
+}
+
 CCL_NAMESPACE_END
 
 #endif /* __UTIL_MATH_H__ */
diff --git a/intern/cycles/util/math_intersect.h b/intern/cycles/util/math_intersect.h
index b09cf2a4b1b..2e4b9c979f7 100644
--- a/intern/cycles/util/math_intersect.h
+++ b/intern/cycles/util/math_intersect.h
@@ -302,6 +302,140 @@ ccl_device bool ray_quad_intersect(float3 ray_P,
   return true;
 }
 
+/* Find the ray segment that lies on the same side as the normal `N` of the plane.
+ * `P` is the vector pointing from any point on the plane to the ray origin. */
+ccl_device bool ray_plane_intersect(const float3 N,
+                                    const float3 P,
+                                    const float3 ray_D,
+                                    ccl_private float2 *t_range)
+{
+  const float DN = dot(ray_D, N);
+
+  /* Parametric distance along the ray from P to the plane. */
+  const float t = -dot(P, N) / DN;
+
+  /* Limit the range to the positive side. */
+  if (DN > 0.0f) {
+    t_range->x = fmaxf(t_range->x, t);
+  }
+  else {
+    t_range->y = fminf(t_range->y, t);
+  }
+
+  return t_range->x < t_range->y;
+}
+
+/* Find the ray segment inside an axis-aligned bounding box. */
+ccl_device bool ray_aabb_intersect(const float3 bbox_min,
+                                   const float3 bbox_max,
+                                   const float3 ray_P,
+                                   const float3 ray_D,
+                                   ccl_private float2 *t_range)
+{
+  const float3 inv_ray_D = rcp(ray_D);
+
+  /* Absolute distances to lower and upper box coordinates. */
+  const float3 t_lower = (bbox_min - ray_P) * inv_ray_D;
+  const float3 t_upper = (bbox_max - ray_P) * inv_ray_D;
+
+  /* The four t-intervals (for x-/y-/z-slabs, and ray p(t)). */
+  const float4 tmins = float3_to_float4(min(t_lower, t_upper), t_range->x);
+  const float4 tmaxes = float3_to_float4(max(t_lower, t_upper), t_range->y);
+
+  /* Max of mins and min of maxes. */
+  const float tmin = reduce_max(tmins);
+  const float tmax = reduce_min(tmaxes);
+
+  *t_range = make_float2(tmin, tmax);
+
+  return tmin < tmax;
+}
+
+/* Find the segment of a ray defined by P + D * t that lies inside a cylinder defined by
+ * (x / len_u)^2 + (y / len_v)^2 = 1. */
+ccl_device_inline bool ray_infinite_cylinder_intersect(const float3 P,
+                                                       const float3 D,
+                                                       const float len_u,
+                                                       const float len_v,
+                                                       ccl_private float2 *t_range)
+{
+  /* Convert to a 2D problem. */
+  const float2 inv_len = 1.0f / make_float2(len_u, len_v);
+  float2 P_proj = float3_to_float2(P) * inv_len;
+  const float2 D_proj = float3_to_float2(D) * inv_len;
+
+  /* Solve quadratic equation a*t^2 + 2b*t + c = 0. */
+  const float a = dot(D_proj, D_proj);
+  float b = dot(P_proj, D_proj);
+
+  /* Move ray origin closer to the cylinder to avoid precision issues when the ray is far away. */
+  const float t_mid = -b / a;
+  P_proj += D_proj * t_mid;
+
+  /* Recompute b from the shifted origin. */
+  b = dot(P_proj, D_proj);
+  const float c = dot(P_proj, P_proj) - 1.0f;
+
+  float tmin, tmax;
+  const bool valid = solve_quadratic(a, 2.0f * b, c, tmin, tmax);
+
+  return valid && intervals_intersect(t_range, make_float2(tmin, tmax) + t_mid);
+}
+
+/* *
+ * Find the ray segment inside a single-sided cone.
+ *
+ * \param axis: a unit-length direction around which the cone has a circular symmetry
+ * \param P: the vector pointing from the cone apex to the ray origin
+ * \param D: the direction of the ray, does not need to have unit-length
+ * \param cos_angle_sq: `sqr(cos(half_aperture_of_the_cone))`
+ * \param t_range: the lower and upper bounds between which the ray lies inside the cone
+ * \return whether the intersection exists and is in the provided range
+ *
+ * See https://www.geometrictools.com/Documentation/IntersectionLineCone.pdf for illustration
+ */
+ccl_device_inline bool ray_cone_intersect(const float3 axis,
+                                          const float3 P,
+                                          float3 D,
+                                          const float cos_angle_sq,
+                                          ccl_private float2 *t_range)
+{
+  if (cos_angle_sq < 1e-4f) {
+    /* The cone is nearly a plane. */
+    return ray_plane_intersect(axis, P, D, t_range);
+  }
+
+  const float inv_len = inversesqrtf(len_squared(D));
+  D *= inv_len;
+
+  const float AD = dot(axis, D);
+  const float AP = dot(axis, P);
+
+  const float a = sqr(AD) - cos_angle_sq;
+  const float b = 2.0f * (AD * AP - cos_angle_sq * dot(D, P));
+  const float c = sqr(AP) - cos_angle_sq * dot(P, P);
+
+  float tmin = 0.0f, tmax = FLT_MAX;
+  bool valid = solve_quadratic(a, b, c, tmin, tmax);
+
+  /* Check if the intersections are in the same hemisphere as the cone. */
+  const bool tmin_valid = AP + tmin * AD > 0.0f;
+  const bool tmax_valid = AP + tmax * AD > 0.0f;
+
+  valid &= (tmin_valid || tmax_valid);
+
+  if (!tmax_valid) {
+    tmax = tmin;
+    tmin = 0.0f;
+  }
+  else if (!tmin_valid) {
+    tmin = tmax;
+    tmax = FLT_MAX;
+  }
+
+  return valid && intervals_intersect(t_range, make_float2(tmin, tmax) * inv_len);
+}
+
 CCL_NAMESPACE_END
 
 #endif /* __UTIL_MATH_INTERSECT_H__ */
diff --git a/intern/cycles/util/transform.h b/intern/cycles/util/transform.h
index 208c68dc5a1..0263be7c841 100644
--- a/intern/cycles/util/transform.h
+++ b/intern/cycles/util/transform.h
@@ -161,6 +161,17 @@ ccl_device_inline Transform make_transform(float a,
   return t;
 }
 
+ccl_device_inline Transform make_transform(const float3 x, const float3 y, const float3 z)
+{
+  Transform t;
+
+  t.x = float3_to_float4(x, 0.0f);
+  t.y = float3_to_float4(y, 0.0f);
+  t.z = float3_to_float4(z, 0.0f);
+
+  return t;
+}
+
 ccl_device_inline Transform euler_to_transform(const float3 euler)
 {
   float cx = cosf(euler.x);
diff --git a/tests/data b/tests/data
index 5038ad7165f..00af9c65712 160000
--- a/tests/data
+++ b/tests/data
@@ -1 +1 @@
-Subproject commit 5038ad7165fd1a77e61e0d2d6efdadd6ea7c0dfb
+Subproject commit 00af9c65712b6aa78ce6eb0c62c5aafb7a867f18
-- 
2.30.2


From b1c85fa78c61c60db7d68317958ba0336b760f22 Mon Sep 17 00:00:00 2001
From: Hans Goudey <hans@blender.org>
Date: Mon, 25 Mar 2024 08:10:12 -0400
Subject: [PATCH 11/36] Fix: Python import error for GPUVertBuf

Caused by fe76d8c946e5ce7a76d45acaee0a67407cb3b599
---
 source/blender/python/gpu/gpu_py_vertex_buffer.cc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/source/blender/python/gpu/gpu_py_vertex_buffer.cc b/source/blender/python/gpu/gpu_py_vertex_buffer.cc
index 310e4b5aa21..0602e5ecf98 100644
--- a/source/blender/python/gpu/gpu_py_vertex_buffer.cc
+++ b/source/blender/python/gpu/gpu_py_vertex_buffer.cc
@@ -241,7 +241,7 @@ static PyObject *pygpu_vertbuf__tp_new(PyTypeObject * /*type*/, PyObject *args,
       PY_ARG_PARSER_HEAD_COMPAT()
       "O!" /* `format` */
       "I"  /* `len` */
-      ":blender::gpu::VertBuf.__new__",
+      ":GPUVertBuf.__new__",
       _keywords,
       nullptr,
   };
@@ -307,7 +307,7 @@ static PyObject *pygpu_vertbuf_attr_fill(BPyGPUVertBuf *self, PyObject *args, Py
     return nullptr;
   }
 
-  if (!pygpu_vertbuf_fill(self->buf, id, data, "blender::gpu::VertBuf.attr_fill")) {
+  if (!pygpu_vertbuf_fill(self->buf, id, data, "GPUVertBuf.attr_fill")) {
     return nullptr;
   }
 
@@ -340,7 +340,7 @@ static void pygpu_vertbuf__tp_dealloc(BPyGPUVertBuf *self)
 PyDoc_STRVAR(
     /* Wrap. */
     pygpu_vertbuf__tp_doc,
-    ".. class:: blender::gpu::VertBuf(format, len)\n"
+    ".. class:: GPUVertBuf(format, len)\n"
     "\n"
     "   Contains a VBO.\n"
     "\n"
@@ -350,7 +350,7 @@ PyDoc_STRVAR(
     "   :type len: int\n");
 PyTypeObject BPyGPUVertBuf_Type = {
     /*ob_base*/ PyVarObject_HEAD_INIT(nullptr, 0)
-    /*tp_name*/ "blender::gpu::VertBuf",
+    /*tp_name*/ "GPUVertBuf",
     /*tp_basicsize*/ sizeof(BPyGPUVertBuf),
     /*tp_itemsize*/ 0,
     /*tp_dealloc*/ (destructor)pygpu_vertbuf__tp_dealloc,
-- 
2.30.2


From f66cb1e63534f26665124c6c3da9ac91a3b3baff Mon Sep 17 00:00:00 2001
From: Germano Cavalcante <germano.costa@ig.com.br>
Date: Mon, 25 Mar 2024 13:35:52 +0100
Subject: [PATCH 12/36] UI: rename 'Snap With' and 'Snap To' to 'Snap Base' and
 'Snap Target'

Since the `Set Snap Base` feature has been implemented, it would be
convenient to reflect the use of the term `Snap Base` in other areas in
the UI.

Pull Request: https://projects.blender.org/blender/blender/pulls/119723
---
 scripts/startup/bl_ui/space_view3d.py             |  6 +++---
 source/blender/editors/transform/transform_ops.cc | 14 +++++++++-----
 source/blender/makesrna/intern/rna_scene.cc       |  2 +-
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/scripts/startup/bl_ui/space_view3d.py b/scripts/startup/bl_ui/space_view3d.py
index a69b8ec453d..81190289eb0 100644
--- a/scripts/startup/bl_ui/space_view3d.py
+++ b/scripts/startup/bl_ui/space_view3d.py
@@ -7534,14 +7534,14 @@ class VIEW3D_PT_snapping(Panel):
         layout = self.layout
         col = layout.column()
 
-        col.label(text="Snap With")
+        col.label(text="Snap Base")
         row = col.row(align=True)
         row.prop(tool_settings, "snap_target", expand=True)
 
-        col.label(text="Snap To")
+        col.label(text="Snap Target")
         col.prop(tool_settings, "snap_elements_base", expand=True)
 
-        col.label(text="Snap Individual Elements To")
+        col.label(text="Snap Target for Individual Elements")
         col.prop(tool_settings, "snap_elements_individual", expand=True)
 
         col.separator()
diff --git a/source/blender/editors/transform/transform_ops.cc b/source/blender/editors/transform/transform_ops.cc
index c43f6c5621f..6e1345d8e3d 100644
--- a/source/blender/editors/transform/transform_ops.cc
+++ b/source/blender/editors/transform/transform_ops.cc
@@ -705,11 +705,15 @@ void Transform_Properties(wmOperatorType *ot, int flags)
 
       RNA_def_boolean(ot->srna, "use_snap_project", false, "Project Individual Elements", "");
 
-      /* TODO(@gfxcoder): Rename `snap_target` to `snap_source` to avoid previous ambiguity of
-       * "target" (now, "source" is geometry to be moved and "target" is geometry to which moved
-       * geometry is snapped).  Use "Source snap point" and "Point on source that will snap to
-       * target" for name and description, respectively. */
-      prop = RNA_def_enum(ot->srna, "snap_target", rna_enum_snap_source_items, 0, "Snap With", "");
+      /* TODO(@gfxcoder): Rename `snap_target` to `snap_base` to avoid previous ambiguity of
+       * "target" (now, "base" or "source" is geometry to be moved and "target" is geometry to
+       * which moved geometry is snapped). */
+      prop = RNA_def_enum(ot->srna,
+                          "snap_target",
+                          rna_enum_snap_source_items,
+                          0,
+                          "Snap Base",
+                          "Point on source that will snap to target");
       RNA_def_property_flag(prop, PROP_HIDDEN);
 
       /* Target selection. */
diff --git a/source/blender/makesrna/intern/rna_scene.cc b/source/blender/makesrna/intern/rna_scene.cc
index 1cece2ee975..a4bbd1de0ff 100644
--- a/source/blender/makesrna/intern/rna_scene.cc
+++ b/source/blender/makesrna/intern/rna_scene.cc
@@ -3574,7 +3574,7 @@ static void rna_def_tool_settings(BlenderRNA *brna)
       prop, "rna_ToolSettings_snap_mode_get", "rna_ToolSettings_snap_mode_set", nullptr);
   RNA_def_property_flag(prop, PROP_ENUM_FLAG);
   RNA_def_property_ui_text(
-      prop, "Snap Element", "Type of element for the \"Snap With\" to snap to");
+      prop, "Snap Element", "Type of element for the \"Snap Base\" to snap to");
   RNA_def_property_update(prop, NC_SCENE | ND_TOOLSETTINGS, nullptr); /* header redraw */
 
   prop = RNA_def_property(srna, "snap_elements_individual", PROP_ENUM, PROP_NONE);
-- 
2.30.2


From 436b9a5aae9fe89b3296c107249907f86157f760 Mon Sep 17 00:00:00 2001
From: Pratik Borhade <pratikborhade302@gmail.com>
Date: Mon, 25 Mar 2024 13:55:57 +0100
Subject: [PATCH 13/36] Fix #119862: poly build crash when adding first vertex
 in empty geometry

`object_index` is -1 (not found) when all mesh data is deleted, so
accessing the base vector at index -1 crashes.
Skip the lookup in that case to prevent the crash.

Pull Request: https://projects.blender.org/blender/blender/pulls/119865
---
 .../blender/editors/space_view3d/view3d_gizmo_preselect_type.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/blender/editors/space_view3d/view3d_gizmo_preselect_type.cc b/source/blender/editors/space_view3d/view3d_gizmo_preselect_type.cc
index 6d677d55a04..669a359e806 100644
--- a/source/blender/editors/space_view3d/view3d_gizmo_preselect_type.cc
+++ b/source/blender/editors/space_view3d/view3d_gizmo_preselect_type.cc
@@ -505,7 +505,7 @@ void ED_view3d_gizmo_mesh_preselect_get_active(bContext *C,
   /* weak, allocate an array just to access the index. */
   Base *base = nullptr;
   Object *obedit = nullptr;
-  {
+  if (object_index != -1) {
     Vector<Base *> bases = BKE_view_layer_array_from_bases_in_edit_mode(
         scene, view_layer, CTX_wm_view3d(C));
     if (object_index < bases.size()) {
-- 
2.30.2


From 209db414e0e019e6a2667f6a4e4b7dfdcea0df68 Mon Sep 17 00:00:00 2001
From: Pratik Borhade <pratikborhade302@gmail.com>
Date: Mon, 25 Mar 2024 14:00:38 +0100
Subject: [PATCH 14/36] Fix #119860: Undo crash in asset browser poll function

`asset_lib` is null when calling undo in an active file browser space.
This causes a crash in the asset undo poll function, so exit early from
it when the active file space is a file browser.

Pull Request: https://projects.blender.org/blender/blender/pulls/119870
---
 source/blender/editors/asset/intern/asset_ops.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/blender/editors/asset/intern/asset_ops.cc b/source/blender/editors/asset/intern/asset_ops.cc
index 99150ad06ef..5d9d1c740db 100644
--- a/source/blender/editors/asset/intern/asset_ops.cc
+++ b/source/blender/editors/asset/intern/asset_ops.cc
@@ -582,7 +582,7 @@ static void ASSET_OT_catalog_delete(wmOperatorType *ot)
 static asset_system::AssetCatalogService *get_catalog_service(bContext *C)
 {
   const SpaceFile *sfile = CTX_wm_space_file(C);
-  if (!sfile) {
+  if (!sfile || ED_fileselect_is_file_browser(sfile)) {
     return nullptr;
   }
 
-- 
2.30.2


From 4971b144a43fec795dda169404bb699e3ff75516 Mon Sep 17 00:00:00 2001
From: Omar Emara <mail@OmarEmara.dev>
Date: Mon, 25 Mar 2024 14:09:54 +0100
Subject: [PATCH 15/36] Compositor: Unify sRGB to Linear between CPU and GPU

This patch unifies the sRGB to Linear color space conversion between the
CPU and GPU compositors. This is because CPU uses an optimized path that
produces values that are very slightly off. To fix this, for the GPU, we
do the conversion CPU side instead of doing it in a shader. Since images
are cached, the performance implications are not significant.

Another added benefit is that we no longer get differences due to the
order of alpha pre-multiplication and sRGB conversion, demonstrated in
#114305. And we no longer require any preprocessing of the images.

This patch adds some new utilities to the Image Buffer module to assign
float, byte, and compressed buffers along with their color spaces. It
also adds an ownership flag to compressed data. Those were added as a
way to facilitate the implementation.

Pull Request: https://projects.blender.org/blender/blender/pulls/118624
---
 .../cached_resources/intern/cached_image.cc   | 149 +++++++-----------
 source/blender/imbuf/IMB_imbuf.hh             |  15 ++
 source/blender/imbuf/IMB_imbuf_types.hh       |  24 +--
 source/blender/imbuf/intern/allocimbuf.cc     |  53 ++++++-
 source/blender/imbuf/intern/format_dds.cc     |   1 +
 5 files changed, 132 insertions(+), 110 deletions(-)

diff --git a/source/blender/compositor/realtime_compositor/cached_resources/intern/cached_image.cc b/source/blender/compositor/realtime_compositor/cached_resources/intern/cached_image.cc
index 2121f13a1e6..29ed5b0909d 100644
--- a/source/blender/compositor/realtime_compositor/cached_resources/intern/cached_image.cc
+++ b/source/blender/compositor/realtime_compositor/cached_resources/intern/cached_image.cc
@@ -15,6 +15,7 @@
 #include "GPU_shader.hh"
 #include "GPU_texture.hh"
 
+#include "IMB_colormanagement.hh"
 #include "IMB_imbuf.hh"
 #include "IMB_imbuf_types.hh"
 
@@ -56,74 +57,6 @@ bool operator==(const CachedImageKey &a, const CachedImageKey &b)
  * Cached Image.
  */
 
-/* Returns a new texture of the given format and precision preprocessed using the given shader. The
- * input texture is freed. */
-static GPUTexture *preprocess_texture(Context &context,
-                                      GPUTexture *input_texture,
-                                      eGPUTextureFormat target_format,
-                                      ResultPrecision precision,
-                                      const char *shader_name)
-{
-  const int2 size = int2(GPU_texture_width(input_texture), GPU_texture_height(input_texture));
-
-  GPUTexture *preprocessed_texture = GPU_texture_create_2d(
-      "Cached Image", size.x, size.y, 1, target_format, GPU_TEXTURE_USAGE_GENERAL, nullptr);
-
-  GPUShader *shader = context.get_shader(shader_name, precision);
-  GPU_shader_bind(shader);
-
-  const int input_unit = GPU_shader_get_sampler_binding(shader, "input_tx");
-  GPU_texture_bind(input_texture, input_unit);
-
-  const int image_unit = GPU_shader_get_sampler_binding(shader, "output_img");
-  GPU_texture_image_bind(preprocessed_texture, image_unit);
-
-  compute_dispatch_threads_at_least(shader, size);
-
-  GPU_shader_unbind();
-  GPU_texture_unbind(input_texture);
-  GPU_texture_image_unbind(preprocessed_texture);
-  GPU_texture_free(input_texture);
-
-  return preprocessed_texture;
-}
-
-/* Compositor images are expected to be always pre-multiplied, so identify if the GPU texture
- * returned by the IMB module is straight and needs to be pre-multiplied. An exception is when
- * the image has an alpha mode of channel packed or alpha ignore, in which case, we always ignore
- * pre-multiplication. */
-static bool should_premultiply_alpha(Image *image, ImBuf *image_buffer)
-{
-  if (ELEM(image->alpha_mode, IMA_ALPHA_CHANNEL_PACKED, IMA_ALPHA_IGNORE)) {
-    return false;
-  }
-
-  return !BKE_image_has_gpu_texture_premultiplied_alpha(image, image_buffer);
-}
-
-/* Get a suitable texture format supported by the compositor given the format of the texture
- * returned by the IMB module. See imb_gpu_get_format for the formats that needs to be handled. */
-static eGPUTextureFormat get_compatible_texture_format(eGPUTextureFormat original_format)
-{
-  switch (original_format) {
-    case GPU_R16F:
-    case GPU_R32F:
-    case GPU_RGBA16F:
-    case GPU_RGBA32F:
-      return original_format;
-    case GPU_R8:
-      return GPU_R16F;
-    case GPU_RGBA8:
-    case GPU_SRGB8_A8:
-      return GPU_RGBA16F;
-    default:
-      break;
-  }
-
-  BLI_assert_unreachable();
-  return original_format;
-}
-
 /* Get the selected render layer selected assuming the image is a multilayer image. */
 static RenderLayer *get_render_layer(Image *image, ImageUser &image_user)
 {
@@ -205,6 +138,56 @@ static ImageUser compute_image_user_for_pass(Context &context,
   return image_user_for_pass;
 }
 
+/* The image buffer might be stored as an sRGB 8-bit image, while the compositor expects linear
+ * float images, so compute a linear float buffer for the image buffer. This will also do linear
+ * space conversion and alpha pre-multiplication as needed. We could store those images in sRGB GPU
+ * textures and let the GPU do the linear space conversion, but the issue is that we don't control
+ * how the GPU does the conversion and so we get tiny differences across CPU and GPU compositing,
+ * and potentially even across GPUs/Drivers. Further, if alpha pre-multiplication is needed, we
+ * would need to do it ourself, which means alpha pre-multiplication will happen before linear
+ * space conversion, which would produce yet another difference. So we just do everything on the
+ * CPU, since this is already a cached resource.
+ *
+ * To avoid conflicts with other threads, create a new image buffer and assign all the necessary
+ * information to it, with IB_DO_NOT_TAKE_OWNERSHIP for buffers since a deep copy is not needed.
+ *
+ * The caller should free the returned image buffer. */
+static ImBuf *compute_linear_buffer(ImBuf *image_buffer)
+{
+  /* Do not pass the flags to the allocation function to avoid buffer allocation, but assign them
+   * after to retain important information like precision and alpha mode. */
+  ImBuf *linear_image_buffer = IMB_allocImBuf(
+      image_buffer->x, image_buffer->y, image_buffer->planes, 0);
+  linear_image_buffer->flags = image_buffer->flags;
+
+  /* Assign the float buffer if it exists, as well as its number of channels. */
+  IMB_assign_float_buffer(
+      linear_image_buffer, image_buffer->float_buffer, IB_DO_NOT_TAKE_OWNERSHIP);
+  linear_image_buffer->channels = image_buffer->channels;
+
+  /* If no float buffer exists, assign the byte buffer then compute a float buffer from it. This
+   * is the main call of this function. */
+  if (!linear_image_buffer->float_buffer.data) {
+    IMB_assign_byte_buffer(
+        linear_image_buffer, image_buffer->byte_buffer, IB_DO_NOT_TAKE_OWNERSHIP);
+    IMB_float_from_rect(linear_image_buffer);
+  }
+
+  /* If the image buffer contained compressed data, assign them as well, but only if the color
+   * space of the buffer is linear or data, since we need linear data and can't preprocess the
+   * compressed buffer. If not, we fallback to the float buffer already assigned, which is
+   * guaranteed to exist as a fallback for compressed textures. */
+  const bool is_suitable_compressed_color_space =
+      IMB_colormanagement_space_is_data(image_buffer->byte_buffer.colorspace) ||
+      IMB_colormanagement_space_is_scene_linear(image_buffer->byte_buffer.colorspace);
+  if (image_buffer->ftype == IMB_FTYPE_DDS && is_suitable_compressed_color_space) {
+    linear_image_buffer->ftype = IMB_FTYPE_DDS;
+    IMB_assign_dds_data(linear_image_buffer, image_buffer->dds_data, IB_DO_NOT_TAKE_OWNERSHIP);
+  }
+
+  return linear_image_buffer;
+}
+
 CachedImage::CachedImage(Context &context,
                          Image *image,
                          ImageUser *image_user,
@@ -227,34 +210,12 @@ CachedImage::CachedImage(Context &context,
       context, image, image_user, pass_name);
 
   ImBuf *image_buffer = BKE_image_acquire_ibuf(image, &image_user_for_pass, nullptr);
-  const bool is_premultiplied = BKE_image_has_gpu_texture_premultiplied_alpha(image, image_buffer);
-  texture_ = IMB_create_gpu_texture("Image Texture", image_buffer, true, is_premultiplied);
+  ImBuf *linear_image_buffer = compute_linear_buffer(image_buffer);
+
+  texture_ = IMB_create_gpu_texture("Image Texture", linear_image_buffer, true, true);
   GPU_texture_update_mipmap_chain(texture_);
 
-  const eGPUTextureFormat original_format = GPU_texture_format(texture_);
-  const eGPUTextureFormat target_format = get_compatible_texture_format(original_format);
-  const ResultType result_type = Result::type(target_format);
-  const ResultPrecision precision = Result::precision(target_format);
-
-  /* The GPU image returned by the IMB module can be in a format not supported by the compositor,
-   * or it might need pre-multiplication, so preprocess them first. */
-  if (result_type == ResultType::Color && should_premultiply_alpha(image, image_buffer)) {
-    texture_ = preprocess_texture(
-        context, texture_, target_format, precision, "compositor_premultiply_alpha");
-  }
-  else if (original_format != target_format) {
-    const char *conversion_shader_name = result_type == ResultType::Float ?
-                                             "compositor_convert_float_to_float" :
-                                             "compositor_convert_color_to_color";
-    texture_ = preprocess_texture(
-        context, texture_, target_format, precision, conversion_shader_name);
-  }
-
-  /* Set the alpha to 1 using swizzling if alpha is ignored. */
-  if (result_type == ResultType::Color && image->alpha_mode == IMA_ALPHA_IGNORE) {
-    GPU_texture_swizzle_set(texture_, "rgb1");
-  }
-
+  IMB_freeImBuf(linear_image_buffer);
   BKE_image_release_ibuf(image, image_buffer, nullptr);
 }
 
diff --git a/source/blender/imbuf/IMB_imbuf.hh b/source/blender/imbuf/IMB_imbuf.hh
index 16377b37edd..a945811b406 100644
--- a/source/blender/imbuf/IMB_imbuf.hh
+++ b/source/blender/imbuf/IMB_imbuf.hh
@@ -118,6 +118,21 @@ ImBuf *IMB_allocFromBuffer(const uint8_t *byte_buffer,
 void IMB_assign_byte_buffer(ImBuf *ibuf, uint8_t *buffer_data, ImBufOwnership ownership);
 void IMB_assign_float_buffer(ImBuf *ibuf, float *buffer_data, ImBufOwnership ownership);
 
+/**
+ * Assign the data and the color space of the given buffer to the corresponding buffer of the
+ * image buffer.
+ *
+ * \note Does not modify the topology (width, height, number of channels)
+ * or the mipmaps in any way.
+ *
+ * \note The ownership of the data in the source buffer is ignored.
+ */
+void IMB_assign_byte_buffer(ImBuf *ibuf, const ImBufByteBuffer &buffer, ImBufOwnership ownership);
+void IMB_assign_float_buffer(ImBuf *ibuf,
+                             const ImBufFloatBuffer &buffer,
+                             ImBufOwnership ownership);
+void IMB_assign_dds_data(ImBuf *ibuf, const DDSData &data, ImBufOwnership ownership);
+
 /**
  * Make corresponding buffers available for modification.
  * Is achieved by ensuring that the given ImBuf is the only owner of the underlying buffer data.
diff --git a/source/blender/imbuf/IMB_imbuf_types.hh b/source/blender/imbuf/IMB_imbuf_types.hh
index e40f109c8a0..629bf94fb1e 100644
--- a/source/blender/imbuf/IMB_imbuf_types.hh
+++ b/source/blender/imbuf/IMB_imbuf_types.hh
@@ -34,17 +34,6 @@ struct IDProperty;
 #define IMB_MIPMAP_LEVELS 20
 #define IMB_FILEPATH_SIZE 1024
 
-struct DDSData {
-  /** DDS fourcc info */
-  unsigned int fourcc;
-  /** The number of mipmaps in the dds file */
-  unsigned int nummipmaps;
-  /** The compressed image data */
-  unsigned char *data;
-  /** The size of the compressed data */
-  unsigned int size;
-};
-
 /**
  * \ingroup imbuf
  * This is the abstraction of an image. ImBuf is the basic type used for all imbuf operations.
@@ -143,6 +132,19 @@ enum ImBufOwnership {
   IB_TAKE_OWNERSHIP = 1,
 };
 
+struct DDSData {
+  /** DDS fourcc info */
+  unsigned int fourcc;
+  /** The number of mipmaps in the dds file */
+  unsigned int nummipmaps;
+  /** The compressed image data */
+  unsigned char *data;
+  /** The size of the compressed data */
+  unsigned int size;
+  /** Who owns the data buffer. */
+  ImBufOwnership ownership;
+};
+
 /* Different storage specialization.
  *
  * NOTE: Avoid direct assignments and allocations, use the buffer utilities from the IMB_imbuf.hh
diff --git a/source/blender/imbuf/intern/allocimbuf.cc b/source/blender/imbuf/intern/allocimbuf.cc
index 5cfea171dea..40b2e3aac97 100644
--- a/source/blender/imbuf/intern/allocimbuf.cc
+++ b/source/blender/imbuf/intern/allocimbuf.cc
@@ -84,6 +84,27 @@ template<class BufferType> static void imb_free_buffer(BufferType &buffer)
   buffer.ownership = IB_DO_NOT_TAKE_OWNERSHIP;
 }
 
+/* Free the specified DDS buffer storage, freeing memory when needed and restoring the state of the
+ * buffer to its defaults. */
+static void imb_free_dds_buffer(DDSData &dds_data)
+{
+  if (dds_data.data) {
+    switch (dds_data.ownership) {
+      case IB_DO_NOT_TAKE_OWNERSHIP:
+        break;
+
+      case IB_TAKE_OWNERSHIP:
+        /* dds_data.data is allocated by DirectDrawSurface::readData(), so don't use MEM_freeN! */
+        free(dds_data.data);
+        break;
+    }
+  }
+
+  /* Reset buffer to defaults. */
+  dds_data.data = nullptr;
+  dds_data.ownership = IB_DO_NOT_TAKE_OWNERSHIP;
+}
+
 /* Allocate pixel storage of the given buffer. The buffer owns the allocated memory.
  * Returns true of allocation succeeded, false otherwise. */
 template<class BufferType>
@@ -249,11 +270,7 @@ void IMB_freeImBuf(ImBuf *ibuf)
     IMB_free_gpu_textures(ibuf);
     IMB_metadata_free(ibuf->metadata);
     colormanage_cache_free(ibuf);
-
-    if (ibuf->dds_data.data != nullptr) {
-      /* dds_data.data is allocated by DirectDrawSurface::readData(), so don't use MEM_freeN! */
-      free(ibuf->dds_data.data);
-    }
+    imb_free_dds_buffer(ibuf->dds_data);
     MEM_freeN(ibuf);
   }
 }
@@ -472,6 +489,32 @@ void IMB_assign_float_buffer(ImBuf *ibuf, float *buffer_data, const ImBufOwnersh
   }
 }
 
+void IMB_assign_byte_buffer(ImBuf *ibuf,
+                            const ImBufByteBuffer &buffer,
+                            const ImBufOwnership ownership)
+{
+  IMB_assign_byte_buffer(ibuf, buffer.data, ownership);
+  ibuf->byte_buffer.colorspace = buffer.colorspace;
+}
+
+void IMB_assign_float_buffer(ImBuf *ibuf,
+                             const ImBufFloatBuffer &buffer,
+                             const ImBufOwnership ownership)
+{
+  IMB_assign_float_buffer(ibuf, buffer.data, ownership);
+  ibuf->float_buffer.colorspace = buffer.colorspace;
+}
+
+void IMB_assign_dds_data(ImBuf *ibuf, const DDSData &data, const ImBufOwnership ownership)
+{
+  BLI_assert(ibuf->ftype == IMB_FTYPE_DDS);
+
+  imb_free_dds_buffer(ibuf->dds_data);
+
+  ibuf->dds_data = data;
+  ibuf->dds_data.ownership = ownership;
+}
+
 ImBuf *IMB_allocFromBufferOwn(
     uint8_t *byte_buffer, float *float_buffer, uint w, uint h, uint channels)
 {
diff --git a/source/blender/imbuf/intern/format_dds.cc b/source/blender/imbuf/intern/format_dds.cc
index b5b26a10ca0..0b6a88dbcb6 100644
--- a/source/blender/imbuf/intern/format_dds.cc
+++ b/source/blender/imbuf/intern/format_dds.cc
@@ -330,6 +330,7 @@ static void LoadDXTCImage(ImBuf *ibuf, Filesystem::IOMemReader &mem_reader)
     ibuf->dds_data.size = mem_reader.size() - dds_header_size;
     ibuf->dds_data.data = (uchar *)malloc(ibuf->dds_data.size);
     mem_reader.pread(ibuf->dds_data.data, ibuf->dds_data.size, dds_header_size);
+    ibuf->dds_data.ownership = IB_TAKE_OWNERSHIP;
 
     /* Flip compressed image data to match OpenGL convention. */
     FlipDXTCImage(ibuf);
-- 
2.30.2


From 9d88fa483c10d2d571da1fa2d440de4244d9d681 Mon Sep 17 00:00:00 2001
From: Hans Goudey <hans@blender.org>
Date: Mon, 25 Mar 2024 09:13:27 -0400
Subject: [PATCH 16/36] Cleanup: Make format

---
 scripts/startup/bl_ui/space_view3d.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/startup/bl_ui/space_view3d.py b/scripts/startup/bl_ui/space_view3d.py
index 81190289eb0..1fbd1c136a4 100644
--- a/scripts/startup/bl_ui/space_view3d.py
+++ b/scripts/startup/bl_ui/space_view3d.py
@@ -5803,6 +5803,7 @@ class VIEW3D_MT_edit_greasepencil_showhide(Menu):
         layout.operator("grease_pencil.layer_hide", text="Hide Active Layer").unselected = False
         layout.operator("grease_pencil.layer_hide", text="Hide Inactive Layers").unselected = True
 
+
 class VIEW3D_MT_edit_greasepencil_cleanup(Menu):
     bl_label = "Cleanup"
 
@@ -5811,6 +5812,7 @@ class VIEW3D_MT_edit_greasepencil_cleanup(Menu):
 
         layout.operator("grease_pencil.clean_loose")
 
+
 class VIEW3D_MT_edit_greasepencil(Menu):
     bl_label = "Grease Pencil"
 
-- 
2.30.2


From fa3e47523eb2c8ef52ac643bbc0144424daeffea Mon Sep 17 00:00:00 2001
From: Omar Emara <mail@OmarEmara.dev>
Date: Mon, 25 Mar 2024 14:21:00 +0100
Subject: [PATCH 17/36] Compositor: Port GLSL SMAA to CPU compositor

This patch ports the GLSL SMAA library to the CPU compositor in order to
unify the anti-aliasing behavior between the CPU and GPU compositor.
Additionally, the SMAA texture generator was removed since it is now
unused.

Previously, we used an external C++ library for SMAA anti-aliasing,
which is itself a port of the GLSL SMAA library. However, the code
structure and results of the library were different, which made it quite
difficult to match results between CPU and GPU, hence the decision to
port the library ourselves.

The port was performed through a complete copy of the library to C++,
retaining the same function and variable names, even if they are
different from Blender's naming conventions. The necessary code changes
were done to make it work in C++, including manually doing swizzling
which changes the code structure a bit.

Even after porting the library, there were still major differences
between CPU and GPU, due to different arithmetic precision. To fix this
some of the bilinear samplers used in branches and selections were
carefully changed to use point samplers to avoid discontinuities around
branches, also resulting in a nice performance improvement. Some slight
differences still exist due to different bilinear interpolation, but
they shall be looked into later once we have a baseline implementation.

The new implementation is slower than the existing implementation, most
likely due to the liberal use of bilinear interpolation, since it is
quite cheap on GPUs and the code even does more work to use bilinear
interpolation to avoid multiple texture fetches, except this causes a
slowdown on CPUs. Some of those were alleviated as mentioned in the
previous section, but we can probably look into optimizing it further.

Pull Request: https://projects.blender.org/blender/blender/pulls/119414
---
 extern/CMakeLists.txt                         |    4 -
 extern/smaa_areatex/CMakeLists.txt            |    5 -
 extern/smaa_areatex/README.blender            |    5 -
 extern/smaa_areatex/smaa_areatex.cpp          | 1210 ----------
 source/blender/compositor/CMakeLists.txt      |   14 -
 .../compositor/nodes/COM_AntiAliasingNode.cc  |   52 +-
 .../compositor/nodes/COM_CornerPinNode.cc     |   25 +-
 .../compositor/nodes/COM_DilateErodeNode.cc   |   24 +-
 .../compositor/nodes/COM_IDMaskNode.cc        |   25 +-
 .../nodes/COM_PlaneTrackDeformNode.cc         |   25 +-
 .../compositor/nodes/COM_ZCombineNode.cc      |   23 +-
 .../operations/COM_SMAAOperation.cc           | 2139 +++++++++++------
 .../compositor/operations/COM_SMAAOperation.h |   95 +-
 13 files changed, 1496 insertions(+), 2150 deletions(-)
 delete mode 100644 extern/smaa_areatex/CMakeLists.txt
 delete mode 100644 extern/smaa_areatex/README.blender
 delete mode 100644 extern/smaa_areatex/smaa_areatex.cpp

diff --git a/extern/CMakeLists.txt b/extern/CMakeLists.txt
index 30842f32441..c9b8a2deea2 100644
--- a/extern/CMakeLists.txt
+++ b/extern/CMakeLists.txt
@@ -104,10 +104,6 @@ if(WITH_MOD_FLUID)
   add_subdirectory(mantaflow)
 endif()
 
-if(WITH_COMPOSITOR_CPU)
-  add_subdirectory(smaa_areatex)
-endif()
-
 if(WITH_VULKAN_BACKEND)
   add_subdirectory(vulkan_memory_allocator)
 endif()
diff --git a/extern/smaa_areatex/CMakeLists.txt b/extern/smaa_areatex/CMakeLists.txt
deleted file mode 100644
index ace1406c37f..00000000000
--- a/extern/smaa_areatex/CMakeLists.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-# SPDX-FileCopyrightText: 2017 Blender Foundation
-#
-# SPDX-License-Identifier: GPL-2.0-or-later
-
-add_executable(smaa_areatex smaa_areatex.cpp)
diff --git a/extern/smaa_areatex/README.blender b/extern/smaa_areatex/README.blender
deleted file mode 100644
index 9c409142ae8..00000000000
--- a/extern/smaa_areatex/README.blender
+++ /dev/null
@@ -1,5 +0,0 @@
-Project: smaa-cpp
-URL: https://github.com/iRi-E/smaa-cpp
-License: MIT
-Upstream version: 0.4.0
-Local modifications:
diff --git a/extern/smaa_areatex/smaa_areatex.cpp b/extern/smaa_areatex/smaa_areatex.cpp
deleted file mode 100644
index c61543e10a0..00000000000
--- a/extern/smaa_areatex/smaa_areatex.cpp
+++ /dev/null
@@ -1,1210 +0,0 @@
-/**
- * Copyright (C) 2016-2017 IRIE Shinsuke
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * smaa_areatex.cpp  version 0.4.0
- *
- * This is a part of smaa-cpp that is an implementation of
- * Enhanced Subpixel Morphological Antialiasing (SMAA) written in C++.
- *
- * This program is C++ rewrite of AreaTex.py included in the original
- * SMAA ditribution:
- *
- *   https://github.com/iryoku/smaa/tree/master/Scripts
- */
-
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-
-#include <cmath>
-
-/*------------------------------------------------------------------------------*/
-/* Type Definitions */
-
-class Int2;
-class Dbl2;
-
-class Int2 {
-public:
-	int x, y;
-
-	Int2() { this->x = this->y = 0; }
-	Int2(int x) { this->x = this->y = x; }
-	Int2(int x, int y) { this->x = x; this->y = y; }
-
-	operator Dbl2();
-
-	Int2 operator + (Int2 other) { return Int2(x + other.x, y + other.y); }
-	Int2 operator * (Int2 other) { return Int2(x * other.x, y * other.y); }
-};
-
-class Dbl2 {
-public:
-	double x, y;
-
-	Dbl2() { this->x = this->y = 0.0; }
-	Dbl2(double x) { this->x = this->y = x; }
-	Dbl2(double x, double y) { this->x = x; this->y = y; }
-
-	Dbl2 apply(double (* func)(double)) { return Dbl2(func(x), func(y)); }
-
-	operator Int2();
-
-	Dbl2 operator + (Dbl2 other) { return Dbl2(x + other.x, y + other.y); }
-	Dbl2 operator - (Dbl2 other) { return Dbl2(x - other.x, y - other.y); }
-	Dbl2 operator * (Dbl2 other) { return Dbl2(x * other.x, y * other.y); }
-	Dbl2 operator / (Dbl2 other) { return Dbl2(x / other.x, y / other.y); }
-	Dbl2 operator += (Dbl2 other) { return Dbl2(x += other.x, y += other.y); }
-	bool operator == (Dbl2 other) { return (x == other.x && y == other.y); }
-};
-
-Int2::operator Dbl2() { return Dbl2((double)x, (double)y); }
-Dbl2::operator Int2() { return Int2((int)x, (int)y); }
-
-/*------------------------------------------------------------------------------*/
-/* Data to Calculate Areatex */
-
-/* Texture sizes: */
-/* (it's quite possible that this is not easily configurable) */
-static const int SUBSAMPLES_ORTHO = 7;
-static const int SUBSAMPLES_DIAG  = 5;
-static const int MAX_DIST_ORTHO_COMPAT = 16;
-static const int MAX_DIST_ORTHO = 20;
-static const int MAX_DIST_DIAG  = 20;
-static const int TEX_SIZE_ORTHO = 80; /* 16 * 5 slots = 80 */
-static const int TEX_SIZE_DIAG  = 80; /* 20 * 4 slots = 80 */
-
-/* Number of samples for calculating areas in the diagonal textures: */
-/* (diagonal areas are calculated using brute force sampling) */
-static const int SAMPLES_DIAG = 30;
-
-/* Maximum distance for smoothing u-shapes: */
-static const int SMOOTH_MAX_DISTANCE = 32;
-
-/*------------------------------------------------------------------------------*/
-/* Offset Tables */
-
-/* Offsets for subsample rendering */
-static const double subsample_offsets_ortho[SUBSAMPLES_ORTHO] = {
-	0.0,    /* 0 */
-	-0.25,  /* 1 */
-	0.25,   /* 2 */
-	-0.125, /* 3 */
-	0.125,  /* 4 */
-	-0.375, /* 5 */
-	0.375   /* 6 */
-};
-
-static const Dbl2 subsample_offsets_diag[SUBSAMPLES_DIAG] = {
-	{ 0.00,   0.00},  /* 0 */
-	{ 0.25,  -0.25},  /* 1 */
-	{-0.25,   0.25},  /* 2 */
-	{ 0.125, -0.125}, /* 3 */
-	{-0.125,  0.125}  /* 4 */
-};
-
-/* Mapping offsets for placing each pattern subtexture into its place */
-enum edgesorthoIndices
-{
-	EDGESORTHO_NONE_NONE = 0,
-	EDGESORTHO_NONE_NEGA = 1,
-	EDGESORTHO_NONE_POSI = 2,
-	EDGESORTHO_NONE_BOTH = 3,
-	EDGESORTHO_NEGA_NONE = 4,
-	EDGESORTHO_NEGA_NEGA = 5,
-	EDGESORTHO_NEGA_POSI = 6,
-	EDGESORTHO_NEGA_BOTH = 7,
-	EDGESORTHO_POSI_NONE = 8,
-	EDGESORTHO_POSI_NEGA = 9,
-	EDGESORTHO_POSI_POSI = 10,
-	EDGESORTHO_POSI_BOTH = 11,
-	EDGESORTHO_BOTH_NONE = 12,
-	EDGESORTHO_BOTH_NEGA = 13,
-	EDGESORTHO_BOTH_POSI = 14,
-	EDGESORTHO_BOTH_BOTH = 15,
-};
-
-static const Int2 edgesortho_compat[16] = {
-	{0, 0}, {0, 1}, {0, 3}, {0, 4}, {1, 0}, {1, 1}, {1, 3}, {1, 4},
-	{3, 0}, {3, 1}, {3, 3}, {3, 4}, {4, 0}, {4, 1}, {4, 3}, {4, 4}
-};
-
-static const Int2 edgesortho[16] = {
-	{0, 0}, {0, 1}, {0, 2}, {0, 3}, {1, 0}, {1, 1}, {1, 2}, {1, 3},
-	{2, 0}, {2, 1}, {2, 2}, {2, 3}, {3, 0}, {3, 1}, {3, 2}, {3, 3}
-};
-
-enum edgesdiagIndices
-{
-	EDGESDIAG_NONE_NONE = 0,
-	EDGESDIAG_NONE_VERT = 1,
-	EDGESDIAG_NONE_HORZ = 2,
-	EDGESDIAG_NONE_BOTH = 3,
-	EDGESDIAG_VERT_NONE = 4,
-	EDGESDIAG_VERT_VERT = 5,
-	EDGESDIAG_VERT_HORZ = 6,
-	EDGESDIAG_VERT_BOTH = 7,
-	EDGESDIAG_HORZ_NONE = 8,
-	EDGESDIAG_HORZ_VERT = 9,
-	EDGESDIAG_HORZ_HORZ = 10,
-	EDGESDIAG_HORZ_BOTH = 11,
-	EDGESDIAG_BOTH_NONE = 12,
-	EDGESDIAG_BOTH_VERT = 13,
-	EDGESDIAG_BOTH_HORZ = 14,
-	EDGESDIAG_BOTH_BOTH = 15,
-};
-
-static const Int2 edgesdiag[16] = {
-	{0, 0}, {0, 1}, {0, 2}, {0, 3}, {1, 0}, {1, 1}, {1, 2}, {1, 3},
-	{2, 0}, {2, 1}, {2, 2}, {2, 3}, {3, 0}, {3, 1}, {3, 2}, {3, 3}
-};
-
-/*------------------------------------------------------------------------------*/
-/* Miscellaneous Utility Functions */
-
-/* Linear interpolation: */
-static Dbl2 lerp(Dbl2 a, Dbl2 b, double p)
-{
-	return a + (b - a) * Dbl2(p);
-}
-
-/* Saturates a value to [0..1] range: */
-static double saturate(double x)
-{
-	return 0.0 < x ? (x < 1.0 ? x : 1.0) : 0.0;
-}
-
-/*------------------------------------------------------------------------------*/
-/* Horizontal/Vertical Areas */
-
-class AreaOrtho {
-	double m_data[SUBSAMPLES_ORTHO][TEX_SIZE_ORTHO][TEX_SIZE_ORTHO][2];
-	bool m_compat;
-	bool m_orig_u;
-public:
-	AreaOrtho(bool compat, bool orig_u) : m_compat(compat), m_orig_u(orig_u) {}
-
-	double *getData() { return (double *)&m_data; }
-	Dbl2 getPixel(int offset_index, Int2 coords) {
-		return Dbl2(m_data[offset_index][coords.y][coords.x][0],
-			    m_data[offset_index][coords.y][coords.x][1]);
-	}
-
-	void areaTex(int offset_index);
-private:
-	void putPixel(int offset_index, Int2 coords, Dbl2 pixel) {
-		m_data[offset_index][coords.y][coords.x][0] = pixel.x;
-		m_data[offset_index][coords.y][coords.x][1] = pixel.y;
-	}
-
-	Dbl2 smoothArea(double d, Dbl2 a1, Dbl2 a2);
-	Dbl2 makeQuad(int x, double d, double o);
-	Dbl2 area(Dbl2 p1, Dbl2 p2, int x);
-	Dbl2 calculate(int pattern, int left, int right, double offset);
-};
-
-/* Smoothing function for small u-patterns: */
-Dbl2 AreaOrtho::smoothArea(double d, Dbl2 a1, Dbl2 a2)
-{
-	Dbl2 b1 = (a1 * Dbl2(2.0)).apply(sqrt) * Dbl2(0.5);
-	Dbl2 b2 = (a2 * Dbl2(2.0)).apply(sqrt) * Dbl2(0.5);
-	double p = saturate(d / (double)SMOOTH_MAX_DISTANCE);
-	return lerp(b1, a1, p) + lerp(b2, a2, p);
-}
-
-/* Smoothing u-patterns by quadratic function: */
-Dbl2 AreaOrtho::makeQuad(int x, double d, double o)
-{
-	double r = (double)x;
-
-	/* fmin() below is a trick to smooth tiny u-patterns: */
-	return Dbl2(r, (1.0 - fmin(4.0, d) * r * (d - r) / (d * d)) * o);
-}
-
-/* Calculates the area under the line p1->p2, for the pixel x..x+1: */
-Dbl2 AreaOrtho::area(Dbl2 p1, Dbl2 p2, int x)
-{
-	Dbl2 d = p2 - p1;
-	double x1 = (double)x;
-	double x2 = x1 + 1.0;
-
-	if ((x1 >= p1.x && x1 < p2.x) || (x2 > p1.x && x2 <= p2.x)) { /* inside? */
-		double y1 = p1.y + (x1 - p1.x) * d.y / d.x;
-		double y2 = p1.y + (x2 - p1.x) * d.y / d.x;
-
-		if ((copysign(1.0, y1) == copysign(1.0, y2) ||
-		     fabs(y1) < 1e-4 || fabs(y2) < 1e-4)) { /* trapezoid? */
-			double a = (y1 + y2) / 2.0;
-			if (a < 0.0)
-				return Dbl2(fabs(a), 0.0);
-			else
-				return Dbl2(0.0, fabs(a));
-		}
-		else { /* Then, we got two triangles: */
-			double x = p1.x - p1.y * d.x / d.y, xi;
-			double a1 = x > p1.x ? y1 * modf(x, &xi) / 2.0 : 0.0;
-			double a2 = x < p2.x ? y2 * (1.0 - modf(x, &xi)) / 2.0 : 0.0;
-			double a = fabs(a1) > fabs(a2) ? a1 : -a2;
-			if (a < 0.0)
-				return Dbl2(fabs(a1), fabs(a2));
-			else
-				return Dbl2(fabs(a2), fabs(a1));
-		}
-	}
-	else
-		return Dbl2(0.0, 0.0);
-}
-
-/* Calculates the area for a given pattern and distances to the left and to the */
-/* right, biased by an offset: */
-Dbl2 AreaOrtho::calculate(int pattern, int left, int right, double offset)
-{
-	Dbl2 a1, a2;
-
-	/*
-	 * o1           |
-	 *      .-------´
-	 * o2   |
-	 *
-	 *      <---d--->
-	 */
-	double d = (double)(left + right + 1);
-
-	double o1 = 0.5 + offset;
-	double o2 = 0.5 + offset - 1.0;
-
-	switch (pattern) {
-		case EDGESORTHO_NONE_NONE:
-		{
-			/*
-			 *
-			 *    ------
-			 *
-			 */
-			return Dbl2(0.0, 0.0);
-			break;
-		}
-		case EDGESORTHO_POSI_NONE:
-		{
-			/*
-			 *
-			 *   .------
-			 *   |
-			 *
-			 * We only offset L patterns in the crossing edge side, to make it
-			 * converge with the unfiltered pattern 0 (we don't want to filter the
-			 * pattern 0 to avoid artifacts).
-			 */
-			if (left <= right)
-				return area(Dbl2(0.0, o2), Dbl2(d / 2.0, 0.0), left);
-			else
-				return Dbl2(0.0, 0.0);
-			break;
-		}
-		case EDGESORTHO_NONE_POSI:
-		{
-			/*
-			 *
-			 *    ------.
-			 *          |
-			 */
-			if (left >= right)
-				return area(Dbl2(d / 2.0, 0.0), Dbl2(d, o2), left);
-			else
-				return Dbl2(0.0, 0.0);
-			break;
-		}
-		case EDGESORTHO_POSI_POSI:
-		{
-			/*
-			 *
-			 *   .------.
-			 *   |      |
-			 */
-			if (m_orig_u) {
-				a1 = area(Dbl2(0.0, o2), Dbl2(d / 2.0, 0.0), left);
-				a2 = area(Dbl2(d / 2.0, 0.0), Dbl2(d, o2), left);
-				return smoothArea(d, a1, a2);
-			}
-			else
-				return area(makeQuad(left, d, o2), makeQuad(left + 1, d, o2), left);
-			break;
-		}
-		case EDGESORTHO_NEGA_NONE:
-		{
-			/*
-			 *   |
-			 *   `------
-			 *
-			 */
-			if (left <= right)
-				return area(Dbl2(0.0, o1), Dbl2(d / 2.0, 0.0), left);
-			else
-				return Dbl2(0.0, 0.0);
-			break;
-		}
-		case EDGESORTHO_BOTH_NONE:
-		{
-			/*
-			 *   |
-			 *   +------
-			 *   |
-			 */
-			return Dbl2(0.0, 0.0);
-			break;
-		}
-		case EDGESORTHO_NEGA_POSI:
-		{
-			/*
-			 *   |
-			 *   `------.
-			 *          |
-			 *
-			 * A problem of not offseting L patterns (see above), is that for certain
-			 * max search distances, the pixels in the center of a Z pattern will
-			 * detect the full Z pattern, while the pixels in the sides will detect a
-			 * L pattern. To avoid discontinuities, we blend the full offsetted Z
-			 * revectorization with partially offsetted L patterns.
-			 */
-			if (fabs(offset) > 0.0) {
-				a1 = area(Dbl2(0.0, o1), Dbl2(d, o2), left);
-				a2 = area(Dbl2(0.0, o1), Dbl2(d / 2.0, 0.0), left);
-				a2 += area(Dbl2(d / 2.0, 0.0), Dbl2(d, o2), left);
-				return (a1 + a2) / Dbl2(2.0);
-			}
-			else
-				return area(Dbl2(0.0, o1), Dbl2(d, o2), left);
-			break;
-		}
-		case EDGESORTHO_BOTH_POSI:
-		{
-			/*
-			 *   |
-			 *   +------.
-			 *   |      |
-			 */
-			return area(Dbl2(0.0, o1), Dbl2(d, o2), left);
-			break;
-		}
-		case EDGESORTHO_NONE_NEGA:
-		{
-			/*
-			 *          |
-			 *    ------´
-			 *
-			 */
-			if (left >= right)
-				return area(Dbl2(d / 2.0, 0.0), Dbl2(d, o1), left);
-			else
-				return Dbl2(0.0, 0.0);
-			break;
-		}
-		case EDGESORTHO_POSI_NEGA:
-		{
-			/*
-			 *          |
-			 *   .------´
-			 *   |
-			 */
-			if (fabs(offset) > 0.0) {
-				a1 = area(Dbl2(0.0, o2), Dbl2(d, o1), left);
-				a2 = area(Dbl2(0.0, o2), Dbl2(d / 2.0, 0.0), left);
-				a2 += area(Dbl2(d / 2.0, 0.0), Dbl2(d, o1), left);
-				return (a1 + a2) / Dbl2(2.0);
-			}
-			else
-				return area(Dbl2(0.0, o2), Dbl2(d, o1), left);
-			break;
-		}
-		case EDGESORTHO_NONE_BOTH:
-		{
-			/*
-			 *          |
-			 *    ------+
-			 *          |
-			 */
-			return Dbl2(0.0, 0.0);
-			break;
-		}
-		case EDGESORTHO_POSI_BOTH:
-		{
-			/*
-			 *          |
-			 *   .------+
-			 *   |      |
-			 */
-			return area(Dbl2(0.0, o2), Dbl2(d, o1), left);
-			break;
-		}
-		case EDGESORTHO_NEGA_NEGA:
-		{
-			/*
-			 *   |      |
-			 *   `------´
-			 *
-			 */
-			if (m_orig_u) {
-				a1 = area(Dbl2(0.0, o1), Dbl2(d / 2.0, 0.0), left);
-				a2 = area(Dbl2(d / 2.0, 0.0), Dbl2(d, o1), left);
-				return smoothArea(d, a1, a2);
-			}
-			else
-				return area(makeQuad(left, d, o1), makeQuad(left + 1, d, o1), left);
-			break;
-		}
-		case EDGESORTHO_BOTH_NEGA:
-		{
-			/*
-			 *   |      |
-			 *   +------´
-			 *   |
-			 */
-			return area(Dbl2(0.0, o2), Dbl2(d, o1), left);
-			break;
-		}
-		case EDGESORTHO_NEGA_BOTH:
-		{
-			/*
-			 *   |      |
-			 *   `------+
-			 *          |
-			 */
-			return area(Dbl2(0.0, o1), Dbl2(d, o2), left);
-			break;
-		}
-		case EDGESORTHO_BOTH_BOTH:
-		{
-			/*
-			 *   |      |
-			 *   +------+
-			 *   |      |
-			 */
-			return Dbl2(0.0, 0.0);
-			break;
-		}
-	}
-
-	return Dbl2(0.0, 0.0);
-}
-
-/*------------------------------------------------------------------------------*/
-/* Diagonal Areas */
-
-class AreaDiag {
-	double m_data[SUBSAMPLES_DIAG][TEX_SIZE_DIAG][TEX_SIZE_DIAG][2];
-	bool m_numeric;
-	bool m_orig_u;
-public:
-	AreaDiag(bool numeric, bool orig_u) : m_numeric(numeric), m_orig_u(orig_u) {}
-
-	double *getData() { return (double *)&m_data; }
-	Dbl2 getPixel(int offset_index, Int2 coords) {
-		return Dbl2(m_data[offset_index][coords.y][coords.x][0],
-			    m_data[offset_index][coords.y][coords.x][1]);
-	}
-
-	void areaTex(int offset_index);
-private:
-	void putPixel(int offset_index, Int2 coords, Dbl2 pixel) {
-		m_data[offset_index][coords.y][coords.x][0] = pixel.x;
-		m_data[offset_index][coords.y][coords.x][1] = pixel.y;
-	}
-
-	double area1(Dbl2 p1, Dbl2 p2, Int2 p);
-	Dbl2 area(Dbl2 p1, Dbl2 p2, int left);
-	Dbl2 areaTriangle(Dbl2 p1L, Dbl2 p2L, Dbl2 p1R, Dbl2 p2R, int left);
-	Dbl2 calculate(int pattern, int left, int right, Dbl2 offset);
-};
-
-/* Calculates the area under the line p1->p2 for the pixel 'p' using brute */
-/* force sampling: */
-/* (quick and dirty solution, but it works) */
-double AreaDiag::area1(Dbl2 p1, Dbl2 p2, Int2 p)
-{
-	if (p1 == p2)
-		return 1.0;
-
-	double xm = (p1.x + p2.x) / 2.0, ym = (p1.y + p2.y) / 2.0;
-	double a = p2.y - p1.y;
-	double b = p1.x - p2.x;
-	int count = 0;
-
-	for (int ix = 0; ix < SAMPLES_DIAG; ix++) {
-		double x = (double)p.x + (double)ix / (double)(SAMPLES_DIAG - 1);
-		for (int iy = 0; iy < SAMPLES_DIAG; iy++) {
-			double y = (double)p.y + (double)iy / (double)(SAMPLES_DIAG - 1);
-			if (a * (x - xm) + b * (y - ym) > 0.0) /* inside? */
-				count++;
-		}
-	}
-	return (double)count / (double)(SAMPLES_DIAG * SAMPLES_DIAG);
-}
-
-/* Calculates the area under the line p1->p2: */
-/* (includes the pixel and its opposite) */
-Dbl2 AreaDiag::area(Dbl2 p1, Dbl2 p2, int left)
-{
-	if (m_numeric) {
-		double a1 = area1(p1, p2, Int2(1, 0) + Int2(left));
-		double a2 = area1(p1, p2, Int2(1, 1) + Int2(left));
-		return Dbl2(1.0 - a1, a2);
-	}
-
-	/* Calculates the area under the line p1->p2 for the pixel 'p' analytically */
-	Dbl2 d = p2 - p1;
-	if (d.x == 0.0)
-		return Dbl2(0.0, 1.0);
-	if (d.y == 0.0)
-		return Dbl2(1.0, 0.0);
-
-	double x1 = (double)(1 + left);
-	double x2 = x1 + 1.0;
-	double ymid = x1;
-	double xtop = p1.x + (ymid + 1.0 - p1.y) * d.x / d.y;
-	double xmid = p1.x + (ymid       - p1.y) * d.x / d.y;
-	double xbot = p1.x + (ymid - 1.0 - p1.y) * d.x / d.y;
-
-	double y1 = p1.y + (x1 - p1.x) * d.y / d.x;
-	double y2 = p1.y + (x2 - p1.x) * d.y / d.x;
-	double fy1 = y1 - floor(y1);
-	double fy2 = y2 - floor(y2);
-	int iy1 = (int)floor(y1 - ymid);
-	int iy2 = (int)floor(y2 - ymid);
-
-	if (iy1 <= -2) {
-		if (iy2 == -1)
-			return Dbl2(1.0 - (x2 - xbot) * fy2 * 0.5, 0.0);
-		else if (iy2 == 0)
-			return Dbl2((xmid + xbot) * 0.5 - x1, (x2 - xmid) * fy2 * 0.5);
-		else if (iy2 >= 1)
-			return Dbl2((xmid + xbot) * 0.5 - x1, x2 -  (xtop + xmid) * 0.5);
-		else /* iy2 < -1 */
-			return Dbl2(1.0, 0.0);
-	}
-	else if (iy1 == -1) {
-		if (iy2 == -1)
-			return Dbl2(1.0 - (fy1 + fy2) * 0.5, 0.0);
-		else if (iy2 == 0)
-			return Dbl2((xmid - x1) * (1.0 - fy1) * 0.5, (x2 - xmid) * fy2 * 0.5);
-		else if (iy2 >= 1)
-			return Dbl2((xmid - x1) * (1.0 - fy1) * 0.5, x2 - (xtop + xmid) * 0.5);
-		else /* iy2 < -1 */
-			return Dbl2(1.0 - (xbot - x1) * fy1 * 0.5, 0.0);
-	}
-	else if (iy1 == 0) {
-		if (iy2 == -1)
-			return Dbl2((x2 - xmid) * (1.0 - fy2) * 0.5, (xmid - x1) * fy1 * 0.5);
-		else if (iy2 == 0)
-			return Dbl2(0.0, (fy1 + fy2) * 0.5);
-		else if (iy2 >= 1)
-			return Dbl2(0.0, 1.0 - (xtop - x1) * (1.0 - fy1) * 0.5);
-		else /* iy2 < -1 */
-			return Dbl2(x2 - (xmid + xbot) * 0.5, (xmid - x1) * fy1 * 0.5);
-	}
-	else { /* iy1 > 0 */
-		if (iy2 == -1)
-			return Dbl2((x2 - xtop) * (1.0 - fy2) * 0.5, (xtop + xmid) * 0.5 - x1);
-		else if (iy2 == 0)
-			return Dbl2(0.0, 1.0 - (x1 - xtop) * (1.0 - fy2) * 0.5);
-		else if (iy2 >= 1)
-			return Dbl2(0.0, 1.0);
-		else /* iy2 < -1 */
-			return Dbl2(x2 - (xmid + xbot) * 0.5, (xtop + xmid) * 0.5 - x1);
-	}
-}
-
-/* Calculate u-patterns using a triangle: */
-Dbl2 AreaDiag::areaTriangle(Dbl2 p1L, Dbl2 p2L, Dbl2 p1R, Dbl2 p2R, int left)
-{
-	double x1 = (double)(1 + left);
-	double x2 = x1 + 1.0;
-
-	Dbl2 dL = p2L - p1L;
-	Dbl2 dR = p2R - p1R;
-	double xm = ((p1L.x * dL.y / dL.x - p1L.y) - (p1R.x * dR.y / dR.x - p1R.y)) / (dL.y / dL.x - dR.y / dR.x);
-
-	double y1 = (x1 < xm) ? p1L.y + (x1 - p1L.x) * dL.y / dL.x : p1R.y + (x1 - p1R.x) * dR.y / dR.x;
-	double y2 = (x2 < xm) ? p1L.y + (x2 - p1L.x) * dL.y / dL.x : p1R.y + (x2 - p1R.x) * dR.y / dR.x;
-
-	return area(Dbl2(x1, y1), Dbl2(x2, y2), left);
-}
-
-/* Calculates the area for a given pattern and distances to the left and to the */
-/* right, biased by an offset: */
-Dbl2 AreaDiag::calculate(int pattern, int left, int right, Dbl2 offset)
-{
-	Dbl2 a1, a2;
-
-	double d = (double)(left + right + 1);
-
-	/*
-	 * There is some Black Magic around diagonal area calculations. Unlike
-	 * orthogonal patterns, the 'null' pattern (one without crossing edges) must be
-	 * filtered, and the ends of both the 'null' and L patterns are not known: L
-	 * and U patterns have different endings, and we don't know what is the
-	 * adjacent pattern. So, what we do is calculate a blend of both possibilites.
-	 */
-	switch (pattern) {
-		case EDGESDIAG_NONE_NONE:
-		{
-			/*
-			 *
-			 *         .-´
-			 *       .-´
-			 *     .-´
-			 *   .-´
-			 *   ´
-			 *
-			 */
-			a1 = area(Dbl2(1.0, 1.0), Dbl2(1.0, 1.0) + Dbl2(d), left); /* 1st possibility */
-			a2 = area(Dbl2(1.0, 0.0), Dbl2(1.0, 0.0) + Dbl2(d), left); /* 2nd possibility */
-			return (a1 + a2) / Dbl2(2.0); /* Blend them */
-			break;
-		}
-		case EDGESDIAG_VERT_NONE:
-		{
-			/*
-			 *
-			 *         .-´
-			 *       .-´
-			 *     .-´
-			 *   .-´
-			 *   |
-			 *   |
-			 */
-			a1 = area(Dbl2(1.0, 0.0) + offset, Dbl2(0.0, 0.0) + Dbl2(d), left);
-			a2 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d), left);
-			return (a1 + a2) / Dbl2(2.0);
-			break;
-		}
-		case EDGESDIAG_NONE_HORZ:
-		{
-			/*
-			 *
-			 *         .----
-			 *       .-´
-			 *     .-´
-			 *   .-´
-			 *   ´
-			 *
-			 */
-			a1 = area(Dbl2(0.0, 0.0), Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
-			a2 = area(Dbl2(1.0, 0.0), Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
-			return (a1 + a2) / Dbl2(2.0);
-			break;
-		}
-		case EDGESDIAG_VERT_HORZ:
-		{
-			/*
-			 *
-			 *         .----
-			 *       .-´
-			 *     .-´
-			 *   .-´
-			 *   |
-			 *   |
-			 */
-			if (m_orig_u)
-				return area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
-			else
-				return areaTriangle(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d),
-						    Dbl2(0.0, 0.0), Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
-			break;
-		}
-		case EDGESDIAG_HORZ_NONE:
-		{
-			/*
-			 *
-			 *         .-´
-			 *       .-´
-			 *     .-´
-			 * ----´
-			 *
-			 *
-			 */
-			a1 = area(Dbl2(1.0, 1.0) + offset, Dbl2(0.0, 0.0) + Dbl2(d), left);
-			a2 = area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d), left);
-			return (a1 + a2) / Dbl2(2.0);
-			break;
-		}
-		case EDGESDIAG_BOTH_NONE:
-		{
-			/*
-			 *
-			 *         .-´
-			 *       .-´
-			 *     .-´
-			 * --.-´
-			 *   |
-			 *   |
-			 */
-			a1 = area(Dbl2(1.0, 1.0) + offset, Dbl2(0.0, 0.0) + Dbl2(d), left);
-			a2 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d), left);
-			return (a1 + a2) / Dbl2(2.0);
-			break;
-		}
-		case EDGESDIAG_HORZ_HORZ:
-		{
-			/*
-			 *
-			 *         .----
-			 *       .-´
-			 *     .-´
-			 * ----´
-			 *
-			 *
-			 */
-			return area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
-			break;
-		}
-		case EDGESDIAG_BOTH_HORZ:
-		{
-			/*
-			 *
-			 *         .----
-			 *       .-´
-			 *     .-´
-			 * --.-´
-			 *   |
-			 *   |
-			 */
-			a1 = area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
-			a2 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
-			return (a1 + a2) / Dbl2(2.0);
-			break;
-		}
-		case EDGESDIAG_NONE_VERT:
-		{
-			/*
-			 *         |
-			 *         |
-			 *       .-´
-			 *     .-´
-			 *   .-´
-			 *   ´
-			 *
-			 */
-			a1 = area(Dbl2(0.0, 0.0), Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
-			a2 = area(Dbl2(1.0, 0.0), Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
-			return (a1 + a2) / Dbl2(2.0);
-			break;
-		}
-		case EDGESDIAG_VERT_VERT:
-		{
-			/*
-			 *         |
-			 *         |
-			 *       .-´
-			 *     .-´
-			 *   .-´
-			 *   |
-			 *   |
-			 */
-			return area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
-			break;
-		}
-		case EDGESDIAG_NONE_BOTH:
-		{
-			/*
-			 *         |
-			 *         .----
-			 *       .-´
-			 *     .-´
-			 *   .-´
-			 *   ´
-			 *
-			 */
-			a1 = area(Dbl2(0.0, 0.0), Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
-			a2 = area(Dbl2(1.0, 0.0), Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
-			return (a1 + a2) / Dbl2(2.0);
-			break;
-		}
-		case EDGESDIAG_VERT_BOTH:
-		{
-			/*
-			 *         |
-			 *         .----
-			 *       .-´
-			 *     .-´
-			 *   .-´
-			 *   |
-			 *   |
-			 */
-			a1 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
-			a2 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
-			return (a1 + a2) / Dbl2(2.0);
-			break;
-		}
-		case EDGESDIAG_HORZ_VERT:
-		{
-			/*
-			 *         |
-			 *         |
-			 *       .-´
-			 *     .-´
-			 * ----´
-			 *
-			 *
-			 */
-			if (m_orig_u)
-				return area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
-			else
-				return areaTriangle(Dbl2(1.0, 1.0) + offset, Dbl2(2.0, 1.0) + Dbl2(d),
-						    Dbl2(1.0, 0.0), Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
-			break;
-		}
-		case EDGESDIAG_BOTH_VERT:
-		{
-			/*
-			 *         |
-			 *         |
-			 *       .-´
-			 *     .-´
-			 * --.-´
-			 *   |
-			 *   |
-			 */
-			a1 = area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
-			a2 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
-			return (a1 + a2) / Dbl2(2.0);
-			break;
-		}
-		case EDGESDIAG_HORZ_BOTH:
-		{
-			/*
-			 *         |
-			 *         .----
-			 *       .-´
-			 *     .-´
-			 * ----´
-			 *
-			 *
-			 */
-			a1 = area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
-			a2 = area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
-			return (a1 + a2) / Dbl2(2.0);
-			break;
-		}
-		case EDGESDIAG_BOTH_BOTH:
-		{
-			/*
-			 *         |
-			 *         .----
-			 *       .-´
-			 *     .-´
-			 * --.-´
-			 *   |
-			 *   |
-			 */
-			a1 = area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
-			a2 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
-			return (a1 + a2) / Dbl2(2.0);
-			break;
-		}
-	}
-
-	return Dbl2(0.0, 0.0);
-}
-
-/*------------------------------------------------------------------------------*/
-/* Main Loops */
-
-void AreaOrtho::areaTex(int offset_index)
-{
-	double offset = subsample_offsets_ortho[offset_index];
-	int max_dist = m_compat ? MAX_DIST_ORTHO_COMPAT : MAX_DIST_ORTHO;
-
-	for (int pattern = 0; pattern < 16; pattern++) {
-		Int2 e = Int2(max_dist) * (m_compat ? edgesortho_compat : edgesortho)[pattern];
-		for (int left = 0; left < max_dist; left++) {
-			for (int right = 0; right < max_dist; right++) {
-				Dbl2 p = calculate(pattern, left * left, right * right, offset);
-				Int2 coords = e + Int2(left, right);
-
-				putPixel(offset_index, coords, p);
-			}
-		}
-	}
-	return;
-}
-
-void AreaDiag::areaTex(int offset_index)
-{
-	Dbl2 offset = subsample_offsets_diag[offset_index];
-
-	for (int pattern = 0; pattern < 16; pattern++) {
-		Int2 e = Int2(MAX_DIST_DIAG) * edgesdiag[pattern];
-		for (int left = 0; left < MAX_DIST_DIAG; left++) {
-			for (int right = 0; right < MAX_DIST_DIAG; right++) {
-				Dbl2 p = calculate(pattern, left, right, offset);
-				Int2 coords = e + Int2(left, right);
-
-				putPixel(offset_index, coords, p);
-			}
-		}
-	}
-	return;
-}
-
-/*------------------------------------------------------------------------------*/
-/* Write File to Specified Location on Disk */
-
-/* C/C++ source code (arrays of floats) */
-static void write_double_array(FILE *fp, const double *ptr, int length, const char *array_name, bool quantize)
-{
-	fprintf(fp, "static const float %s[%d] = {", array_name, length);
-
-	for (int n = 0; n < length; n++) {
-		if (n > 0)
-			fprintf(fp, ",");
-		fprintf(fp, (n % 8 != 0) ? " " : "\n\t");
-
-		if (quantize)
-			fprintf(fp, "%3d / 255.0", (int)(*(ptr++) * 255.0));
-		else
-			fprintf(fp, "%1.8lf", *(ptr++));
-	}
-
-	fprintf(fp, "\n};\n");
-}
-
-static void write_csource(AreaOrtho *ortho, AreaDiag *diag, FILE *fp, bool subsampling, bool quantize)
-{
-	fprintf(fp, "/* This file was generated by smaa_areatex.cpp */\n");
-
-	fprintf(fp, "\n/* Horizontal/Vertical Areas */\n");
-	write_double_array(fp, ortho->getData(),
-			   TEX_SIZE_ORTHO * TEX_SIZE_ORTHO * 2 * (subsampling ? SUBSAMPLES_ORTHO : 1),
-			   "areatex", quantize);
-
-	fprintf(fp, "\n/* Diagonal Areas */\n");
-	write_double_array(fp, diag->getData(),
-			   TEX_SIZE_DIAG * TEX_SIZE_DIAG * 2 * (subsampling ? SUBSAMPLES_DIAG : 1),
-			   "areatex_diag", quantize);
-}
-
-/* .tga File (RGBA 32bit uncompressed) */
-static void write_tga(AreaOrtho *ortho, AreaDiag *diag, FILE *fp, bool subsampling)
-{
-	int subsamples = subsampling ? SUBSAMPLES_ORTHO : 1;
-	unsigned char header[18] = {0, 0,
-				    2,   /* uncompressed RGB */
-				    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-				    32,  /* 32bit */
-				    8};  /* 8bit alpha, left to right, bottom to top */
-
-	/* Set width and height */
-	header[12] = (TEX_SIZE_ORTHO + TEX_SIZE_DIAG)      & 0xff;
-	header[13] = ((TEX_SIZE_ORTHO + TEX_SIZE_DIAG) >> 8) & 0xff;
-	header[14] = (subsamples * TEX_SIZE_ORTHO)      & 0xff;
-	header[15] = ((subsamples * TEX_SIZE_ORTHO) >> 8) & 0xff;
-
-	/* Write .tga header */
-	fwrite(header, sizeof(unsigned char), sizeof(header) / sizeof(unsigned char), fp);
-
-	/* Write pixel data  */
-	for (int i = subsamples - 1; i >= 0; i--) {
-		for (int y = TEX_SIZE_ORTHO - 1; y >= 0; y--) {
-			for (int x = 0; x < TEX_SIZE_ORTHO; x++) {
-				Dbl2 p = ortho->getPixel(i, Int2(x, y));
-				fputc(0, fp);                            /* B */
-				fputc((unsigned char)(p.y * 255.0), fp); /* G */
-				fputc((unsigned char)(p.x * 255.0), fp); /* R */
-				fputc(0, fp);                            /* A */
-			}
-
-			for (int x = 0; x < TEX_SIZE_DIAG; x++) {
-				if (i < SUBSAMPLES_DIAG) {
-					Dbl2 p = diag->getPixel(i, Int2(x, y));
-					fputc(0, fp);                            /* B */
-					fputc((unsigned char)(p.y * 255.0), fp); /* G */
-					fputc((unsigned char)(p.x * 255.0), fp); /* R */
-					fputc(0, fp);                            /* A */
-				}
-				else {
-					fputc(0, fp);
-					fputc(0, fp);
-					fputc(0, fp);
-					fputc(0, fp);
-				}
-			}
-		}
-	}
-}
-
-/* .raw File (R8G8 raw data) */
-static void write_raw(AreaOrtho *ortho, AreaDiag *diag, FILE *fp, bool subsampling)
-{
-	int subsamples = subsampling ? SUBSAMPLES_ORTHO : 1;
-
-	/* Write pixel data  */
-	for (int i = 0; i < subsamples; i++) {
-		for (int y = 0; y < TEX_SIZE_ORTHO; y++) {
-			for (int x = 0; x < TEX_SIZE_ORTHO; x++) {
-				Dbl2 p = ortho->getPixel(i, Int2(x, y));
-				fputc((unsigned char)(p.x * 255.0), fp); /* R */
-				fputc((unsigned char)(p.y * 255.0), fp); /* G */
-			}
-
-			for (int x = 0; x < TEX_SIZE_DIAG; x++) {
-				if (i < SUBSAMPLES_DIAG) {
-					Dbl2 p = diag->getPixel(i, Int2(x, y));
-					fputc((unsigned char)(p.x * 255.0), fp); /* R */
-					fputc((unsigned char)(p.y * 255.0), fp); /* G */
-				}
-				else {
-					fputc(0, fp);
-					fputc(0, fp);
-				}
-			}
-		}
-	}
-}
-
-static int generate_file(AreaOrtho *ortho, AreaDiag *diag, const char *path, bool subsampling, bool quantize, bool tga, bool raw)
-{
-	FILE *fp = fopen(path, tga ? "wb" : "w");
-
-	if (!fp) {
-		fprintf(stderr, "Unable to open file: %s\n", path);
-		return 1;
-	}
-
-	// fprintf(stderr, "Generating %s\n", path);
-
-	if (tga)
-		write_tga(ortho, diag, fp, subsampling);
-	else if (raw)
-		write_raw(ortho, diag, fp, subsampling);
-	else
-		write_csource(ortho, diag, fp, subsampling, quantize);
-
-	fclose(fp);
-
-	return 0;
-}
-
-int main(int argc, char **argv)
-{
-	bool subsampling = false;
-	bool quantize = false;
-	bool tga = false;
-	bool raw = false;
-	bool compat = false;
-	bool numeric = false;
-	bool orig_u = false;
-	bool help = false;
-	char *outfile = NULL;
-	int status = 0;
-
-	for (int i = 1; i < argc; i++) {
-		char *ptr = argv[i];
-		if (*ptr++ == '-' && *ptr != '\0') {
-			char c;
-			while ((c = *ptr++) != '\0') {
-				if (c == 's')
-					subsampling = true;
-				else if (c == 'q')
-					quantize = true;
-				else if (c == 't')
-					tga = true;
-				else if (c == 'r')
-					raw = true;
-				else if (c == 'c')
-					compat = true;
-				else if (c == 'n')
-					numeric = true;
-				else if (c == 'u')
-					orig_u = true;
-				else if (c == 'h')
-					help = true;
-				else {
-					fprintf(stderr, "Unknown option: -%c\n", c);
-					status = 1;
-					break;
-				}
-			}
-		}
-		else if (outfile) {
-			fprintf(stderr, "Too much file names: %s, %s\n", outfile, argv[i]);
-			status = 1;
-		}
-		else
-			outfile = argv[i];
-
-		if (status != 0)
-			break;
-	}
-
-	if (status == 0 && !help && !outfile) {
-		fprintf(stderr, "File name was not specified.\n");
-		status = 1;
-	}
-
-	if (status != 0 || help) {
-		fprintf(stderr, "Usage: %s [OPTION]... OUTFILE\n", argv[0]);
-		fprintf(stderr, "Options:\n");
-		fprintf(stderr, "    -s    Calculate data for subpixel rendering\n");
-		fprintf(stderr, "    -q    Quantize data to 256 levels\n");
-		fprintf(stderr, "    -t    Write TGA image instead of C/C++ source\n");
-		fprintf(stderr, "    -r    Write R8G8 raw image instead of C/C++ source\n");
-		fprintf(stderr, "    -c    Generate compatible orthogonal data that subtexture size is 16\n");
-		fprintf(stderr, "    -n    Numerically calculate diagonal data using brute force sampling\n");
-		fprintf(stderr, "    -u    Process orthogonal / diagonal U patterns in older ways\n");
-		fprintf(stderr, "    -h    Print this help and exit\n");
-		fprintf(stderr, "File name OUTFILE usually should have an extension such as .c, .h, or .tga,\n");
-		fprintf(stderr, "except for a special name '-' that means standard output.\n\n");
-		fprintf(stderr, "Example:\n");
-		fprintf(stderr, "  Generate TGA file exactly same as AreaTexDX10.tga bundled with the\n");
-		fprintf(stderr, "  original implementation:\n\n");
-		fprintf(stderr, "  $ smaa_areatex -stcnu AreaTexDX10.tga\n\n");
-		return status;
-	}
-
-	AreaOrtho *ortho = new AreaOrtho(compat, orig_u);
-	AreaDiag *diag = new AreaDiag(numeric, orig_u);
-
-	/* Calculate areatex data */
-	for (int i = 0; i < (subsampling ? SUBSAMPLES_ORTHO : 1); i++)
-		ortho->areaTex(i);
-
-	for (int i = 0; i < (subsampling ? SUBSAMPLES_DIAG : 1); i++)
-		diag->areaTex(i);
-
-	/* Generate .tga, .raw, or C/C++ source file, or write the data to stdout */
-	if (strcmp(outfile, "-") != 0)
-		status = generate_file(ortho, diag, outfile, subsampling, quantize, tga, raw);
-	else if (tga)
-		write_tga(ortho, diag, stdout, subsampling);
-	else if (raw)
-		write_raw(ortho, diag, stdout, subsampling);
-	else
-		write_csource(ortho, diag, stdout, subsampling, quantize);
-
-	delete ortho;
-	delete diag;
-
-	return status;
-}
-
-/* smaa_areatex.cpp ends here */
diff --git a/source/blender/compositor/CMakeLists.txt b/source/blender/compositor/CMakeLists.txt
index 6b7c153ddf9..38315eafe5f 100644
--- a/source/blender/compositor/CMakeLists.txt
+++ b/source/blender/compositor/CMakeLists.txt
@@ -584,20 +584,6 @@ if(WITH_COMPOSITOR_CPU)
     ${CMAKE_CURRENT_BINARY_DIR}/operations
   )
 
-  set(GENSRC_DIR ${CMAKE_CURRENT_BINARY_DIR}/operations)
-  set(GENSRC ${GENSRC_DIR}/COM_SMAAAreaTexture.h)
-  add_custom_command(
-    OUTPUT ${GENSRC}
-    COMMAND ${CMAKE_COMMAND} -E make_directory ${GENSRC_DIR}
-    COMMAND "$<TARGET_FILE:smaa_areatex>" ${GENSRC}
-    DEPENDS smaa_areatex
-  )
-  list(APPEND SRC
-    ${GENSRC}
-  )
-  unset(GENSRC)
-  unset(GENSRC_DIR)
-
   if(WITH_OPENIMAGEDENOISE)
     add_definitions(-DWITH_OPENIMAGEDENOISE)
     add_definitions(-DOIDN_STATIC_LIB)
diff --git a/source/blender/compositor/nodes/COM_AntiAliasingNode.cc b/source/blender/compositor/nodes/COM_AntiAliasingNode.cc
index d2c68b7d041..3b73bdc4f1b 100644
--- a/source/blender/compositor/nodes/COM_AntiAliasingNode.cc
+++ b/source/blender/compositor/nodes/COM_AntiAliasingNode.cc
@@ -7,37 +7,41 @@
 
 namespace blender::compositor {
 
+/* Blender encodes the threshold in the [0, 1] range, while the SMAA algorithm expects it in
+ * the [0, 0.5] range. */
+static float get_threshold(const NodeAntiAliasingData *data)
+{
+  return data->threshold / 2.0f;
+}
+
+/* Blender encodes the local contrast adaptation factor in the [0, 1] range, while the SMAA
+ * algorithm expects it in the [0, 10] range. */
+static float get_local_contrast_adaptation_factor(const NodeAntiAliasingData *data)
+{
+  return data->contrast_limit * 10.0f;
+}
+
+/* Blender encodes the corner rounding factor in the float [0, 1] range, while the SMAA algorithm
+ * expects it in the integer [0, 100] range. */
+static int get_corner_rounding(const NodeAntiAliasingData *data)
+{
+  return int(data->corner_rounding * 100.0f);
+}
+
 void AntiAliasingNode::convert_to_operations(NodeConverter &converter,
                                              const CompositorContext & /*context*/) const
 {
   const bNode *node = this->get_bnode();
   const NodeAntiAliasingData *data = (const NodeAntiAliasingData *)node->storage;
 
-  /* Edge Detection (First Pass) */
-  SMAAEdgeDetectionOperation *operation1 = nullptr;
+  SMAAOperation *operation = new SMAAOperation();
+  operation->set_threshold(get_threshold(data));
+  operation->set_local_contrast_adaptation_factor(get_local_contrast_adaptation_factor(data));
+  operation->set_corner_rounding(get_corner_rounding(data));
+  converter.add_operation(operation);
 
-  operation1 = new SMAAEdgeDetectionOperation();
-  operation1->set_threshold(data->threshold);
-  operation1->set_local_contrast_adaptation_factor(data->contrast_limit);
-  converter.add_operation(operation1);
-
-  converter.map_input_socket(get_input_socket(0), operation1->get_input_socket(0));
-
-  /* Blending Weight Calculation Pixel Shader (Second Pass) */
-  SMAABlendingWeightCalculationOperation *operation2 =
-      new SMAABlendingWeightCalculationOperation();
-  operation2->set_corner_rounding(data->corner_rounding);
-  converter.add_operation(operation2);
-
-  converter.add_link(operation1->get_output_socket(), operation2->get_input_socket(0));
-
-  /* Neighborhood Blending Pixel Shader (Third Pass) */
-  SMAANeighborhoodBlendingOperation *operation3 = new SMAANeighborhoodBlendingOperation();
-  converter.add_operation(operation3);
-
-  converter.map_input_socket(get_input_socket(0), operation3->get_input_socket(0));
-  converter.add_link(operation2->get_output_socket(), operation3->get_input_socket(1));
-  converter.map_output_socket(get_output_socket(0), operation3->get_output_socket());
+  converter.map_input_socket(get_input_socket(0), operation->get_input_socket(0));
+  converter.map_output_socket(get_output_socket(0), operation->get_output_socket());
 }
 
 }  // namespace blender::compositor
diff --git a/source/blender/compositor/nodes/COM_CornerPinNode.cc b/source/blender/compositor/nodes/COM_CornerPinNode.cc
index 716f1e1bae6..66d9be04f3b 100644
--- a/source/blender/compositor/nodes/COM_CornerPinNode.cc
+++ b/source/blender/compositor/nodes/COM_CornerPinNode.cc
@@ -18,28 +18,13 @@ void CornerPinNode::convert_to_operations(NodeConverter &converter,
   PlaneCornerPinMaskOperation *plane_mask_operation = new PlaneCornerPinMaskOperation();
   converter.add_operation(plane_mask_operation);
 
-  SMAAEdgeDetectionOperation *smaa_edge_detection = new SMAAEdgeDetectionOperation();
-  converter.add_operation(smaa_edge_detection);
+  SMAAOperation *smaa_operation = new SMAAOperation();
+  converter.add_operation(smaa_operation);
 
   converter.add_link(plane_mask_operation->get_output_socket(),
-                     smaa_edge_detection->get_input_socket(0));
+                     smaa_operation->get_input_socket(0));
 
-  SMAABlendingWeightCalculationOperation *smaa_blending_weights =
-      new SMAABlendingWeightCalculationOperation();
-  converter.add_operation(smaa_blending_weights);
-
-  converter.add_link(smaa_edge_detection->get_output_socket(),
-                     smaa_blending_weights->get_input_socket(0));
-
-  SMAANeighborhoodBlendingOperation *smaa_neighborhood = new SMAANeighborhoodBlendingOperation();
-  converter.add_operation(smaa_neighborhood);
-
-  converter.add_link(plane_mask_operation->get_output_socket(),
-                     smaa_neighborhood->get_input_socket(0));
-  converter.add_link(smaa_blending_weights->get_output_socket(),
-                     smaa_neighborhood->get_input_socket(1));
-
-  converter.map_output_socket(this->get_output_socket(1), smaa_neighborhood->get_output_socket());
+  converter.map_output_socket(this->get_output_socket(1), smaa_operation->get_output_socket());
 
   PlaneCornerPinWarpImageOperation *warp_image_operation = new PlaneCornerPinWarpImageOperation();
   converter.add_operation(warp_image_operation);
@@ -62,7 +47,7 @@ void CornerPinNode::convert_to_operations(NodeConverter &converter,
   converter.add_operation(set_alpha_operation);
   converter.add_link(warp_image_operation->get_output_socket(),
                      set_alpha_operation->get_input_socket(0));
-  converter.add_link(smaa_neighborhood->get_output_socket(),
+  converter.add_link(smaa_operation->get_output_socket(),
                      set_alpha_operation->get_input_socket(1));
   converter.map_output_socket(this->get_output_socket(0),
                               set_alpha_operation->get_output_socket());
diff --git a/source/blender/compositor/nodes/COM_DilateErodeNode.cc b/source/blender/compositor/nodes/COM_DilateErodeNode.cc
index f087b42e507..731fdf89aaa 100644
--- a/source/blender/compositor/nodes/COM_DilateErodeNode.cc
+++ b/source/blender/compositor/nodes/COM_DilateErodeNode.cc
@@ -37,26 +37,10 @@ void DilateErodeNode::convert_to_operations(NodeConverter &converter,
     converter.map_input_socket(get_input_socket(0), operation->get_input_socket(0));
 
     if (editor_node->custom3 < 2.0f) {
-      SMAAEdgeDetectionOperation *smaa_edge_detection = new SMAAEdgeDetectionOperation();
-      converter.add_operation(smaa_edge_detection);
-
-      converter.add_link(operation->get_output_socket(), smaa_edge_detection->get_input_socket(0));
-
-      SMAABlendingWeightCalculationOperation *smaa_blending_weights =
-          new SMAABlendingWeightCalculationOperation();
-      converter.add_operation(smaa_blending_weights);
-
-      converter.add_link(smaa_edge_detection->get_output_socket(),
-                         smaa_blending_weights->get_input_socket(0));
-
-      SMAANeighborhoodBlendingOperation *smaa_neighborhood =
-          new SMAANeighborhoodBlendingOperation();
-      converter.add_operation(smaa_neighborhood);
-
-      converter.add_link(operation->get_output_socket(), smaa_neighborhood->get_input_socket(0));
-      converter.add_link(smaa_blending_weights->get_output_socket(),
-                         smaa_neighborhood->get_input_socket(1));
-      converter.map_output_socket(get_output_socket(0), smaa_neighborhood->get_output_socket());
+      SMAAOperation *smaa_operation = new SMAAOperation();
+      converter.add_operation(smaa_operation);
+      converter.add_link(operation->get_output_socket(), smaa_operation->get_input_socket(0));
+      converter.map_output_socket(get_output_socket(0), smaa_operation->get_output_socket());
     }
     else {
       converter.map_output_socket(get_output_socket(0), operation->get_output_socket(0));
diff --git a/source/blender/compositor/nodes/COM_IDMaskNode.cc b/source/blender/compositor/nodes/COM_IDMaskNode.cc
index a0b712889fe..98a0dc638e9 100644
--- a/source/blender/compositor/nodes/COM_IDMaskNode.cc
+++ b/source/blender/compositor/nodes/COM_IDMaskNode.cc
@@ -27,27 +27,10 @@ void IDMaskNode::convert_to_operations(NodeConverter &converter,
     converter.map_output_socket(get_output_socket(0), operation->get_output_socket(0));
   }
   else {
-    SMAAEdgeDetectionOperation *operation1 = nullptr;
-
-    operation1 = new SMAAEdgeDetectionOperation();
-    converter.add_operation(operation1);
-
-    converter.add_link(operation->get_output_socket(0), operation1->get_input_socket(0));
-
-    /* Blending Weight Calculation Pixel Shader (Second Pass). */
-    SMAABlendingWeightCalculationOperation *operation2 =
-        new SMAABlendingWeightCalculationOperation();
-    converter.add_operation(operation2);
-
-    converter.add_link(operation1->get_output_socket(), operation2->get_input_socket(0));
-
-    /* Neighborhood Blending Pixel Shader (Third Pass). */
-    SMAANeighborhoodBlendingOperation *operation3 = new SMAANeighborhoodBlendingOperation();
-    converter.add_operation(operation3);
-
-    converter.add_link(operation->get_output_socket(0), operation3->get_input_socket(0));
-    converter.add_link(operation2->get_output_socket(), operation3->get_input_socket(1));
-    converter.map_output_socket(get_output_socket(0), operation3->get_output_socket());
+    SMAAOperation *smaa_operation = new SMAAOperation();
+    converter.add_operation(smaa_operation);
+    converter.add_link(operation->get_output_socket(0), smaa_operation->get_input_socket(0));
+    converter.map_output_socket(get_output_socket(0), smaa_operation->get_output_socket());
   }
 }
 
diff --git a/source/blender/compositor/nodes/COM_PlaneTrackDeformNode.cc b/source/blender/compositor/nodes/COM_PlaneTrackDeformNode.cc
index 21f3d26202b..8a02e95a855 100644
--- a/source/blender/compositor/nodes/COM_PlaneTrackDeformNode.cc
+++ b/source/blender/compositor/nodes/COM_PlaneTrackDeformNode.cc
@@ -35,28 +35,13 @@ void PlaneTrackDeformNode::convert_to_operations(NodeConverter &converter,
   }
   converter.add_operation(plane_mask_operation);
 
-  SMAAEdgeDetectionOperation *smaa_edge_detection = new SMAAEdgeDetectionOperation();
-  converter.add_operation(smaa_edge_detection);
+  SMAAOperation *smaa_operation = new SMAAOperation();
+  converter.add_operation(smaa_operation);
 
   converter.add_link(plane_mask_operation->get_output_socket(),
-                     smaa_edge_detection->get_input_socket(0));
+                     smaa_operation->get_input_socket(0));
 
-  SMAABlendingWeightCalculationOperation *smaa_blending_weights =
-      new SMAABlendingWeightCalculationOperation();
-  converter.add_operation(smaa_blending_weights);
-
-  converter.add_link(smaa_edge_detection->get_output_socket(),
-                     smaa_blending_weights->get_input_socket(0));
-
-  SMAANeighborhoodBlendingOperation *smaa_neighborhood = new SMAANeighborhoodBlendingOperation();
-  converter.add_operation(smaa_neighborhood);
-
-  converter.add_link(plane_mask_operation->get_output_socket(),
-                     smaa_neighborhood->get_input_socket(0));
-  converter.add_link(smaa_blending_weights->get_output_socket(),
-                     smaa_neighborhood->get_input_socket(1));
-
-  converter.map_output_socket(this->get_output_socket(1), smaa_neighborhood->get_output_socket());
+  converter.map_output_socket(this->get_output_socket(1), smaa_operation->get_output_socket());
 
   PlaneTrackWarpImageOperation *warp_image_operation = new PlaneTrackWarpImageOperation();
   warp_image_operation->set_movie_clip(clip);
@@ -75,7 +60,7 @@ void PlaneTrackDeformNode::convert_to_operations(NodeConverter &converter,
   converter.add_operation(set_alpha_operation);
   converter.add_link(warp_image_operation->get_output_socket(),
                      set_alpha_operation->get_input_socket(0));
-  converter.add_link(smaa_neighborhood->get_output_socket(),
+  converter.add_link(smaa_operation->get_output_socket(),
                      set_alpha_operation->get_input_socket(1));
   converter.map_output_socket(this->get_output_socket(0),
                               set_alpha_operation->get_output_socket());
diff --git a/source/blender/compositor/nodes/COM_ZCombineNode.cc b/source/blender/compositor/nodes/COM_ZCombineNode.cc
index f8dd36d1db3..ed79727b7cd 100644
--- a/source/blender/compositor/nodes/COM_ZCombineNode.cc
+++ b/source/blender/compositor/nodes/COM_ZCombineNode.cc
@@ -54,25 +54,10 @@ void ZCombineNode::convert_to_operations(NodeConverter &converter,
     converter.map_input_socket(get_input_socket(3), maskoperation->get_input_socket(1));
 
     /* Step 2 anti alias mask bit of an expensive operation, but does the trick. */
-    SMAAEdgeDetectionOperation *smaa_edge_detection = new SMAAEdgeDetectionOperation();
-    converter.add_operation(smaa_edge_detection);
+    SMAAOperation *smaa_operation = new SMAAOperation();
+    converter.add_operation(smaa_operation);
 
-    converter.add_link(maskoperation->get_output_socket(),
-                       smaa_edge_detection->get_input_socket(0));
-
-    SMAABlendingWeightCalculationOperation *smaa_blending_weights =
-        new SMAABlendingWeightCalculationOperation();
-    converter.add_operation(smaa_blending_weights);
-
-    converter.add_link(smaa_edge_detection->get_output_socket(),
-                       smaa_blending_weights->get_input_socket(0));
-
-    SMAANeighborhoodBlendingOperation *smaa_neighborhood = new SMAANeighborhoodBlendingOperation();
-    converter.add_operation(smaa_neighborhood);
-
-    converter.add_link(maskoperation->get_output_socket(), smaa_neighborhood->get_input_socket(0));
-    converter.add_link(smaa_blending_weights->get_output_socket(),
-                       smaa_neighborhood->get_input_socket(1));
+    converter.add_link(maskoperation->get_output_socket(), smaa_operation->get_input_socket(0));
 
     /* use mask to blend between the input colors. */
     ZCombineMaskOperation *zcombineoperation = this->get_bnode()->custom1 ?
@@ -80,7 +65,7 @@ void ZCombineNode::convert_to_operations(NodeConverter &converter,
                                                    new ZCombineMaskOperation();
     converter.add_operation(zcombineoperation);
 
-    converter.add_link(smaa_neighborhood->get_output_socket(),
+    converter.add_link(smaa_operation->get_output_socket(),
                        zcombineoperation->get_input_socket(0));
     converter.map_input_socket(get_input_socket(0), zcombineoperation->get_input_socket(1));
     converter.map_input_socket(get_input_socket(2), zcombineoperation->get_input_socket(2));
diff --git a/source/blender/compositor/operations/COM_SMAAOperation.cc b/source/blender/compositor/operations/COM_SMAAOperation.cc
index f49a069a81e..bb96c20c94d 100644
--- a/source/blender/compositor/operations/COM_SMAAOperation.cc
+++ b/source/blender/compositor/operations/COM_SMAAOperation.cc
@@ -1,805 +1,1514 @@
-/* SPDX-FileCopyrightText: 2024 Blender Authors
+/* SPDX-FileCopyrightText: 2013 Jorge Jimenez <jorge@iryoku.com>
+ * SPDX-FileCopyrightText: 2013 Jose I. Echevarria <joseignacioechevarria@gmail.com>
+ * SPDX-FileCopyrightText: 2013 Belen Masia <bmasia@unizar.es>
+ * SPDX-FileCopyrightText: 2013 Fernando Navarro <fernandn@microsoft.com>
+ * SPDX-FileCopyrightText: 2013 Diego Gutierrez <diegog@unizar.es>
+ * SPDX-FileCopyrightText: 2019-2023 Blender Authors
  *
- * SPDX-License-Identifier: GPL-2.0-or-later */
+ * SPDX-License-Identifier: MIT AND GPL-2.0-or-later */
 
-#include "COM_SMAAOperation.h"
-#include "BKE_node.hh"
-#include "COM_SMAAAreaTexture.h"
+#include "BLI_math_vector.h"
+#include "BLI_math_vector.hh"
+#include "BLI_smaa_textures.h"
+#include "BLI_span.hh"
+#include "BLI_task.hh"
 
 #include "IMB_colormanagement.hh"
 
+#include "COM_MemoryBuffer.h"
+#include "COM_SMAAOperation.h"
+
+/**
+ *                  _______  ___  ___       ___           ___
+ *                 /       ||   \/   |     /   \         /   \
+ *                |   (---- |  \  /  |    /  ^  \       /  ^  \
+ *                 \   \    |  |\/|  |   /  /_\  \     /  /_\  \
+ *              ----)   |   |  |  |  |  /  _____  \   /  _____  \
+ *             |_______/    |__|  |__| /__/     \__\ /__/     \__\
+ *
+ *                               E N H A N C E D
+ *       S U B P I X E L   M O R P H O L O G I C A L   A N T I A L I A S I N G
+ *
+ *                         http://www.iryoku.com/smaa/
+ *
+ * Hi, welcome aboard!
+ *
+ * Here you'll find instructions to get the shader up and running as fast as
+ * possible.
+ *
+ * IMPORTANT NOTICE: when updating, remember to update both this file and the
+ * precomputed textures! They may change from version to version.
+ *
+ * The shader has three passes, chained together as follows:
+ *
+ *                           |input|------------------+
+ *                              v                     |
+ *                    [ SMAA*EdgeDetection ]          |
+ *                              v                     |
+ *                          |edgesTex|                |
+ *                              v                     |
+ *              [ SMAABlendingWeightCalculation ]     |
+ *                              v                     |
+ *                          |blendTex|                |
+ *                              v                     |
+ *                [ SMAANeighborhoodBlending ] <------+
+ *                              v
+ *                           |output|
+ *
+ * Note that each [pass] has its own vertex and pixel shader. Remember to use
+ * oversized triangles instead of quads to avoid overshading along the
+ * diagonal.
+ *
+ * You've three edge detection methods to choose from: luma, color or depth.
+ * They represent different quality/performance and anti-aliasing/sharpness
+ * tradeoffs, so our recommendation is for you to choose the one that best
+ * suits your particular scenario:
+ *
+ * - Depth edge detection is usually the fastest but it may miss some edges.
+ *
+ * - Luma edge detection is usually more expensive than depth edge detection,
+ *   but catches visible edges that depth edge detection can miss.
+ *
+ * - Color edge detection is usually the most expensive one but catches
+ *   chroma-only edges.
+ *
+ * For quickstarters: just use luma edge detection.
+ *
+ * The general advice is to not rush the integration process and ensure each
+ * step is done correctly (don't try to integrate SMAA T2x with predicated edge
+ * detection from the start!). Ok then, let's go!
+ *
+ *  1. The first step is to create two RGBA temporal render targets for holding
+ *     |edgesTex| and |blendTex|.
+ *
+ *     In DX10 or DX11, you can use a RG render target for the edges texture.
+ *     In the case of NVIDIA GPUs, using RG render targets seems to actually be
+ *     slower.
+ *
+ *     On the Xbox 360, you can use the same render target for resolving both
+ *     |edgesTex| and |blendTex|, as they aren't needed simultaneously.
+ *
+ *  2. Both temporal render targets |edgesTex| and |blendTex| must be cleared
+ *     each frame. Do not forget to clear the alpha channel!
+ *
+ *  3. The next step is loading the two supporting precalculated textures,
+ *     'areaTex' and 'searchTex'. You'll find them in the 'Textures' folder as
+ *     C++ headers, and also as regular DDS files. They'll be needed for the
+ *     'SMAABlendingWeightCalculation' pass.
+ *
+ *     If you use the C++ headers, be sure to load them in the format specified
+ *     inside of them.
+ *
+ *     You can also compress 'areaTex' and 'searchTex' using BC5 and BC4
+ *     respectively, if you have that option in your content processor pipeline.
+ *     When compressing them, you get a non-perceptible quality decrease, and a
+ *     marginal performance increase.
+ *
+ *  4. All samplers must be set to linear filtering and clamp.
+ *
+ *     After you get the technique working, remember that 64-bit inputs have
+ *     half-rate linear filtering on GCN.
+ *
+ *     If SMAA is applied to 64-bit color buffers, switching to point filtering
+ *     when accessing them will increase the performance. Search for
+ *     'SMAASamplePoint' to see which textures may benefit from point
+ *     filtering, and where (which is basically the color input in the edge
+ *     detection and resolve passes).
+ *
+ *  5. All texture reads and buffer writes must be non-sRGB, with the exception
+ *     of the input read and the output write in
+ *     'SMAANeighborhoodBlending' (and only in this pass!). If sRGB reads in
+ *     this last pass are not possible, the technique will work anyway, but
+ *     will perform antialiasing in gamma space.
+ *
+ *     IMPORTANT: for best results the input read for the color/luma edge
+ *     detection should *NOT* be sRGB.
+ *
+ *  6. Before including SMAA.h you'll have to setup the render target metrics,
+ *     the target and any optional configuration defines. Optionally you can
+ *     use a preset.
+ *
+ *     You have the following targets available:
+ *         SMAA_HLSL_3
+ *         SMAA_HLSL_4
+ *         SMAA_HLSL_4_1
+ *         SMAA_GLSL_3 *
+ *         SMAA_GLSL_4 *
+ *
+ *         * (See SMAA_INCLUDE_VS and SMAA_INCLUDE_PS below).
+ *
+ *     And four presets:
+ *         SMAA_PRESET_LOW          (%60 of the quality)
+ *         SMAA_PRESET_MEDIUM       (%80 of the quality)
+ *         SMAA_PRESET_HIGH         (%95 of the quality)
+ *         SMAA_PRESET_ULTRA        (%99 of the quality)
+ *
+ *     For example:
+ *         #define SMAA_RT_METRICS float4(1.0 / 1280.0, 1.0 / 720.0, 1280.0, 720.0)
+ *         #define SMAA_HLSL_4
+ *         #define SMAA_PRESET_HIGH
+ *         #include "SMAA.h"
+ *
+ *     Note that SMAA_RT_METRICS doesn't need to be a macro, it can be a
+ *     uniform variable. The code is designed to minimize the impact of not
+ *     using a constant value, but it is still better to hardcode it.
+ *
+ *     Depending on how you encoded 'areaTex' and 'searchTex', you may have to
+ *     add (and customize) the following defines before including SMAA.h:
+ *          #define SMAA_AREATEX_SELECT(sample) sample.rg
+ *          #define SMAA_SEARCHTEX_SELECT(sample) sample.r
+ *
+ *     If your engine is already using porting macros, you can define
+ *     SMAA_CUSTOM_SL, and define the porting functions by yourself.
+ *
+ *  7. Then, you'll have to setup the passes as indicated in the scheme above.
+ *     You can take a look into SMAA.fx, to see how we did it for our demo.
+ *     Checkout the function wrappers, you may want to copy-paste them!
+ *
+ *  8. It's recommended to validate the produced |edgesTex| and |blendTex|.
+ *     You can use a screenshot from your engine to compare the |edgesTex|
+ *     and |blendTex| produced inside of the engine with the results obtained
+ *     with the reference demo.
+ *
+ *  9. After you get the last pass to work, it's time to optimize. You'll have
+ *     to initialize a stencil buffer in the first pass (discard is already in
+ *     the code), then mask execution by using it in the second pass. The last
+ *     pass should be executed in all pixels.
+ *
+ *
+ * After this point you can choose to enable predicated thresholding,
+ * temporal supersampling and motion blur integration:
+ *
+ * a) If you want to use predicated thresholding, take a look into
+ *    SMAA_PREDICATION; you'll need to pass an extra texture in the edge
+ *    detection pass.
+ *
+ * b) If you want to enable temporal supersampling (SMAA T2x):
+ *
+ * 1. The first step is to render using subpixel jitters. I won't go into
+ *    detail, but it's as simple as moving each vertex position in the
+ *    vertex shader, you can check how we do it in our DX10 demo.
+ *
+ * 2. Then, you must setup the temporal resolve. You may want to take a look
+ *    into SMAAResolve for resolving 2x modes. After you get it working, you'll
+ *    probably see ghosting everywhere. But fear not, you can enable the
+ *    CryENGINE temporal reprojection by setting the SMAA_REPROJECTION macro.
+ *    Check out SMAA_DECODE_VELOCITY if your velocity buffer is encoded.
+ *
+ * 3. The next step is to apply SMAA to each subpixel jittered frame, just as
+ *    done for 1x.
+ *
+ * 4. At this point you should already have something usable, but for best
+ *    results the proper area textures must be set depending on current jitter.
+ *    For this, the parameter 'subsampleIndices' of
+ *    'SMAABlendingWeightCalculationPS' must be set as follows, for our T2x
+ *    mode:
+ *
+ *    @SUBSAMPLE_INDICES
+ *
+ *    | S# |  Camera Jitter   |  subsampleIndices    |
+ *    +----+------------------+----------------------+
+ *    |  0 |  ( 0.25, -0.25)  |  float4(1, 1, 1, 0)  |
+ *    |  1 |  (-0.25,  0.25)  |  float4(2, 2, 2, 0)  |
+ *
+ *    These jitter positions assume a bottom-to-top y axis. S# stands for the
+ *    sample number.
+ *
+ * More information about temporal supersampling here:
+ *    http://iryoku.com/aacourse/downloads/13-Anti-Aliasing-Methods-in-CryENGINE-3.pdf
+ *
+ * c) If you want to enable spatial multisampling (SMAA S2x):
+ *
+ * 1. The scene must be rendered using MSAA 2x. The MSAA 2x buffer must be
+ *    created with:
+ *      - DX10:     see below (*)
+ *      - DX10.1:   D3D10_STANDARD_MULTISAMPLE_PATTERN or
+ *      - DX11:     D3D11_STANDARD_MULTISAMPLE_PATTERN
+ *
+ *    This allows to ensure that the subsample order matches the table in
+ *    @SUBSAMPLE_INDICES.
+ *
+ *    (*) In the case of DX10, we refer the reader to:
+ *      - SMAA::detectMSAAOrder and
+ *      - SMAA::msaaReorder
+ *
+ *    These functions allow matching the standard multisample patterns by
+ *    detecting the subsample order for a specific GPU, and reordering
+ *    them appropriately.
+ *
+ * 2. A shader must be run to output each subsample into a separate buffer
+ *    (DX10 is required). You can use SMAASeparate for this purpose, or just do
+ *    it in an existing pass (for example, in the tone mapping pass, which has
+ *    the advantage of feeding tone mapped subsamples to SMAA, which will yield
+ *    better results).
+ *
+ * 3. The full SMAA 1x pipeline must be run for each separated buffer, storing
+ *    the results in the final buffer. The second run should alpha blend with
+ *    the existing final buffer using a blending factor of 0.5.
+ *    'subsampleIndices' must be adjusted as in the SMAA T2x case (see point
+ *    b).
+ *
+ * d) If you want to enable temporal supersampling on top of SMAA S2x
+ *    (which actually is SMAA 4x):
+ *
+ * 1. SMAA 4x consists of temporally jittering SMAA S2x, so the first step is
+ *    to calculate SMAA S2x for current frame. In this case, 'subsampleIndices'
+ *    must be set as follows:
+ *
+ *    | F# | S# |   Camera Jitter    |    Net Jitter     |   subsampleIndices   |
+ *    +----+----+--------------------+-------------------+----------------------+
+ *    |  0 |  0 |  ( 0.125,  0.125)  |  ( 0.375, -0.125) |  float4(5, 3, 1, 3)  |
+ *    |  0 |  1 |  ( 0.125,  0.125)  |  (-0.125,  0.375) |  float4(4, 6, 2, 3)  |
+ *    +----+----+--------------------+-------------------+----------------------+
+ *    |  1 |  2 |  (-0.125, -0.125)  |  ( 0.125, -0.375) |  float4(3, 5, 1, 4)  |
+ *    |  1 |  3 |  (-0.125, -0.125)  |  (-0.375,  0.125) |  float4(6, 4, 2, 4)  |
+ *
+ *    These jitter positions assume a bottom-to-top y axis. F# stands for the
+ *    frame number. S# stands for the sample number.
+ *
+ * 2. After calculating SMAA S2x for current frame (with the new subsample
+ *    indices), previous frame must be reprojected as in SMAA T2x mode (see
+ *    point b).
+ *
+ * e) If motion blur is used, you may want to do the edge detection pass
+ *    together with motion blur. This has two advantages:
+ *
+ * 1. Pixels under heavy motion can be omitted from the edge detection process.
+ *    For these pixels we can just store "no edge", as motion blur will take
+ *    care of them.
+ * 2. The center pixel tap is reused.
+ *
+ * Note that in this case depth testing should be used instead of stenciling,
+ * as we have to write all the pixels in the motion blur pass.
+ *
+ * That's it!
+ */
+
+/* ----------------------------------------------------------------------------
+ * Blender's Defines */
+
+#define SMAA_CUSTOM_SL
+#define SMAA_AREATEX_SELECT(sample) sample.xy()
+#define SMAA_SEARCHTEX_SELECT(sample) sample.x
+#define SMAATexture2D(tex) const MemoryBuffer *tex
+#define SMAATexturePass2D(tex) tex
+#define SMAASampleLevelZero(tex, coord) tex->texture_bilinear_extend(coord)
+#define SMAASampleLevelZeroPoint(tex, coord) tex->texture_bilinear_extend(coord)
+#define SMAASampleLevelZeroOffset(tex, coord, offset, size) \
+  tex->texture_bilinear_extend(coord + float2(offset) / float2(size))
+#define SMAASample(tex, coord) tex->texture_bilinear_extend(coord)
+#define SMAASamplePoint(tex, coord) tex->texture_nearest_extend(coord)
+#define SMAASamplePointOffset(tex, coord, offset, size) \
+  tex->texture_nearest_extend(coord + float2(offset) / float2(size))
+#define SMAASampleOffset(tex, coord, offset, size) \
+  tex->texture_bilinear_extend(coord + float2(offset) / float2(size))
+#define SMAA_FLATTEN
+#define SMAA_BRANCH
+#define lerp(a, b, t) math::interpolate(a, b, t)
+#define saturate(a) math::clamp(a, 0.0f, 1.0f)
+#define mad(a, b, c) (a * b + c)
+
+/* ----------------------------------------------------------------------------
+ * SMAA Presets */
+
+/**
+ * Note that if you use one of these presets, the following configuration
+ * macros will be ignored if set in the "Configurable Defines" section.
+ */
+
+#if defined(SMAA_PRESET_LOW)
+#  define SMAA_THRESHOLD 0.15f
+#  define SMAA_MAX_SEARCH_STEPS 4
+#  define SMAA_DISABLE_DIAG_DETECTION
+#  define SMAA_DISABLE_CORNER_DETECTION
+#elif defined(SMAA_PRESET_MEDIUM)
+#  define SMAA_THRESHOLD 0.1f
+#  define SMAA_MAX_SEARCH_STEPS 8
+#  define SMAA_DISABLE_DIAG_DETECTION
+#  define SMAA_DISABLE_CORNER_DETECTION
+#elif defined(SMAA_PRESET_HIGH)
+#  define SMAA_THRESHOLD 0.1f
+#  define SMAA_MAX_SEARCH_STEPS 16
+#  define SMAA_MAX_SEARCH_STEPS_DIAG 8
+#  define SMAA_CORNER_ROUNDING 25
+#elif defined(SMAA_PRESET_ULTRA)
+#  define SMAA_THRESHOLD 0.05f
+#  define SMAA_MAX_SEARCH_STEPS 32
+#  define SMAA_MAX_SEARCH_STEPS_DIAG 16
+#  define SMAA_CORNER_ROUNDING 25
+#endif
+
+/* ----------------------------------------------------------------------------
+ * Configurable Defines */
+
+/**
+ * SMAA_THRESHOLD specifies the threshold or sensitivity to edges.
+ * Lowering this value you will be able to detect more edges at the expense of
+ * performance.
+ *
+ * Range: [0, 0.5]
+ *   0.1 is a reasonable value, and allows to catch most visible edges.
+ *   0.05 is a rather overkill value, that allows to catch 'em all.
+ *
+ *   If temporal supersampling is used, 0.2 could be a reasonable value, as low
+ *   contrast edges are properly filtered by just 2x.
+ */
+#ifndef SMAA_THRESHOLD
+#  define SMAA_THRESHOLD 0.1f
+#endif
+
+/**
+ * SMAA_DEPTH_THRESHOLD specifies the threshold for depth edge detection.
+ *
+ * Range: depends on the depth range of the scene.
+ */
+#ifndef SMAA_DEPTH_THRESHOLD
+#  define SMAA_DEPTH_THRESHOLD (0.1f * SMAA_THRESHOLD)
+#endif
+
+/**
+ * SMAA_MAX_SEARCH_STEPS specifies the maximum steps performed in the
+ * horizontal/vertical pattern searches, at each side of the pixel.
+ *
+ * In number of pixels, it's actually the double. So the maximum line length
+ * perfectly handled by, for example 16, is 64 (by perfectly, we meant that
+ * longer lines won't look as good, but still antialiased).
+ *
+ * Range: [0, 112]
+ */
+#ifndef SMAA_MAX_SEARCH_STEPS
+#  define SMAA_MAX_SEARCH_STEPS 16
+#endif
+
+/**
+ * SMAA_MAX_SEARCH_STEPS_DIAG specifies the maximum steps performed in the
+ * diagonal pattern searches, at each side of the pixel. In this case we jump
+ * one pixel at time, instead of two.
+ *
+ * Range: [0, 20]
+ *
+ * On high-end machines it is cheap (between a 0.8x and 0.9x slower for 16
+ * steps), but it can have a significant impact on older machines.
+ *
+ * Define SMAA_DISABLE_DIAG_DETECTION to disable diagonal processing.
+ */
+#ifndef SMAA_MAX_SEARCH_STEPS_DIAG
+#  define SMAA_MAX_SEARCH_STEPS_DIAG 8
+#endif
+
+/**
+ * SMAA_CORNER_ROUNDING specifies how much sharp corners will be rounded.
+ *
+ * Range: [0, 100]
+ *
+ * Define SMAA_DISABLE_CORNER_DETECTION to disable corner processing.
+ */
+#ifndef SMAA_CORNER_ROUNDING
+#  define SMAA_CORNER_ROUNDING 25
+#endif
+
+/**
+ * If there is a neighbor edge that has SMAA_LOCAL_CONTRAST_ADAPTATION_FACTOR
+ * times bigger contrast than current edge, current edge will be discarded.
+ *
+ * This allows to eliminate spurious crossing edges, and is based on the fact
+ * that, if there is too much contrast in a direction, that will perceptually
+ * hide contrast in the other neighbors.
+ */
+#ifndef SMAA_LOCAL_CONTRAST_ADAPTATION_FACTOR
+#  define SMAA_LOCAL_CONTRAST_ADAPTATION_FACTOR 2.0f
+#endif
+
+/**
+ * Predicated thresholding allows to better preserve texture details and to
+ * improve performance, by decreasing the number of detected edges using an
+ * additional buffer like the light accumulation buffer, object ids or even the
+ * depth buffer (the depth buffer usage may be limited to indoor or short range
+ * scenes).
+ *
+ * It locally decreases the luma or color threshold if an edge is found in an
+ * additional buffer (so the global threshold can be higher).
+ *
+ * This method was developed by Playstation EDGE MLAA team, and used in
+ * Killzone 3, by using the light accumulation buffer. More information here:
+ *     http://iryoku.com/aacourse/downloads/06-MLAA-on-PS3.pptx
+ */
+#ifndef SMAA_PREDICATION
+#  define SMAA_PREDICATION 0
+#endif
+
+/**
+ * Threshold to be used in the additional predication buffer.
+ *
+ * Range: depends on the input, so you'll have to find the magic number that
+ * works for you.
+ */
+#ifndef SMAA_PREDICATION_THRESHOLD
+#  define SMAA_PREDICATION_THRESHOLD 0.01f
+#endif
+
+/**
+ * How much to scale the global threshold used for luma or color edge
+ * detection when using predication.
+ *
+ * Range: [1, 5]
+ */
+#ifndef SMAA_PREDICATION_SCALE
+#  define SMAA_PREDICATION_SCALE 2.0f
+#endif
+
+/**
+ * How much to locally decrease the threshold.
+ *
+ * Range: [0, 1]
+ */
+#ifndef SMAA_PREDICATION_STRENGTH
+#  define SMAA_PREDICATION_STRENGTH 0.4f
+#endif
+
+/**
+ * Temporal reprojection allows to remove ghosting artifacts when using
+ * temporal supersampling. We use the CryEngine 3 method which also introduces
+ * velocity weighting. This feature is of extreme importance for totally
+ * removing ghosting. More information here:
+ *    http://iryoku.com/aacourse/downloads/13-Anti-Aliasing-Methods-in-CryENGINE-3.pdf
+ *
+ * Note that you'll need to setup a velocity buffer for enabling reprojection.
+ * For static geometry, saving the previous depth buffer is a viable
+ * alternative.
+ */
+#ifndef SMAA_REPROJECTION
+#  define SMAA_REPROJECTION 0
+#endif
+
+/**
+ * SMAA_REPROJECTION_WEIGHT_SCALE controls the velocity weighting. It allows to
+ * remove ghosting trails behind the moving object, which are not removed by
+ * just using reprojection. Using low values will exhibit ghosting, while using
+ * high values will disable temporal supersampling under motion.
+ *
+ * Behind the scenes, velocity weighting removes temporal supersampling when
+ * the velocity of the subsamples differs (meaning they are different objects).
+ *
+ * Range: [0, 80]
+ */
+#ifndef SMAA_REPROJECTION_WEIGHT_SCALE
+#  define SMAA_REPROJECTION_WEIGHT_SCALE 30.0f
+#endif
+
+/**
+ * On some compilers, discard cannot be used in vertex shaders. Thus, they need
+ * to be compiled separately.
+ */
+#ifndef SMAA_INCLUDE_VS
+#  define SMAA_INCLUDE_VS 1
+#endif
+#ifndef SMAA_INCLUDE_PS
+#  define SMAA_INCLUDE_PS 1
+#endif
+
+/* ----------------------------------------------------------------------------
+ * Texture Access Defines */
+
+#ifndef SMAA_AREATEX_SELECT
+#  if defined(SMAA_HLSL_3)
+#    define SMAA_AREATEX_SELECT(sample) sample.ra
+#  else
+#    define SMAA_AREATEX_SELECT(sample) sample.rg
+#  endif
+#endif
+
+#ifndef SMAA_SEARCHTEX_SELECT
+#  define SMAA_SEARCHTEX_SELECT(sample) sample.r
+#endif
+
+#ifndef SMAA_DECODE_VELOCITY
+#  define SMAA_DECODE_VELOCITY(sample) sample.rg
+#endif
+
+/* ----------------------------------------------------------------------------
+ * Non-Configurable Defines */
+
+#define SMAA_AREATEX_MAX_DISTANCE 16
+#define SMAA_AREATEX_MAX_DISTANCE_DIAG 20
+#define SMAA_AREATEX_PIXEL_SIZE (1.0f / float2(160.0f, 560.0f))
+#define SMAA_AREATEX_SUBTEX_SIZE (1.0f / 7.0f)
+#define SMAA_SEARCHTEX_SIZE float2(66.0f, 33.0f)
+#define SMAA_SEARCHTEX_PACKED_SIZE float2(64.0f, 16.0f)
+#define SMAA_CORNER_ROUNDING_NORM (float(SMAA_CORNER_ROUNDING) / 100.0f)
+
+/* ----------------------------------------------------------------------------
+ * Porting Functions */
+
+#if defined(SMAA_HLSL_3)
+#  define SMAATexture2D(tex) sampler2D tex
+#  define SMAATexturePass2D(tex) tex
+#  define SMAASampleLevelZero(tex, coord) tex2Dlod(tex, float4(coord, 0.0, 0.0))
+#  define SMAASampleLevelZeroPoint(tex, coord) tex2Dlod(tex, float4(coord, 0.0, 0.0))
+/* clang-format off */
+#  define SMAASampleLevelZeroOffset(tex, coord, offset) tex2Dlod(tex, float4(coord + offset * SMAA_RT_METRICS.xy, 0.0, 0.0))
+/* clang-format on */
+#  define SMAASample(tex, coord) tex2D(tex, coord)
+#  define SMAASamplePoint(tex, coord) tex2D(tex, coord)
+#  define SMAASampleOffset(tex, coord, offset) tex2D(tex, coord + offset * SMAA_RT_METRICS.xy)
+#  define SMAA_FLATTEN [flatten]
+#  define SMAA_BRANCH [branch]
+#endif
+#if defined(SMAA_HLSL_4) || defined(SMAA_HLSL_4_1)
+SamplerState LinearSampler
+{
+  Filter = MIN_MAG_LINEAR_MIP_POINT;
+  AddressU = Clamp;
+  AddressV = Clamp;
+};
+SamplerState PointSampler
+{
+  Filter = MIN_MAG_MIP_POINT;
+  AddressU = Clamp;
+  AddressV = Clamp;
+};
+#  define SMAATexture2D(tex) Texture2D tex
+#  define SMAATexturePass2D(tex) tex
+#  define SMAASampleLevelZero(tex, coord) tex.SampleLevel(LinearSampler, coord, 0)
+#  define SMAASampleLevelZeroPoint(tex, coord) tex.SampleLevel(PointSampler, coord, 0)
+/* clang-format off */
+#  define SMAASampleLevelZeroOffset(tex, coord, offset) tex.SampleLevel(LinearSampler, coord, 0, offset)
+/* clang-format on */
+#  define SMAASample(tex, coord) tex.Sample(LinearSampler, coord)
+#  define SMAASamplePoint(tex, coord) tex.Sample(PointSampler, coord)
+#  define SMAASampleOffset(tex, coord, offset) tex.Sample(LinearSampler, coord, offset)
+#  define SMAA_FLATTEN [flatten]
+#  define SMAA_BRANCH [branch]
+#  define SMAATexture2DMS2(tex) Texture2DMS<float4, 2> tex
+#  define SMAALoad(tex, pos, sample) tex.Load(pos, sample)
+#  if defined(SMAA_HLSL_4_1)
+#    define SMAAGather(tex, coord) tex.Gather(LinearSampler, coord, 0)
+#  endif
+#endif
+#if defined(SMAA_GLSL_3) || defined(SMAA_GLSL_4) || defined(GPU_METAL) || defined(GPU_VULKAN)
+#  define SMAATexture2D(tex) sampler2D tex
+#  define SMAATexturePass2D(tex) tex
+#  define SMAASampleLevelZero(tex, coord) textureLod(tex, coord, 0.0)
+#  define SMAASampleLevelZeroPoint(tex, coord) textureLod(tex, coord, 0.0)
+#  define SMAASampleLevelZeroOffset(tex, coord, offset) textureLodOffset(tex, coord, 0.0, offset)
+#  define SMAASample(tex, coord) texture(tex, coord)
+#  define SMAASamplePoint(tex, coord) texture(tex, coord)
+#  define SMAASampleOffset(tex, coord, offset) texture(tex, coord, offset)
+#  define SMAA_FLATTEN
+#  define SMAA_BRANCH
+#  define lerp(a, b, t) mix(a, b, t)
+#  define saturate(a) clamp(a, 0.0, 1.0)
+#  if defined(SMAA_GLSL_4)
+#    define SMAAGather(tex, coord) textureGather(tex, coord)
+#  endif
+#  if defined(SMAA_GLSL_4)
+#    define mad(a, b, c) fma(a, b, c)
+#  elif defined(GPU_VULKAN)
+/* NOTE(Vulkan) mad macro doesn't work, define each override as work-around. */
+vec4 mad(vec4 a, vec4 b, vec4 c)
+{
+  return fma(a, b, c);
+}
+vec3 mad(vec3 a, vec3 b, vec3 c)
+{
+  return fma(a, b, c);
+}
+vec2 mad(vec2 a, vec2 b, vec2 c)
+{
+  return fma(a, b, c);
+}
+float mad(float a, float b, float c)
+{
+  return fma(a, b, c);
+}
+#  else
+#    define mad(a, b, c) (a * b + c)
+#  endif
+/* NOTE(Metal): Types already natively declared in MSL. */
+#  ifndef GPU_METAL
+#    define float2 vec2
+#    define float3 vec3
+#    define float4 vec4
+#    define int2 ivec2
+#    define int3 ivec3
+#    define int4 ivec4
+#    define bool2 bvec2
+#    define bool3 bvec3
+#    define bool4 bvec4
+#  endif
+#endif
+
+/* clang-format off */
+#if !defined(SMAA_HLSL_3) && !defined(SMAA_HLSL_4) && !defined(SMAA_HLSL_4_1) && !defined(SMAA_GLSL_3) && !defined(SMAA_GLSL_4) && !defined(SMAA_CUSTOM_SL)
+#  error you must define the shading language: SMAA_HLSL_*, SMAA_GLSL_* or SMAA_CUSTOM_SL
+#endif
+/* clang-format on */
+
 namespace blender::compositor {
 
-/*
- * An implementation of Enhanced Sub-pixel Morphological Anti-aliasing (SMAA)
- *
- * The algorithm was proposed by:
- *   Jorge Jimenez, Jose I. Echevarria, Tiago Sousa, Diego Gutierrez
- *
- *   http://www.iryoku.com/smaa/
- *
- * This file is based on SMAA-CPP:
- *
- *   https://github.com/i_ri-E/smaa-cpp
- *
- * Currently only SMAA 1x mode is provided, so the operation will be done
- * with no spatial multi-sampling nor temporal super-sampling.
- *
- * NOTE: This program assumes the screen coordinates are DirectX style, so
- * the vertical direction is upside-down. "top" and "bottom" actually mean
- * bottom and top, respectively.
+/* ----------------------------------------------------------------------------
+ * Misc functions */
+
+/**
+ * Conditional move:
  */
-
-/*-----------------------------------------------------------------------------*/
-/* Non-Configurable Defines */
-
-#define SMAA_AREATEX_SIZE 80
-#define SMAA_AREATEX_MAX_DISTANCE 20
-#define SMAA_AREATEX_MAX_DISTANCE_DIAG 20
-#define SMAA_MAX_SEARCH_STEPS 362 /* 362 - 1 = 19^2 */
-#define SMAA_MAX_SEARCH_STEPS_DIAG 19
-
-/*-----------------------------------------------------------------------------*/
-/* Internal Functions to Sample Pixel Color from Image */
-
-static inline void sample(MemoryBuffer *reader, int x, int y, float color[4])
+static void SMAAMovc(float2 cond, float2 &variable, float2 value)
 {
-  reader->read_elem_checked(x, y, color);
+  /* Use select function (select(genType A, genType B, genBType cond)). */
+  variable = math::interpolate(variable, value, cond);
 }
 
-template<typename T>
-static void sample_bilinear_vertical(T *reader, int x, int y, float yoffset, float color[4])
+static void SMAAMovc(float4 cond, float4 &variable, float4 value)
 {
-  float iy = floorf(yoffset);
-  float fy = yoffset - iy;
-  y += int(iy);
-
-  float color00[4], color01[4];
-
-  sample(reader, x + 0, y + 0, color00);
-  sample(reader, x + 0, y + 1, color01);
-
-  color[0] = interpf(color01[0], color00[0], fy);
-  color[1] = interpf(color01[1], color00[1], fy);
-  color[2] = interpf(color01[2], color00[2], fy);
-  color[3] = interpf(color01[3], color00[3], fy);
+  /* Use select function (select(genType A, genType B, genBType cond)). */
+  variable = math::interpolate(variable, value, cond);
 }
 
-template<typename T>
-static void sample_bilinear_horizontal(T *reader, int x, int y, float xoffset, float color[4])
+#if SMAA_INCLUDE_VS
+/* ----------------------------------------------------------------------------
+ * Vertex Shaders */
+
+/**
+ * Edge Detection Vertex Shader
+ */
+static void SMAAEdgeDetectionVS(float2 texcoord, int2 size, float4 offset[3])
 {
-  float ix = floorf(xoffset);
-  float fx = xoffset - ix;
-  x += int(ix);
-
-  float color00[4], color10[4];
-
-  sample(reader, x + 0, y + 0, color00);
-  sample(reader, x + 1, y + 0, color10);
-
-  color[0] = interpf(color10[0], color00[0], fx);
-  color[1] = interpf(color10[1], color00[1], fx);
-  color[2] = interpf(color10[2], color00[2], fx);
-  color[3] = interpf(color10[3], color00[3], fx);
-}
-
-/*-----------------------------------------------------------------------------*/
-/* Internal Functions to Sample Blending Weights from AreaTex */
-
-static inline const float *areatex_sample_internal(const float *areatex, int x, int y)
-{
-  return &areatex[(std::clamp(x, 0, SMAA_AREATEX_SIZE - 1) +
-                   std::clamp(y, 0, SMAA_AREATEX_SIZE - 1) * SMAA_AREATEX_SIZE) *
-                  2];
+  offset[0] = float4(texcoord.xy(), texcoord.xy()) +
+              float4(-1.0f, 0.0f, 0.0f, -1.0f) / float4(size, size);
+  offset[1] = float4(texcoord.xy(), texcoord.xy()) +
+              float4(1.0f, 0.0f, 0.0f, 1.0f) / float4(size, size);
+  offset[2] = float4(texcoord.xy(), texcoord.xy()) +
+              float4(-2.0f, 0.0f, 0.0f, -2.0f) / float4(size, size);
 }
 
 /**
- * We have the distance and both crossing edges. So, what are the areas
- * at each side of current edge?
+ * Blend Weight Calculation Vertex Shader
  */
-static void area(int d1, int d2, int e1, int e2, float weights[2])
+static void SMAABlendingWeightCalculationVS(float2 texcoord,
+                                            int2 size,
+                                            float2 &pixcoord,
+                                            float4 offset[3])
 {
-  /* The areas texture is compressed  quadratically: */
-  float x = float(SMAA_AREATEX_MAX_DISTANCE * e1) + sqrtf(float(d1));
-  float y = float(SMAA_AREATEX_MAX_DISTANCE * e2) + sqrtf(float(d2));
+  pixcoord = texcoord * float2(size);
 
-  float ix = floorf(x), iy = floorf(y);
-  float fx = x - ix, fy = y - iy;
-  int X = int(ix), Y = int(iy);
+  // We will use these offsets for the searches later on (see @PSEUDO_GATHER4):
+  offset[0] = float4(texcoord.xy(), texcoord.xy()) +
+              float4(-0.25f, -0.125f, 1.25f, -0.125f) / float4(size, size);
+  offset[1] = float4(texcoord.xy(), texcoord.xy()) +
+              float4(-0.125f, -0.25f, -0.125f, 1.25f) / float4(size, size);
 
-  const float *weights00 = areatex_sample_internal(areatex, X + 0, Y + 0);
-  const float *weights10 = areatex_sample_internal(areatex, X + 1, Y + 0);
-  const float *weights01 = areatex_sample_internal(areatex, X + 0, Y + 1);
-  const float *weights11 = areatex_sample_internal(areatex, X + 1, Y + 1);
-
-  weights[0] = interpf(
-      interpf(weights11[0], weights01[0], fx), interpf(weights10[0], weights00[0], fx), fy);
-  weights[1] = interpf(
-      interpf(weights11[1], weights01[1], fx), interpf(weights10[1], weights00[1], fx), fy);
+  // And these for the searches, they indicate the ends of the loops:
+  offset[2] = float4(offset[0].x, offset[0].z, offset[1].y, offset[1].w) +
+              (float4(-2.0f, 2.0f, -2.0f, 2.0f) * float(SMAA_MAX_SEARCH_STEPS)) /
+                  float4(float2(size.x), float2(size.y));
 }
 
 /**
- * Similar to area(), this calculates the area corresponding to a certain
+ * Neighborhood Blending Vertex Shader
+ */
+static void SMAANeighborhoodBlendingVS(float2 texcoord, int2 size, float4 &offset)
+{
+  offset = float4(texcoord, texcoord) + float4(1.0f, 0.0f, 0.0f, 1.0f) / float4(size, size);
+}
+#endif  // SMAA_INCLUDE_VS
+
+/**
+ * Luma Edge Detection
+ *
+ * IMPORTANT NOTICE: luma edge detection requires gamma-corrected colors, and
+ * thus 'colorTex' should be a non-sRGB texture.
+ */
+static float2 SMAALumaEdgeDetectionPS(float2 texcoord,
+                                      float4 offset[3],
+                                      SMAATexture2D(colorTex),
+#if SMAA_PREDICATION
+                                      SMAATexture2D(predicationTex),
+#endif
+                                      float edge_threshold,
+                                      float3 luminance_coefficients,
+                                      float local_contrast_adaptation_factor)
+{
+#if SMAA_PREDICATION
+  float2 threshold = SMAACalculatePredicatedThreshold(
+      texcoord, offset, SMAATexturePass2D(predicationTex));
+#else
+  // Calculate the threshold:
+  float2 threshold = float2(edge_threshold, edge_threshold);
+#endif
+
+  // Calculate lumas:
+  // float4 weights = float4(0.2126, 0.7152, 0.0722, 0.0);
+  float4 weights = float4(luminance_coefficients, 0.0f);
+  float L = math::dot(SMAASamplePoint(colorTex, texcoord), weights);
+
+  float Lleft = math::dot(SMAASamplePoint(colorTex, offset[0].xy()), weights);
+  float Ltop = math::dot(SMAASamplePoint(colorTex, offset[0].zw()), weights);
+
+  // We do the usual threshold:
+  float4 delta;
+  float2 delta_left_top = math::abs(L - float2(Lleft, Ltop));
+  delta.x = delta_left_top.x;
+  delta.y = delta_left_top.y;
+  float2 edges = math::step(threshold, delta.xy());
+
+  // Then return early if there is no edge:
+  if (math::dot(edges, float2(1.0f, 1.0f)) == 0.0f) {
+    return float2(0.0f);
+  }
+
+  // Calculate right and bottom deltas:
+  float Lright = math::dot(SMAASamplePoint(colorTex, offset[1].xy()), weights);
+  float Lbottom = math::dot(SMAASamplePoint(colorTex, offset[1].zw()), weights);
+  float2 delta_right_bottom = math::abs(L - float2(Lright, Lbottom));
+  delta.z = delta_right_bottom.x;
+  delta.w = delta_right_bottom.y;
+
+  // Calculate the maximum delta in the direct neighborhood:
+  float2 maxDelta = math::max(delta.xy(), delta.zw());
+
+  // Calculate left-left and top-top deltas:
+  float Lleftleft = math::dot(SMAASamplePoint(colorTex, offset[2].xy()), weights);
+  float Ltoptop = math::dot(SMAASamplePoint(colorTex, offset[2].zw()), weights);
+  float2 delta_left_left_top_top = math::abs(float2(Lleft, Ltop) - float2(Lleftleft, Ltoptop));
+  delta.z = delta_left_left_top_top.x;
+  delta.w = delta_left_left_top_top.y;
+
+  // Calculate the final maximum delta:
+  maxDelta = math::max(maxDelta.xy(), delta.zw());
+  float finalDelta = math::max(maxDelta.x, maxDelta.y);
+
+  // Local contrast adaptation:
+  edges *= math::step(finalDelta, local_contrast_adaptation_factor * delta.xy());
+
+  return edges;
+}
+
+/* ----------------------------------------------------------------------------
+ * Diagonal Search Functions */
+
+#if !defined(SMAA_DISABLE_DIAG_DETECTION)
+
+/**
+ * Decodes two binary values from a single bilinear-filtered access.
+ */
+static float2 SMAADecodeDiagBilinearAccess(float2 e)
+{
+  // The bilinear access for fetching 'e' has a 0.25 offset, and we are
+  // interested in the R and G edges:
+  //
+  // +---G---+-------+
+  // |   x o R   x   |
+  // +-------+-------+
+  //
+  // Then, if one of these edges is enabled:
+  //   Red:   (0.75 * X + 0.25 * 1) => 0.25 or 1.0
+  //   Green: (0.75 * 1 + 0.25 * X) => 0.75 or 1.0
+  //
+  // This function will unpack the values (mad + mul + round):
+  // wolframalpha.com: round(x * abs(5 * x - 5 * 0.75)) plot 0 to 1
+  e.x = e.x * math::abs(5.0f * e.x - 5.0f * 0.75f);
+  return math::round(e);
+}
+
+static float4 SMAADecodeDiagBilinearAccess(float4 e)
+{
+  e.x = e.x * math::abs(5.0f * e.x - 5.0f * 0.75f);
+  e.z = e.z * math::abs(5.0f * e.z - 5.0f * 0.75f);
+  return math::round(e);
+}
+
+/**
+ * These functions perform diagonal pattern searches.
+ */
+static float2 SMAASearchDiag1(
+    SMAATexture2D(edgesTex), float2 texcoord, float2 dir, int2 size, float2 &e)
+{
+  float4 coord = float4(texcoord, -1.0f, 1.0f);
+  float3 t = float3(1.0f / float2(size), 1.0f);
+  while (coord.z < float(SMAA_MAX_SEARCH_STEPS_DIAG - 1) && coord.w > 0.9f) {
+    float3 increment = mad(t, float3(dir, 1.0f), coord.xyz());
+    coord.x = increment.x;
+    coord.y = increment.y;
+    coord.z = increment.z;
+    e = SMAASamplePoint(edgesTex, coord.xy()).xy();
+    coord.w = math::dot(e, float2(0.5f, 0.5f));
+  }
+  return coord.zw();
+}
+
+static float2 SMAASearchDiag2(
+    SMAATexture2D(edgesTex), float2 texcoord, float2 dir, int2 size, float2 &e)
+{
+  float4 coord = float4(texcoord, -1.0f, 1.0f);
+  coord.x += 0.25f / size.x;  // See @SearchDiag2Optimization
+  float3 t = float3(1.0f / float2(size), 1.0f);
+  while (coord.z < float(SMAA_MAX_SEARCH_STEPS_DIAG - 1) && coord.w > 0.9f) {
+    float3 increment = mad(t, float3(dir, 1.0f), coord.xyz());
+    coord.x = increment.x;
+    coord.y = increment.y;
+    coord.z = increment.z;
+
+    // @SearchDiag2Optimization
+    // Fetch both edges at once using bilinear filtering:
+    e = SMAASampleLevelZero(edgesTex, coord.xy()).xy();
+    e = SMAADecodeDiagBilinearAccess(e);
+
+    // Non-optimized version:
+    // e.g = SMAASampleLevelZero(edgesTex, coord.xy).g;
+    // e.r = SMAASampleLevelZeroOffset(edgesTex, coord.xy, int2(1, 0), size).r;
+
+    coord.w = math::dot(e, float2(0.5f, 0.5f));
+  }
+  return coord.zw();
+}
+
+/**
+ * Similar to SMAAArea, this calculates the area corresponding to a certain
  * diagonal distance and crossing edges 'e'.
  */
-static void area_diag(int d1, int d2, int e1, int e2, float weights[2])
+static float2 SMAAAreaDiag(SMAATexture2D(areaTex), float2 dist, float2 e, float offset)
 {
-  int x = SMAA_AREATEX_MAX_DISTANCE_DIAG * e1 + d1;
-  int y = SMAA_AREATEX_MAX_DISTANCE_DIAG * e2 + d2;
+  float2 texcoord = mad(
+      float2(SMAA_AREATEX_MAX_DISTANCE_DIAG, SMAA_AREATEX_MAX_DISTANCE_DIAG), e, dist);
 
-  const float *w = areatex_sample_internal(areatex_diag, x, y);
-  copy_v2_v2(weights, w);
+  // We do a scale and bias for mapping to texel space:
+  texcoord = mad(SMAA_AREATEX_PIXEL_SIZE, texcoord, 0.5f * SMAA_AREATEX_PIXEL_SIZE);
+
+  // Diagonal areas are on the second half of the texture:
+  texcoord.x += 0.5f;
+
+  // Move to proper place, according to the subpixel offset:
+  texcoord.y += SMAA_AREATEX_SUBTEX_SIZE * offset;
+
+  // Do it!
+  return SMAA_AREATEX_SELECT(SMAASampleLevelZero(areaTex, texcoord));
 }
 
-/*-----------------------------------------------------------------------------*/
-/* Edge Detection (First Pass) */
-/*-----------------------------------------------------------------------------*/
-
-SMAAEdgeDetectionOperation::SMAAEdgeDetectionOperation()
+/**
+ * This searches for diagonal patterns and returns the corresponding weights.
+ */
+static float2 SMAACalculateDiagWeights(SMAATexture2D(edgesTex),
+                                       SMAATexture2D(areaTex),
+                                       float2 texcoord,
+                                       float2 e,
+                                       float4 subsampleIndices,
+                                       int2 size)
 {
-  this->add_input_socket(DataType::Color); /* image */
-  this->add_input_socket(DataType::Value); /* Depth, material ID, etc. TODO: currently unused. */
-  this->add_output_socket(DataType::Color);
-  flags_.can_be_constant = true;
-  this->set_threshold(CMP_DEFAULT_SMAA_THRESHOLD);
-  this->set_local_contrast_adaptation_factor(CMP_DEFAULT_SMAA_CONTRAST_LIMIT);
+  float2 weights = float2(0.0f, 0.0f);
+
+  // Search for the line ends:
+  float4 d;
+  float2 end;
+  if (e.x > 0.0f) {
+    float2 negative_diagonal = SMAASearchDiag1(
+        SMAATexturePass2D(edgesTex), texcoord, float2(-1.0f, 1.0f), size, end);
+    d.x = negative_diagonal.x;
+    d.z = negative_diagonal.y;
+    d.x += float(end.y > 0.9f);
+  }
+  else {
+    d.x = 0.0f;
+    d.z = 0.0f;
+  }
+  float2 positive_diagonal = SMAASearchDiag1(
+      SMAATexturePass2D(edgesTex), texcoord, float2(1.0, -1.0), size, end);
+  d.y = positive_diagonal.x;
+  d.w = positive_diagonal.y;
+
+  SMAA_BRANCH
+  if (d.x + d.y > 2.0f) {  // d.x + d.y + 1 > 3
+    // Fetch the crossing edges:
+    float4 coords = float4(texcoord, texcoord) +
+                    float4(-d.x + 0.25f, d.x, d.y, -d.y - 0.25f) / float4(size, size);
+    float4 c;
+    float2 left_edge = SMAASampleLevelZeroOffset(edgesTex, coords.xy(), int2(-1, 0), size).xy();
+    float2 right_edge = SMAASampleLevelZeroOffset(edgesTex, coords.zw(), int2(1, 0), size).xy();
+    c.x = left_edge.x;
+    c.y = left_edge.y;
+    c.z = right_edge.x;
+    c.w = right_edge.y;
+    float4 decoded_access = SMAADecodeDiagBilinearAccess(c);
+    c.y = decoded_access.x;
+    c.x = decoded_access.y;
+    c.w = decoded_access.z;
+    c.z = decoded_access.w;
+
+    // Non-optimized version:
+    // float4 coords = mad(float4(-d.x, d.x, d.y, -d.y), SMAA_RT_METRICS.xyxy, texcoord.xyxy);
+    // float4 c;
+    // c.x = SMAASampleLevelZeroOffset(edgesTex, coords.xy, int2(-1,  0), size).g;
+    // c.y = SMAASampleLevelZeroOffset(edgesTex, coords.xy, int2( 0,  0), size).r;
+    // c.z = SMAASampleLevelZeroOffset(edgesTex, coords.zw, int2( 1,  0), size).g;
+    // c.w = SMAASampleLevelZeroOffset(edgesTex, coords.zw, int2( 1, -1), size).r;
+
+    // Merge crossing edges at each side into a single value:
+    float2 cc = mad(float2(2.0f, 2.0f), float2(c.x, c.z), float2(c.y, c.w));
+
+    // Remove the crossing edge if we didn't find the end of the line:
+    SMAAMovc(math::step(0.9f, d.zw()), cc, float2(0.0f, 0.0f));
+
+    // Fetch the areas for this line:
+    weights += SMAAAreaDiag(SMAATexturePass2D(areaTex), d.xy(), cc, subsampleIndices.z);
+  }
+
+  // Search for the line ends:
+  float2 negative_diagonal = SMAASearchDiag2(
+      SMAATexturePass2D(edgesTex), texcoord, float2(-1.0f, -1.0f), size, end);
+  d.x = negative_diagonal.x;
+  d.z = negative_diagonal.y;
+  if (SMAASamplePointOffset(edgesTex, texcoord, int2(1, 0), size).x > 0.0f) {
+    float2 positive_diagonal = SMAASearchDiag2(
+        SMAATexturePass2D(edgesTex), texcoord, float2(1.0f, 1.0f), size, end);
+    d.y = positive_diagonal.x;
+    d.w = positive_diagonal.y;
+    d.y += float(end.y > 0.9f);
+  }
+  else {
+    d.y = 0.0f;
+    d.w = 0.0f;
+  }
+
+  SMAA_BRANCH
+  if (d.x + d.y > 2.0f) {  // d.x + d.y + 1 > 3
+    // Fetch the crossing edges:
+    float4 coords = float4(texcoord, texcoord) + float4(-d.x, -d.x, d.y, d.y) / float4(size, size);
+    float4 c;
+    c.x = SMAASampleLevelZeroOffset(edgesTex, coords.xy(), int2(-1, 0), size).y;
+    c.y = SMAASampleLevelZeroOffset(edgesTex, coords.xy(), int2(0, -1), size).x;
+    float2 left_edge = SMAASampleLevelZeroOffset(edgesTex, coords.zw(), int2(1, 0), size).xy();
+    c.z = left_edge.y;
+    c.w = left_edge.x;
+    float2 cc = mad(float2(2.0f, 2.0f), float2(c.x, c.z), float2(c.y, c.w));
+
+    // Remove the crossing edge if we didn't find the end of the line:
+    SMAAMovc(math::step(0.9f, d.zw()), cc, float2(0.0f, 0.0f));
+
+    // Fetch the areas for this line:
+    float2 area = SMAAAreaDiag(SMAATexturePass2D(areaTex), d.xy(), cc, subsampleIndices.w).xy();
+    weights.x += area.y;
+    weights.y += area.x;
+  }
+
+  return weights;
+}
+#endif
+
+/* ----------------------------------------------------------------------------
+ * Horizontal/Vertical Search Functions */
+
+/**
+ * This determines how much length we should add in the last step
+ * of the searches. It takes the bilinearly interpolated edge (see
+ * @PSEUDO_GATHER4), and adds 0, 1 or 2, depending on which edges and
+ * crossing edges are active.
+ */
+static float SMAASearchLength(SMAATexture2D(searchTex), float2 e, float offset)
+{
+  // The texture is flipped vertically, with left and right cases taking half
+  // of the space horizontally:
+  float2 scale = SMAA_SEARCHTEX_SIZE * float2(0.5f, -1.0f);
+  float2 bias = SMAA_SEARCHTEX_SIZE * float2(offset, 1.0f);
+
+  // Scale and bias to access texel centers:
+  scale += float2(-1.0f, 1.0f);
+  bias += float2(0.5f, -0.5f);
+
+  // Convert from pixel coordinates to texcoords:
+  // (We use SMAA_SEARCHTEX_PACKED_SIZE because the texture is cropped)
+  scale *= 1.0f / SMAA_SEARCHTEX_PACKED_SIZE;
+  bias *= 1.0f / SMAA_SEARCHTEX_PACKED_SIZE;
+
+  // Lookup the search texture:
+  return SMAA_SEARCHTEX_SELECT(SMAASampleLevelZero(searchTex, mad(scale, e, bias)));
 }
 
-void SMAAEdgeDetectionOperation::set_threshold(float threshold)
+/**
+ * Horizontal/vertical search functions for the 2nd pass.
+ */
+static float SMAASearchXLeft(
+    SMAATexture2D(edgesTex), SMAATexture2D(searchTex), float2 texcoord, float end, int2 size)
 {
-  /* UI values are between 0 and 1 for simplicity but algorithm expects values between 0 and 0.5 */
-  threshold_ = scalenorm(0, 0.5, threshold);
+  /**
+   * @PSEUDO_GATHER4
+   * This texcoord has been offset by (-0.25, -0.125) in the vertex shader to
+   * sample between edges, thus fetching four edges in a row.
+   * Sampling with different offsets in each direction allows disambiguating
+   * which edges are active from the four fetched ones.
+   */
+  float2 e = float2(0.0f, 1.0f);
+  while (texcoord.x > end && e.y > 0.8281f &&  // Is there some edge not activated?
+         e.x == 0.0f)                          // Or is there a crossing edge that breaks the line?
+  {
+    e = SMAASampleLevelZero(edgesTex, texcoord).xy();
+    texcoord = texcoord - float2(2.0f, 0.0f) / float2(size);
+  }
+
+  float offset = mad(
+      -(255.0f / 127.0f), SMAASearchLength(SMAATexturePass2D(searchTex), e, 0.0f), 3.25f);
+  return texcoord.x + offset / size.x;
+
+  // Non-optimized version:
+  // We correct the previous (-0.25, -0.125) offset we applied:
+  // texcoord.x += 0.25 * SMAA_RT_METRICS.x;
+
+  // The searches are biased by 1, so adjust the coords accordingly:
+  // texcoord.x += SMAA_RT_METRICS.x;
+
+  // Disambiguate the length added by the last step:
+  // texcoord.x += 2.0 * SMAA_RT_METRICS.x; // Undo last step
+  // texcoord.x -= SMAA_RT_METRICS.x * (255.0 / 127.0) *
+  //               SMAASearchLength(SMAATexturePass2D(searchTex), e, 0.0);
+  // return mad(SMAA_RT_METRICS.x, offset, texcoord.x);
 }
 
-void SMAAEdgeDetectionOperation::set_local_contrast_adaptation_factor(float factor)
+static float SMAASearchXRight(
+    SMAATexture2D(edgesTex), SMAATexture2D(searchTex), float2 texcoord, float end, int2 size)
 {
-  /* UI values are between 0 and 1 for simplicity but algorithm expects values between 1 and 10 */
-  contrast_limit_ = scalenorm(1, 10, factor);
+  float2 e = float2(0.0f, 1.0f);
+  while (texcoord.x < end && e.y > 0.8281f &&  // Is there some edge not activated?
+         e.x == 0.0f)                          // Or is there a crossing edge that breaks the line?
+  {
+    e = SMAASampleLevelZero(edgesTex, texcoord).xy();
+    texcoord = texcoord + float2(2.0f, 0.0f) / float2(size);
+  }
+  float offset = mad(
+      -(255.0f / 127.0f), SMAASearchLength(SMAATexturePass2D(searchTex), e, 0.5f), 3.25f);
+  return texcoord.x - offset / size.x;
 }
 
-void SMAAEdgeDetectionOperation::get_area_of_interest(const int /*input_idx*/,
-                                                      const rcti &output_area,
-                                                      rcti &r_input_area)
+static float SMAASearchYUp(
+    SMAATexture2D(edgesTex), SMAATexture2D(searchTex), float2 texcoord, float end, int2 size)
 {
-  r_input_area.xmax = output_area.xmax + 1;
-  r_input_area.xmin = output_area.xmin - 2;
-  r_input_area.ymax = output_area.ymax + 1;
-  r_input_area.ymin = output_area.ymin - 2;
+  float2 e = float2(1.0f, 0.0f);
+  while (texcoord.y > end && e.x > 0.8281f &&  // Is there some edge not activated?
+         e.y == 0.0f)                          // Or is there a crossing edge that breaks the line?
+  {
+    e = SMAASampleLevelZero(edgesTex, texcoord).xy();
+    texcoord = texcoord - float2(0.0f, 2.0f) / float2(size);
+  }
+  float2 flipped_edge = float2(e.y, e.x);
+  float offset = mad(-(255.0f / 127.0f),
+                     SMAASearchLength(SMAATexturePass2D(searchTex), flipped_edge, 0.0f),
+                     3.25f);
+  return texcoord.y + offset / size.y;
 }
 
-void SMAAEdgeDetectionOperation::update_memory_buffer_partial(MemoryBuffer *output,
-                                                              const rcti &area,
-                                                              Span<MemoryBuffer *> inputs)
+static float SMAASearchYDown(
+    SMAATexture2D(edgesTex), SMAATexture2D(searchTex), float2 texcoord, float end, int2 size)
 {
-  const MemoryBuffer *image = inputs[0];
-  for (BuffersIterator<float> it = output->iterate_with({}, area); !it.is_end(); ++it) {
-    float color[4];
-    const int x = it.x;
-    const int y = it.y;
+  float2 e = float2(1.0f, 0.0f);
+  while (texcoord.y < end && e.x > 0.8281f &&  // Is there some edge not activated?
+         e.y == 0.0f)                          // Or is there a crossing edge that breaks the line?
+  {
+    e = SMAASampleLevelZero(edgesTex, texcoord).xy();
+    texcoord = texcoord + float2(0.0f, 2.0f) / float2(size);
+  }
+  float2 flipped_edge = float2(e.y, e.x);
+  float offset = mad(-(255.0f / 127.0f),
+                     SMAASearchLength(SMAATexturePass2D(searchTex), flipped_edge, 0.5f),
+                     3.25f);
+  return texcoord.y - offset / size.y;
+}
 
-    /* Calculate luma deltas: */
-    image->read_elem_checked(x, y, color);
-    const float L = IMB_colormanagement_get_luminance(color);
-    image->read_elem_checked(x - 1, y, color);
-    const float Lleft = IMB_colormanagement_get_luminance(color);
-    image->read_elem_checked(x, y - 1, color);
-    const float Ltop = IMB_colormanagement_get_luminance(color);
-    const float Dleft = fabsf(L - Lleft);
-    const float Dtop = fabsf(L - Ltop);
+/**
+ * Ok, we have the distance and both crossing edges. So, what are the areas
+ * at each side of current edge?
+ */
+static float2 SMAAArea(SMAATexture2D(areaTex), float2 dist, float e1, float e2, float offset)
+{
+  // Rounding prevents precision errors of bilinear filtering:
+  float2 texcoord = mad(float2(SMAA_AREATEX_MAX_DISTANCE, SMAA_AREATEX_MAX_DISTANCE),
+                        math::round(4.0f * float2(e1, e2)),
+                        dist);
 
-    /* We do the usual threshold: */
-    it.out[0] = (x > 0 && Dleft >= threshold_) ? 1.0f : 0.0f;
-    it.out[1] = (y > 0 && Dtop >= threshold_) ? 1.0f : 0.0f;
-    it.out[2] = 0.0f;
-    it.out[3] = 1.0f;
+  // We do a scale and bias for mapping to texel space:
+  texcoord = mad(SMAA_AREATEX_PIXEL_SIZE, texcoord, 0.5f * SMAA_AREATEX_PIXEL_SIZE);
 
-    /* Then discard if there is no edge: */
-    if (is_zero_v2(it.out)) {
-      continue;
+  // Move to proper place, according to the subpixel offset:
+  texcoord.y = mad(SMAA_AREATEX_SUBTEX_SIZE, offset, texcoord.y);
+
+  // Do it!
+  return SMAA_AREATEX_SELECT(SMAASampleLevelZero(areaTex, texcoord));
+}
+
+/* ----------------------------------------------------------------------------
+ * Corner Detection Functions */
+
+static void SMAADetectHorizontalCornerPattern(SMAATexture2D(edgesTex),
+                                              float2 &weights,
+                                              float4 texcoord,
+                                              float2 d,
+                                              int2 size,
+                                              int corner_rounding)
+{
+#if !defined(SMAA_DISABLE_CORNER_DETECTION)
+  float2 leftRight = math::step(d, float2(d.y, d.x));
+  float2 rounding = (1.0f - corner_rounding / 100.0f) * leftRight;
+
+  rounding /= leftRight.x + leftRight.y;  // Reduce blending for pixels in the center of a line.
+
+  float2 factor = float2(1.0f, 1.0f);
+  factor.x -= rounding.x * SMAASampleLevelZeroOffset(edgesTex, texcoord.xy(), int2(0, 1), size).x;
+  factor.x -= rounding.y * SMAASampleLevelZeroOffset(edgesTex, texcoord.zw(), int2(1, 1), size).x;
+  factor.y -= rounding.x * SMAASampleLevelZeroOffset(edgesTex, texcoord.xy(), int2(0, -2), size).x;
+  factor.y -= rounding.y * SMAASampleLevelZeroOffset(edgesTex, texcoord.zw(), int2(1, -2), size).x;
+
+  weights *= saturate(factor);
+#endif
+}
+
+static void SMAADetectVerticalCornerPattern(SMAATexture2D(edgesTex),
+                                            float2 &weights,
+                                            float4 texcoord,
+                                            float2 d,
+                                            int2 size,
+                                            int corner_rounding)
+{
+#if !defined(SMAA_DISABLE_CORNER_DETECTION)
+  float2 leftRight = math::step(d, float2(d.y, d.x));
+  float2 rounding = (1.0f - corner_rounding / 100.0f) * leftRight;
+
+  rounding /= leftRight.x + leftRight.y;
+
+  float2 factor = float2(1.0f, 1.0f);
+  factor.x -= rounding.x * SMAASampleLevelZeroOffset(edgesTex, texcoord.xy(), int2(1, 0), size).y;
+  factor.x -= rounding.y * SMAASampleLevelZeroOffset(edgesTex, texcoord.zw(), int2(1, 1), size).y;
+  factor.y -= rounding.x * SMAASampleLevelZeroOffset(edgesTex, texcoord.xy(), int2(-2, 0), size).y;
+  factor.y -= rounding.y * SMAASampleLevelZeroOffset(edgesTex, texcoord.zw(), int2(-2, 1), size).y;
+
+  weights *= saturate(factor);
+#endif
+}
+
+/* ----------------------------------------------------------------------------
+ * Blending Weight Calculation Pixel Shader (Second Pass) */
+
+static float4 SMAABlendingWeightCalculationPS(float2 texcoord,
+                                              float2 pixcoord,
+                                              float4 offset[3],
+                                              MemoryBuffer *edgesTex,
+                                              MemoryBuffer *areaTex,
+                                              MemoryBuffer *searchTex,
+                                              float4 subsampleIndices,
+                                              int2 size,
+                                              int corner_rounding)
+{  // Just pass zero for SMAA 1x, see @SUBSAMPLE_INDICES.
+  float4 weights = float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+  float2 e = SMAASamplePoint(edgesTex, texcoord).xy();
+
+  SMAA_BRANCH
+  if (e.y > 0.0f) {  // Edge at north
+#if !defined(SMAA_DISABLE_DIAG_DETECTION)
+    // Diagonals have both north and west edges, so searching for them in
+    // one of the boundaries is enough.
+    float2 diagonal_weights = SMAACalculateDiagWeights(SMAATexturePass2D(edgesTex),
+                                                       SMAATexturePass2D(areaTex),
+                                                       texcoord,
+                                                       e,
+                                                       subsampleIndices,
+                                                       size);
+
+    weights.x = diagonal_weights.x;
+    weights.y = diagonal_weights.y;
+
+    // We give priority to diagonals, so if we find a diagonal we skip
+    // horizontal/vertical processing.
+    SMAA_BRANCH
+    if (weights.x == -weights.y) {  // weights.x + weights.y == 0.0
+#endif
+
+      float2 d;
+
+      // Find the distance to the left:
+      float3 coords;
+      coords.x = SMAASearchXLeft(SMAATexturePass2D(edgesTex),
+                                 SMAATexturePass2D(searchTex),
+                                 offset[0].xy(),
+                                 offset[2].x,
+                                 size);
+      coords.y =
+          offset[1].y;  // offset[1].y = texcoord.y - 0.25 * SMAA_RT_METRICS.y (@CROSSING_OFFSET)
+      d.x = coords.x;
+
+      // Now fetch the left crossing edges, two at a time using bilinear
+      // filtering. Sampling at -0.25 (see @CROSSING_OFFSET) enables us to
+      // discern what value each edge has:
+      float e1 = SMAASampleLevelZero(edgesTex, coords.xy()).x;
+
+      // Find the distance to the right:
+      coords.z = SMAASearchXRight(SMAATexturePass2D(edgesTex),
+                                  SMAATexturePass2D(searchTex),
+                                  offset[0].zw(),
+                                  offset[2].y,
+                                  size);
+      d.y = coords.z;
+
+      // We want the distances to be in pixel units (doing this here allows
+      // better interleaving of arithmetic and memory accesses):
+      d = math::abs(math::round(mad(float2(size.x), d, -float2(pixcoord.x))));
+
+      // SMAAArea below needs a sqrt, as the areas texture is compressed
+      // quadratically:
+      float2 sqrt_d = math::sqrt(d);
+
+      // Fetch the right crossing edges:
+      float e2 =
+          SMAASampleLevelZeroOffset(edgesTex, float2(coords.z, coords.y), int2(1, 0), size).x;
+
+      // Ok, we know what this pattern looks like, now it is time for getting
+      // the actual area:
+      float2 area = SMAAArea(SMAATexturePass2D(areaTex), sqrt_d, e1, e2, subsampleIndices.y);
+      weights.x = area.x;
+      weights.y = area.y;
+
+      // Fix corners:
+      coords.y = texcoord.y;
+
+      float2 corner_weight = weights.xy();
+      SMAADetectHorizontalCornerPattern(SMAATexturePass2D(edgesTex),
+                                        corner_weight,
+                                        float4(coords.xy(), coords.z, coords.y),
+                                        d,
+                                        size,
+                                        corner_rounding);
+      weights.x = corner_weight.x;
+      weights.y = corner_weight.y;
+
+#if !defined(SMAA_DISABLE_DIAG_DETECTION)
     }
+    else
+      e.x = 0.0f;  // Skip vertical processing.
+#endif
+  }
 
-    /* Calculate right and bottom deltas: */
-    image->read_elem_checked(x + 1, y, color);
-    const float Lright = IMB_colormanagement_get_luminance(color);
-    image->read_elem_checked(x, y + 1, color);
-    const float Lbottom = IMB_colormanagement_get_luminance(color);
-    const float Dright = fabsf(L - Lright);
-    const float Dbottom = fabsf(L - Lbottom);
+  SMAA_BRANCH
+  if (e.x > 0.0f) {  // Edge at west
+    float2 d;
 
-    /* Calculate the maximum delta in the direct neighborhood: */
-    float max_delta = fmaxf(fmaxf(Dleft, Dright), fmaxf(Dtop, Dbottom));
+    // Find the distance to the top:
+    float3 coords;
+    coords.y = SMAASearchYUp(SMAATexturePass2D(edgesTex),
+                             SMAATexturePass2D(searchTex),
+                             offset[1].xy(),
+                             offset[2].z,
+                             size);
+    coords.x = offset[0].x;  // offset[0].x = texcoord.x - 0.25 * SMAA_RT_METRICS.x;
+    d.x = coords.y;
 
-    /* Calculate luma used for both left and top edges: */
-    image->read_elem_checked(x - 1, y - 1, color);
-    const float Llefttop = IMB_colormanagement_get_luminance(color);
+    // Fetch the top crossing edges:
+    float e1 = SMAASampleLevelZero(edgesTex, coords.xy()).y;
 
-    /* Left edge */
-    if (it.out[0] != 0.0f) {
-      /* Calculate deltas around the left pixel: */
-      image->read_elem_checked(x - 2, y, color);
-      const float Lleftleft = IMB_colormanagement_get_luminance(color);
-      image->read_elem_checked(x - 1, y + 1, color);
-      const float Lleftbottom = IMB_colormanagement_get_luminance(color);
-      const float Dleftleft = fabsf(Lleft - Lleftleft);
-      const float Dlefttop = fabsf(Lleft - Llefttop);
-      const float Dleftbottom = fabsf(Lleft - Lleftbottom);
+    // Find the distance to the bottom:
+    coords.z = SMAASearchYDown(SMAATexturePass2D(edgesTex),
+                               SMAATexturePass2D(searchTex),
+                               offset[1].zw(),
+                               offset[2].w,
+                               size);
+    d.y = coords.z;
 
-      /* Calculate the final maximum delta: */
-      max_delta = fmaxf(max_delta, fmaxf(Dleftleft, fmaxf(Dlefttop, Dleftbottom)));
+    // We want the distances to be in pixel units:
+    d = math::abs(math::round(mad(float2(size.y), d, -float2(pixcoord.y))));
 
-      /* Local contrast adaptation: */
-      if (max_delta > contrast_limit_ * Dleft) {
-        it.out[0] = 0.0f;
-      }
-    }
+    // SMAAArea below needs a sqrt, as the areas texture is compressed
+    // quadratically:
+    float2 sqrt_d = math::sqrt(d);
 
-    /* Top edge */
-    if (it.out[1] != 0.0f) {
-      /* Calculate top-top delta: */
-      image->read_elem_checked(x, y - 2, color);
-      const float Ltoptop = IMB_colormanagement_get_luminance(color);
-      image->read_elem_checked(x + 1, y - 1, color);
-      const float Ltopright = IMB_colormanagement_get_luminance(color);
-      const float Dtoptop = fabsf(Ltop - Ltoptop);
-      const float Dtopleft = fabsf(Ltop - Llefttop);
-      const float Dtopright = fabsf(Ltop - Ltopright);
+    // Fetch the bottom crossing edges:
+    float e2 = SMAASampleLevelZeroOffset(edgesTex, float2(coords.x, coords.z), int2(0, 1), size).y;
 
-      /* Calculate the final maximum delta: */
-      max_delta = fmaxf(max_delta, fmaxf(Dtoptop, fmaxf(Dtopleft, Dtopright)));
+    // Get the area for this direction:
+    float2 area = SMAAArea(SMAATexturePass2D(areaTex), sqrt_d, e1, e2, subsampleIndices.x);
+    weights.z = area.x;
+    weights.w = area.y;
 
-      /* Local contrast adaptation: */
-      if (max_delta > contrast_limit_ * Dtop) {
-        it.out[1] = 0.0f;
-      }
-    }
+    // Fix corners:
+    coords.x = texcoord.x;
+
+    float2 corner_weight = weights.zw();
+    SMAADetectVerticalCornerPattern(SMAATexturePass2D(edgesTex),
+                                    corner_weight,
+                                    float4(coords.xy(), coords.x, coords.z),
+                                    d,
+                                    size,
+                                    corner_rounding);
+    weights.z = corner_weight.x;
+    weights.w = corner_weight.y;
+  }
+
+  return weights;
+}
+
+/* ----------------------------------------------------------------------------
+ * Neighborhood Blending Pixel Shader (Third Pass) */
+
+static float4 SMAANeighborhoodBlendingPS(float2 texcoord,
+                                         float4 offset,
+                                         SMAATexture2D(colorTex),
+                                         SMAATexture2D(blendTex),
+#if SMAA_REPROJECTION
+                                         SMAATexture2D(velocityTex),
+#endif
+                                         int2 size)
+{
+  // Fetch the blending weights for current pixel:
+  float4 a;
+  a.x = SMAASample(blendTex, offset.xy()).w;  // Right
+  a.y = SMAASample(blendTex, offset.zw()).y;  // Top
+  a.z = SMAASample(blendTex, texcoord).z;     // Left
+  a.w = SMAASample(blendTex, texcoord).x;     // Bottom
+
+  // Is there any blending weight with a value greater than 0.0?
+  SMAA_BRANCH
+  if (math::dot(a, float4(1.0f, 1.0f, 1.0f, 1.0f)) < 1e-5f) {
+    float4 color = SMAASampleLevelZero(colorTex, texcoord);
+
+#if SMAA_REPROJECTION
+    float2 velocity = SMAA_DECODE_VELOCITY(SMAASampleLevelZero(velocityTex, texcoord));
+
+    // Pack velocity into the alpha channel:
+    color.a = math::sqrt(5.0f * math::length(velocity));
+#endif
+
+    return color;
+  }
+  else {
+    bool h = math::max(a.x, a.z) > math::max(a.y, a.w);  // max(horizontal) > max(vertical)
+
+    // Calculate the blending offsets:
+    float4 blendingOffset = float4(0.0f, a.y, 0.0f, a.w);
+    float2 blendingWeight = float2(a.y, a.w);
+    SMAAMovc(float4(h), blendingOffset, float4(a.x, 0.0f, a.z, 0.0f));
+    SMAAMovc(float2(h), blendingWeight, float2(a.x, a.z));
+    blendingWeight /= math::dot(blendingWeight, float2(1.0f, 1.0f));
+
+    // Calculate the texture coordinates:
+    float4 blendingCoord = float4(texcoord, texcoord) + blendingOffset / float4(size, -size);
+
+    // We exploit bilinear filtering to mix current pixel with the chosen
+    // neighbor:
+    float4 color = blendingWeight.x * SMAASampleLevelZero(colorTex, blendingCoord.xy());
+    color += blendingWeight.y * SMAASampleLevelZero(colorTex, blendingCoord.zw());
+
+#if SMAA_REPROJECTION
+    // Antialias velocity for proper reprojection in a later stage:
+    float2 velocity = blendingWeight.x *
+                      SMAA_DECODE_VELOCITY(SMAASampleLevelZero(velocityTex, blendingCoord.xy()));
+    velocity += blendingWeight.y *
+                SMAA_DECODE_VELOCITY(SMAASampleLevelZero(velocityTex, blendingCoord.zw()));
+
+    // Pack velocity into the alpha channel:
+    color.a = math::sqrt(5.0f * math::length(velocity));
+#endif
+
+    return color;
   }
 }
 
-/*-----------------------------------------------------------------------------*/
-/* Blending Weight Calculation (Second Pass) */
-/*-----------------------------------------------------------------------------*/
-
-SMAABlendingWeightCalculationOperation::SMAABlendingWeightCalculationOperation()
+SMAAOperation::SMAAOperation()
 {
-  this->add_input_socket(DataType::Color); /* edges */
+  this->add_input_socket(DataType::Color);
   this->add_output_socket(DataType::Color);
   flags_.can_be_constant = true;
-  this->set_corner_rounding(CMP_DEFAULT_SMAA_CORNER_ROUNDING);
 }
 
-void SMAABlendingWeightCalculationOperation::set_corner_rounding(float rounding)
-{
-  /* UI values are between 0 and 1 for simplicity but algorithm expects values between 0 and 100 */
-  corner_rounding_ = int(scalenorm(0, 100, rounding));
-}
-
-void SMAABlendingWeightCalculationOperation::update_memory_buffer_started(
-    MemoryBuffer * /*output*/, const rcti & /*out_area*/, Span<MemoryBuffer *> inputs)
-{
-  const MemoryBuffer *image = inputs[0];
-  sample_image_fn_ = [=](int x, int y, float *out) { image->read_elem_checked(x, y, out); };
-}
-
-void SMAABlendingWeightCalculationOperation::update_memory_buffer_partial(
-    MemoryBuffer *output, const rcti &out_area, Span<MemoryBuffer *> /*inputs*/)
-{
-  for (BuffersIterator<float> it = output->iterate_with({}, out_area); !it.is_end(); ++it) {
-    const int x = it.x;
-    const int y = it.y;
-    zero_v4(it.out);
-
-    float edges[4];
-    sample_image_fn_(x, y, edges);
-
-    /* Edge at north */
-    float c[4];
-    if (edges[1] > 0.0f) {
-      /* Diagonals have both north and west edges, so calculating weights for them */
-      /* in one of the boundaries is enough. */
-      calculate_diag_weights(x, y, edges, it.out);
-
-      /* We give priority to diagonals, so if we find a diagonal we skip. */
-      /* horizontal/vertical processing. */
-      if (!is_zero_v2(it.out)) {
-        continue;
-      }
-
-      /* Find the distance to the left and the right: */
-      int left = search_xleft(x, y);
-      int right = search_xright(x, y);
-      int d1 = x - left, d2 = right - x;
-
-      /* Fetch the left and right crossing edges: */
-      int e1 = 0, e2 = 0;
-      sample_image_fn_(left, y - 1, c);
-      if (c[0] > 0.0) {
-        e1 += 1;
-      }
-      sample_image_fn_(left, y, c);
-      if (c[0] > 0.0) {
-        e1 += 2;
-      }
-      sample_image_fn_(right + 1, y - 1, c);
-      if (c[0] > 0.0) {
-        e2 += 1;
-      }
-      sample_image_fn_(right + 1, y, c);
-      if (c[0] > 0.0) {
-        e2 += 2;
-      }
-
-      /* Ok, we know how this pattern looks like, now it is time for getting */
-      /* the actual area: */
-      area(d1, d2, e1, e2, it.out); /* R, G */
-
-      /* Fix corners: */
-      if (corner_rounding_) {
-        detect_horizontal_corner_pattern(it.out, left, right, y, d1, d2);
-      }
-    }
-
-    /* Edge at west */
-    if (edges[0] > 0.0f) {
-      /* Did we already do diagonal search for this west edge from the left neighboring pixel? */
-      if (is_vertical_search_unneeded(x, y)) {
-        continue;
-      }
-
-      /* Find the distance to the top and the bottom: */
-      int top = search_yup(x, y);
-      int bottom = search_ydown(x, y);
-      int d1 = y - top, d2 = bottom - y;
-
-      /* Fetch the top and bottom crossing edges: */
-      int e1 = 0, e2 = 0;
-      sample_image_fn_(x - 1, top, c);
-      if (c[1] > 0.0) {
-        e1 += 1;
-      }
-      sample_image_fn_(x, top, c);
-      if (c[1] > 0.0) {
-        e1 += 2;
-      }
-      sample_image_fn_(x - 1, bottom + 1, c);
-      if (c[1] > 0.0) {
-        e2 += 1;
-      }
-      sample_image_fn_(x, bottom + 1, c);
-      if (c[1] > 0.0) {
-        e2 += 2;
-      }
-
-      /* Get the area for this direction: */
-      area(d1, d2, e1, e2, it.out + 2); /* B, A */
-
-      /* Fix corners: */
-      if (corner_rounding_) {
-        detect_vertical_corner_pattern(it.out + 2, x, top, bottom, d1, d2);
-      }
-    }
-  }
-}
-
-void SMAABlendingWeightCalculationOperation::get_area_of_interest(const int /*input_idx*/,
-                                                                  const rcti &output_area,
-                                                                  rcti &r_input_area)
+void SMAAOperation::get_area_of_interest(const int /*input_idx*/,
+                                         const rcti &output_area,
+                                         rcti &r_input_area)
 {
   r_input_area.xmax = output_area.xmax +
-                      fmax(SMAA_MAX_SEARCH_STEPS, SMAA_MAX_SEARCH_STEPS_DIAG + 1);
-  r_input_area.xmin = output_area.xmin -
-                      fmax(fmax(SMAA_MAX_SEARCH_STEPS - 1, 1), SMAA_MAX_SEARCH_STEPS_DIAG + 1);
-  r_input_area.ymax = output_area.ymax + fmax(SMAA_MAX_SEARCH_STEPS, SMAA_MAX_SEARCH_STEPS_DIAG);
-  r_input_area.ymin = output_area.ymin -
-                      fmax(fmax(SMAA_MAX_SEARCH_STEPS - 1, 1), SMAA_MAX_SEARCH_STEPS_DIAG);
+                      math::max(SMAA_MAX_SEARCH_STEPS, SMAA_MAX_SEARCH_STEPS_DIAG + 1);
+  r_input_area.xmin = output_area.xmin - math::max(math::max(SMAA_MAX_SEARCH_STEPS - 1, 1),
+                                                   SMAA_MAX_SEARCH_STEPS_DIAG + 1);
+  r_input_area.ymax = output_area.ymax +
+                      math::max(SMAA_MAX_SEARCH_STEPS, SMAA_MAX_SEARCH_STEPS_DIAG);
+  r_input_area.ymin = output_area.ymin - math::max(math::max(SMAA_MAX_SEARCH_STEPS - 1, 1),
+                                                   SMAA_MAX_SEARCH_STEPS_DIAG);
 }
 
-/*-----------------------------------------------------------------------------*/
-/* Diagonal Search Functions */
-
-int SMAABlendingWeightCalculationOperation::search_diag1(int x, int y, int dir, bool *r_found)
+void SMAAOperation::update_memory_buffer(MemoryBuffer *output,
+                                         const rcti & /*area*/,
+                                         Span<MemoryBuffer *> inputs)
 {
-  float e[4];
-  int end = x + SMAA_MAX_SEARCH_STEPS_DIAG * dir;
-  *r_found = false;
-
-  while (x != end) {
-    x += dir;
-    y -= dir;
-    sample_image_fn_(x, y, e);
-    if (e[1] == 0.0f) {
-      *r_found = true;
-      break;
-    }
-    if (e[0] == 0.0f) {
-      *r_found = true;
-      return (dir < 0) ? x : x - dir;
-    }
-  }
-
-  return x - dir;
-}
-
-int SMAABlendingWeightCalculationOperation::search_diag2(int x, int y, int dir, bool *r_found)
-{
-  float e[4];
-  int end = x + SMAA_MAX_SEARCH_STEPS_DIAG * dir;
-  *r_found = false;
-
-  while (x != end) {
-    x += dir;
-    y += dir;
-    sample_image_fn_(x, y, e);
-    if (e[1] == 0.0f) {
-      *r_found = true;
-      break;
-    }
-    sample_image_fn_(x + 1, y, e);
-    if (e[0] == 0.0f) {
-      *r_found = true;
-      return (dir > 0) ? x : x - dir;
-    }
-  }
-
-  return x - dir;
-}
-
-void SMAABlendingWeightCalculationOperation::calculate_diag_weights(int x,
-                                                                    int y,
-                                                                    const float edges[2],
-                                                                    float weights[2])
-{
-  int d1, d2;
-  bool d1_found, d2_found;
-  float e[4], c[4];
-
-  zero_v2(weights);
-
-  if (SMAA_MAX_SEARCH_STEPS_DIAG <= 0) {
+  const MemoryBuffer *image = inputs[0];
+  if (image->is_a_single_elem()) {
+    copy_v4_v4(output->get_elem(0, 0), image->get_elem(0, 0));
     return;
   }
 
-  /* Search for the line ends: */
-  if (edges[0] > 0.0f) {
-    d1 = x - search_diag1(x, y, -1, &d1_found);
-  }
-  else {
-    d1 = 0;
-    d1_found = true;
-  }
-  d2 = search_diag1(x, y, 1, &d2_found) - x;
+  const int2 size = int2(image->get_width(), image->get_height());
+  MemoryBuffer edges(DataType::Float2, size.x, size.y);
 
-  if (d1 + d2 > 2) { /* d1 + d2 + 1 > 3 */
-    int e1 = 0, e2 = 0;
+  float3 luminance_coefficients;
+  IMB_colormanagement_get_luminance_coefficients(luminance_coefficients);
 
-    if (d1_found) {
-      /* Fetch the crossing edges: */
-      int left = x - d1, bottom = y + d1;
+  threading::parallel_for(IndexRange(size.y), 1, [&](const IndexRange sub_y_range) {
+    for (const int64_t y : sub_y_range) {
+      for (const int64_t x : IndexRange(size.x)) {
+        int2 texel = int2(x, y);
+        float2 coordinates = (float2(texel) + float2(0.5f)) / float2(size);
 
-      sample_image_fn_(left - 1, bottom, c);
-      if (c[1] > 0.0) {
-        e1 += 2;
-      }
-      sample_image_fn_(left, bottom, c);
-      if (c[0] > 0.0) {
-        e1 += 1;
+        float4 offset[3];
+        SMAAEdgeDetectionVS(coordinates, size, offset);
+
+        float2 edge = SMAALumaEdgeDetectionPS(coordinates,
+                                              offset,
+                                              image,
+                                              threshold_,
+                                              luminance_coefficients,
+                                              local_contrast_adaptation_factor_);
+        copy_v2_v2(edges.get_elem(texel.x, texel.y), edge);
       }
     }
+  });
 
-    if (d2_found) {
-      /* Fetch the crossing edges: */
-      int right = x + d2, top = y - d2;
+  MemoryBuffer blending_weights(DataType::Color, size.x, size.y);
 
-      sample_image_fn_(right + 1, top, c);
-      if (c[1] > 0.0) {
-        e2 += 2;
-      }
-      sample_image_fn_(right + 1, top - 1, c);
-      if (c[0] > 0.0) {
-        e2 += 1;
+  MemoryBuffer area_texture(DataType::Float2, AREATEX_WIDTH, AREATEX_HEIGHT);
+  area_texture.copy_from(areaTexBytes, area_texture.get_rect());
+
+  MemoryBuffer search_texture(DataType::Value, SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT);
+  search_texture.copy_from(searchTexBytes, search_texture.get_rect());
+
+  threading::parallel_for(IndexRange(size.y), 1, [&](const IndexRange sub_y_range) {
+    for (const int64_t y : sub_y_range) {
+      for (const int64_t x : IndexRange(size.x)) {
+        int2 texel = int2(x, y);
+        float2 coordinates = (float2(texel) + float2(0.5f)) / float2(size);
+
+        float4 offset[3];
+        float2 pixel_coordinates;
+        SMAABlendingWeightCalculationVS(coordinates, size, pixel_coordinates, offset);
+
+        float4 weights = SMAABlendingWeightCalculationPS(coordinates,
+                                                         pixel_coordinates,
+                                                         offset,
+                                                         &edges,
+                                                         &area_texture,
+                                                         &search_texture,
+                                                         float4(0.0f),
+                                                         size,
+                                                         corner_rounding_);
+        copy_v4_v4(blending_weights.get_elem(texel.x, texel.y), weights);
       }
     }
+  });
 
-    /* Fetch the areas for this line: */
-    area_diag(d1, d2, e1, e2, weights);
-  }
+  threading::parallel_for(IndexRange(size.y), 1, [&](const IndexRange sub_y_range) {
+    for (const int64_t y : sub_y_range) {
+      for (const int64_t x : IndexRange(size.x)) {
+        int2 texel = int2(x, y);
+        float2 coordinates = (float2(texel) + float2(0.5f)) / float2(size);
 
-  /* Search for the line ends: */
-  d1 = x - search_diag2(x, y, -1, &d1_found);
-  sample_image_fn_(x + 1, y, e);
-  if (e[0] > 0.0f) {
-    d2 = search_diag2(x, y, 1, &d2_found) - x;
-  }
-  else {
-    d2 = 0;
-    d2_found = true;
-  }
+        float4 offset;
+        SMAANeighborhoodBlendingVS(coordinates, size, offset);
 
-  if (d1 + d2 > 2) { /* d1 + d2 + 1 > 3 */
-    int e1 = 0, e2 = 0;
-
-    if (d1_found) {
-      /* Fetch the crossing edges: */
-      int left = x - d1, top = y - d1;
-
-      sample_image_fn_(left - 1, top, c);
-      if (c[1] > 0.0) {
-        e1 += 2;
-      }
-      sample_image_fn_(left, top - 1, c);
-      if (c[0] > 0.0) {
-        e1 += 1;
+        float4 result = SMAANeighborhoodBlendingPS(
+            coordinates, offset, image, &blending_weights, size);
+        copy_v4_v4(output->get_elem(texel.x, texel.y), result);
       }
     }
-
-    if (d2_found) {
-      /* Fetch the crossing edges: */
-      int right = x + d2, bottom = y + d2;
-
-      sample_image_fn_(right + 1, bottom, c);
-      if (c[1] > 0.0) {
-        e2 += 2;
-      }
-      if (c[0] > 0.0) {
-        e2 += 1;
-      }
-    }
-
-    /* Fetch the areas for this line: */
-    float w[2];
-    area_diag(d1, d2, e1, e2, w);
-    weights[0] += w[1];
-    weights[1] += w[0];
-  }
-}
-
-bool SMAABlendingWeightCalculationOperation::is_vertical_search_unneeded(int x, int y)
-{
-  int d1, d2;
-  bool found;
-  float e[4];
-
-  if (SMAA_MAX_SEARCH_STEPS_DIAG <= 0) {
-    return false;
-  }
-
-  /* Search for the line ends: */
-  sample_image_fn_(x - 1, y, e);
-  if (e[1] > 0.0f) {
-    d1 = x - search_diag2(x - 1, y, -1, &found);
-  }
-  else {
-    d1 = 0;
-  }
-  d2 = search_diag2(x - 1, y, 1, &found) - x;
-
-  return (d1 + d2 > 2); /* d1 + d2 + 1 > 3 */
-}
-
-/*-----------------------------------------------------------------------------*/
-/* Horizontal/Vertical Search Functions */
-
-int SMAABlendingWeightCalculationOperation::search_xleft(int x, int y)
-{
-  int end = x - SMAA_MAX_SEARCH_STEPS;
-  float e[4];
-
-  while (x > end) {
-    sample_image_fn_(x, y, e);
-    if (e[1] == 0.0f) { /* Is the edge not activated? */
-      break;
-    }
-    if (e[0] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
-      return x;
-    }
-    sample_image_fn_(x, y - 1, e);
-    if (e[0] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
-      return x;
-    }
-    x--;
-  }
-
-  return x + 1;
-}
-
-int SMAABlendingWeightCalculationOperation::search_xright(int x, int y)
-{
-  int end = x + SMAA_MAX_SEARCH_STEPS;
-  float e[4];
-
-  while (x < end) {
-    x++;
-    sample_image_fn_(x, y, e);
-    if (e[1] == 0.0f || /* Is the edge not activated? */
-        e[0] != 0.0f)   /* Or is there a crossing edge that breaks the line? */
-    {
-      break;
-    }
-    sample_image_fn_(x, y - 1, e);
-    if (e[0] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
-      break;
-    }
-  }
-
-  return x - 1;
-}
-
-int SMAABlendingWeightCalculationOperation::search_yup(int x, int y)
-{
-  int end = y - SMAA_MAX_SEARCH_STEPS;
-  float e[4];
-
-  while (y > end) {
-    sample_image_fn_(x, y, e);
-    if (e[0] == 0.0f) { /* Is the edge not activated? */
-      break;
-    }
-    if (e[1] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
-      return y;
-    }
-    sample_image_fn_(x - 1, y, e);
-    if (e[1] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
-      return y;
-    }
-    y--;
-  }
-
-  return y + 1;
-}
-
-int SMAABlendingWeightCalculationOperation::search_ydown(int x, int y)
-{
-  int end = y + SMAA_MAX_SEARCH_STEPS;
-  float e[4];
-
-  while (y < end) {
-    y++;
-    sample_image_fn_(x, y, e);
-    if (e[0] == 0.0f || /* Is the edge not activated? */
-        e[1] != 0.0f)   /* Or is there a crossing edge that breaks the line? */
-    {
-      break;
-    }
-    sample_image_fn_(x - 1, y, e);
-    if (e[1] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
-      break;
-    }
-  }
-
-  return y - 1;
-}
-
-/*-----------------------------------------------------------------------------*/
-/* Corner Detection Functions */
-
-void SMAABlendingWeightCalculationOperation::detect_horizontal_corner_pattern(
-    float weights[2], int left, int right, int y, int d1, int d2)
-{
-  float factor[2] = {1.0f, 1.0f};
-  float rounding = corner_rounding_ / 100.0f;
-  float e[4];
-
-  /* Reduce blending for pixels in the center of a line. */
-  rounding *= (d1 == d2) ? 0.5f : 1.0f;
-
-  /* Near the left corner */
-  if (d1 <= d2) {
-    sample_image_fn_(left, y + 1, e);
-    factor[0] -= rounding * e[0];
-    sample_image_fn_(left, y - 2, e);
-    factor[1] -= rounding * e[0];
-  }
-  /* Near the right corner */
-  if (d1 >= d2) {
-    sample_image_fn_(right + 1, y + 1, e);
-    factor[0] -= rounding * e[0];
-    sample_image_fn_(right + 1, y - 2, e);
-    factor[1] -= rounding * e[0];
-  }
-
-  weights[0] *= std::clamp(factor[0], 0.0f, 1.0f);
-  weights[1] *= std::clamp(factor[1], 0.0f, 1.0f);
-}
-
-void SMAABlendingWeightCalculationOperation::detect_vertical_corner_pattern(
-    float weights[2], int x, int top, int bottom, int d1, int d2)
-{
-  float factor[2] = {1.0f, 1.0f};
-  float rounding = corner_rounding_ / 100.0f;
-  float e[4];
-
-  /* Reduce blending for pixels in the center of a line. */
-  rounding *= (d1 == d2) ? 0.5f : 1.0f;
-
-  /* Near the top corner */
-  if (d1 <= d2) {
-    sample_image_fn_(x + 1, top, e);
-    factor[0] -= rounding * e[1];
-    sample_image_fn_(x - 2, top, e);
-    factor[1] -= rounding * e[1];
-  }
-  /* Near the bottom corner */
-  if (d1 >= d2) {
-    sample_image_fn_(x + 1, bottom + 1, e);
-    factor[0] -= rounding * e[1];
-    sample_image_fn_(x - 2, bottom + 1, e);
-    factor[1] -= rounding * e[1];
-  }
-
-  weights[0] *= std::clamp(factor[0], 0.0f, 1.0f);
-  weights[1] *= std::clamp(factor[1], 0.0f, 1.0f);
-}
-
-/*-----------------------------------------------------------------------------*/
-/* Neighborhood Blending (Third Pass) */
-/*-----------------------------------------------------------------------------*/
-
-SMAANeighborhoodBlendingOperation::SMAANeighborhoodBlendingOperation()
-{
-  this->add_input_socket(DataType::Color); /* image */
-  this->add_input_socket(DataType::Color); /* blend */
-  this->add_output_socket(DataType::Color);
-  flags_.can_be_constant = true;
-}
-
-void SMAANeighborhoodBlendingOperation::update_memory_buffer_partial(MemoryBuffer *output,
-                                                                     const rcti &out_area,
-                                                                     Span<MemoryBuffer *> inputs)
-{
-  MemoryBuffer *image1 = inputs[0];
-  MemoryBuffer *image2 = inputs[1];
-  for (BuffersIterator<float> it = output->iterate_with({}, out_area); !it.is_end(); ++it) {
-    const float x = it.x;
-    const float y = it.y;
-    float w[4];
-
-    /* Fetch the blending weights for current pixel: */
-    image2->read_elem_checked(x, y, w);
-    const float left = w[2], top = w[0];
-    image2->read_elem_checked(x + 1, y, w);
-    const float right = w[3];
-    image2->read_elem_checked(x, y + 1, w);
-    const float bottom = w[1];
-
-    /* Is there any blending weight with a value greater than 0.0? */
-    if (right + bottom + left + top < 1e-5f) {
-      image1->read_elem_checked(x, y, it.out);
-      continue;
-    }
-
-    /* Calculate the blending offsets: */
-    void (*sample_fn)(MemoryBuffer *reader, int x, int y, float xoffset, float color[4]);
-    float offset1, offset2, weight1, weight2, color1[4], color2[4];
-
-    if (fmaxf(right, left) > fmaxf(bottom, top)) { /* `max(horizontal) > max(vertical)` */
-      sample_fn = sample_bilinear_horizontal;
-      offset1 = right;
-      offset2 = -left;
-      weight1 = right / (right + left);
-      weight2 = left / (right + left);
-    }
-    else {
-      sample_fn = sample_bilinear_vertical;
-      offset1 = bottom;
-      offset2 = -top;
-      weight1 = bottom / (bottom + top);
-      weight2 = top / (bottom + top);
-    }
-
-    /* We exploit bilinear filtering to mix current pixel with the chosen neighbor: */
-    sample_fn(image1, x, y, offset1, color1);
-    sample_fn(image1, x, y, offset2, color2);
-
-    mul_v4_v4fl(it.out, color1, weight1);
-    madd_v4_v4fl(it.out, color2, weight2);
-  }
-}
-
-void SMAANeighborhoodBlendingOperation::get_area_of_interest(const int /*input_idx*/,
-                                                             const rcti &output_area,
-                                                             rcti &r_input_area)
-{
-  r_input_area = output_area;
-  expand_area_for_sampler(r_input_area, PixelSampler::Bilinear);
+  });
 }
 
 }  // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_SMAAOperation.h b/source/blender/compositor/operations/COM_SMAAOperation.h
index 845611705e2..3f379e30716 100644
--- a/source/blender/compositor/operations/COM_SMAAOperation.h
+++ b/source/blender/compositor/operations/COM_SMAAOperation.h
@@ -4,89 +4,38 @@
 
 #pragma once
 
-#include "COM_MultiThreadedOperation.h"
+#include "COM_NodeOperation.h"
 
 namespace blender::compositor {
 
-/*-----------------------------------------------------------------------------*/
-/* Edge Detection (First Pass) */
-
-class SMAAEdgeDetectionOperation : public MultiThreadedOperation {
+class SMAAOperation : public NodeOperation {
  protected:
-  float threshold_;
-  float contrast_limit_;
+  float threshold_ = 0.1f;
+  float local_contrast_adaptation_factor_ = 2.0f;
+  int corner_rounding_ = 25;
 
  public:
-  SMAAEdgeDetectionOperation();
+  SMAAOperation();
 
-  void set_threshold(float threshold);
+  void set_threshold(float threshold)
+  {
+    threshold_ = threshold;
+  }
 
-  void set_local_contrast_adaptation_factor(float factor);
+  void set_local_contrast_adaptation_factor(float factor)
+  {
+    local_contrast_adaptation_factor_ = factor;
+  }
+
+  void set_corner_rounding(int corner_rounding)
+  {
+    corner_rounding_ = corner_rounding;
+  }
 
   void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
-  void update_memory_buffer_partial(MemoryBuffer *output,
-                                    const rcti &area,
-                                    Span<MemoryBuffer *> inputs) override;
-};
-
-/*-----------------------------------------------------------------------------*/
-/*  Blending Weight Calculation (Second Pass) */
-
-class SMAABlendingWeightCalculationOperation : public MultiThreadedOperation {
- private:
-  std::function<void(int x, int y, float *out)> sample_image_fn_;
-  int corner_rounding_;
-
- public:
-  SMAABlendingWeightCalculationOperation();
-
-  void set_corner_rounding(float rounding);
-
-  void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
-  void update_memory_buffer_started(MemoryBuffer *output,
-                                    const rcti &area,
-                                    Span<MemoryBuffer *> inputs) override;
-  void update_memory_buffer_partial(MemoryBuffer *output,
-                                    const rcti &area,
-                                    Span<MemoryBuffer *> inputs) override;
-
- private:
-  /* Diagonal Search Functions */
-  /**
-   * These functions allows to perform diagonal pattern searches.
-   */
-  int search_diag1(int x, int y, int dir, bool *r_found);
-  int search_diag2(int x, int y, int dir, bool *r_found);
-  /**
-   * This searches for diagonal patterns and returns the corresponding weights.
-   */
-  void calculate_diag_weights(int x, int y, const float edges[2], float weights[2]);
-  bool is_vertical_search_unneeded(int x, int y);
-
-  /* Horizontal/Vertical Search Functions */
-  int search_xleft(int x, int y);
-  int search_xright(int x, int y);
-  int search_yup(int x, int y);
-  int search_ydown(int x, int y);
-
-  /*  Corner Detection Functions */
-  void detect_horizontal_corner_pattern(
-      float weights[2], int left, int right, int y, int d1, int d2);
-  void detect_vertical_corner_pattern(
-      float weights[2], int x, int top, int bottom, int d1, int d2);
-};
-
-/*-----------------------------------------------------------------------------*/
-/* Neighborhood Blending (Third Pass) */
-
-class SMAANeighborhoodBlendingOperation : public MultiThreadedOperation {
- public:
-  SMAANeighborhoodBlendingOperation();
-
-  void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
-  void update_memory_buffer_partial(MemoryBuffer *output,
-                                    const rcti &area,
-                                    Span<MemoryBuffer *> inputs) override;
+  void update_memory_buffer(MemoryBuffer *output,
+                            const rcti &area,
+                            Span<MemoryBuffer *> inputs) override;
 };
 
 }  // namespace blender::compositor
-- 
2.30.2


From aede88be85b2014c6236d13121694ee9068a449d Mon Sep 17 00:00:00 2001
From: Hans Goudey <hans@blender.org>
Date: Mon, 25 Mar 2024 09:27:55 -0400
Subject: [PATCH 18/36] Fix: Curves NURBS order attribute validate incorrect
 clamp

---
 source/blender/blenkernel/intern/geometry_component_curves.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/blender/blenkernel/intern/geometry_component_curves.cc b/source/blender/blenkernel/intern/geometry_component_curves.cc
index 896480d0ad7..cc59acddf5d 100644
--- a/source/blender/blenkernel/intern/geometry_component_curves.cc
+++ b/source/blender/blenkernel/intern/geometry_component_curves.cc
@@ -564,7 +564,7 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
 
   static const auto nurbs_order_clamp = mf::build::SI1_SO<int8_t, int8_t>(
       "NURBS Order Validate",
-      [](int8_t value) { return std::max<int8_t>(value, 0); },
+      [](int8_t value) { return std::max<int8_t>(value, 1); },
       mf::build::exec_presets::AllSpanOrSingle());
   static BuiltinCustomDataLayerProvider nurbs_order("nurbs_order",
                                                     AttrDomain::Curve,
-- 
2.30.2


From 43cef92f66a062e95a555794dc7500b7e5b48a61 Mon Sep 17 00:00:00 2001
From: Alaska <alaskayou01@gmail.com>
Date: Mon, 25 Mar 2024 14:47:18 +0100
Subject: [PATCH 19/36] Fix #119692: Cycles render issue with light tree and
 light linking

When using light linking with the light tree, the root index of a
mesh light subtree can be 0. The current code assumed this wasn't
possible, and as such it caused rendering issues, specifically the
incorrect computation of the PDF of certain mesh lights during
forward path tracing.

So we adjust the code to allow mesh light subtree root node
indices of 0.

This was worked on by Alaska, Sergey, and Weizhen.

Pull Request: https://projects.blender.org/blender/blender/pulls/119770
---
 intern/cycles/kernel/light/tree.h | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/intern/cycles/kernel/light/tree.h b/intern/cycles/kernel/light/tree.h
index 3fd49e30cbe..06391ce221e 100644
--- a/intern/cycles/kernel/light/tree.h
+++ b/intern/cycles/kernel/light/tree.h
@@ -796,7 +796,7 @@ ccl_device float light_tree_pdf(KernelGlobals kg,
 
   ccl_global const KernelLightTreeEmitter *kemitter = &kernel_data_fetch(light_tree_emitters,
                                                                          index_emitter);
-  int root_index;
+  int subtree_root_index;
   uint bit_trail, target_emitter;
 
   if (is_triangle(kemitter)) {
@@ -805,16 +805,17 @@ ccl_device float light_tree_pdf(KernelGlobals kg,
     target_emitter = kernel_data_fetch(object_to_tree, object_emitter);
     ccl_global const KernelLightTreeEmitter *kmesh = &kernel_data_fetch(light_tree_emitters,
                                                                         target_emitter);
-    root_index = kmesh->mesh.node_id;
-    ccl_global const KernelLightTreeNode *kroot = &kernel_data_fetch(light_tree_nodes, root_index);
+    subtree_root_index = kmesh->mesh.node_id;
+    ccl_global const KernelLightTreeNode *kroot = &kernel_data_fetch(light_tree_nodes,
+                                                                     subtree_root_index);
     bit_trail = kroot->bit_trail;
 
     if (kroot->type == LIGHT_TREE_INSTANCE) {
-      root_index = kroot->instance.reference;
+      subtree_root_index = kroot->instance.reference;
     }
   }
   else {
-    root_index = 0;
+    subtree_root_index = -1;
     bit_trail = kemitter->bit_trail;
     target_emitter = index_emitter;
   }
@@ -856,13 +857,13 @@ ccl_device float light_tree_pdf(KernelGlobals kg,
         return 0.0f;
       }
 
-      if (root_index) {
+      if (subtree_root_index != -1) {
         /* Arrived at the mesh light. Continue with the subtree. */
         float unused;
         light_tree_to_local_space<false>(kg, object_emitter, P, N, unused);
 
-        node_index = root_index;
-        root_index = 0;
+        node_index = subtree_root_index;
+        subtree_root_index = -1;
         target_emitter = index_emitter;
         bit_trail = kemitter->bit_trail;
         continue;
-- 
2.30.2


From 23fab6b6a6ef1e1c4ee2bb76a78b7c5f8c0e89f5 Mon Sep 17 00:00:00 2001
From: Charles Wardlaw <charleswardlaw@noreply.localhost>
Date: Mon, 25 Mar 2024 14:50:59 +0100
Subject: [PATCH 20/36] Fix #99114: USD: Cache file operators now recognize USD
 as well as Alembic

USD files are now findable from the cachefile.open() and
cachefile.layer_add() operators. The default file path no longer has
its extension replaced with ".abc" when browsing for a file for the
first time, as that no longer makes sense; it now defaults to the
directory of the blend file.

Pull Request: https://projects.blender.org/blender/blender/pulls/118685
---
 source/blender/editors/io/io_cache.cc | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/source/blender/editors/io/io_cache.cc b/source/blender/editors/io/io_cache.cc
index 49ac8f29383..4884318f952 100644
--- a/source/blender/editors/io/io_cache.cc
+++ b/source/blender/editors/io/io_cache.cc
@@ -53,8 +53,8 @@ static int cachefile_open_invoke(bContext *C, wmOperator *op, const wmEvent * /*
     char filepath[FILE_MAX];
     Main *bmain = CTX_data_main(C);
 
-    STRNCPY(filepath, BKE_main_blendfile_path(bmain));
-    BLI_path_extension_replace(filepath, sizeof(filepath), ".abc");
+    /* Default to the same directory as the blend file. */
+    BLI_path_split_dir_part(BKE_main_blendfile_path(bmain), filepath, sizeof(filepath));
     RNA_string_set(op->ptr, "filepath", filepath);
   }
 
@@ -119,7 +119,7 @@ void CACHEFILE_OT_open(wmOperatorType *ot)
   ot->cancel = open_cancel;
 
   WM_operator_properties_filesel(ot,
-                                 FILE_TYPE_ALEMBIC | FILE_TYPE_FOLDER,
+                                 FILE_TYPE_ALEMBIC | FILE_TYPE_USD | FILE_TYPE_FOLDER,
                                  FILE_BLENDER,
                                  FILE_OPENFILE,
                                  WM_FILESEL_FILEPATH | WM_FILESEL_RELPATH,
@@ -163,8 +163,8 @@ static int cachefile_layer_open_invoke(bContext *C, wmOperator *op, const wmEven
     char filepath[FILE_MAX];
     Main *bmain = CTX_data_main(C);
 
-    STRNCPY(filepath, BKE_main_blendfile_path(bmain));
-    BLI_path_extension_replace(filepath, sizeof(filepath), ".abc");
+    /* Default to the same directory as the blend file. */
+    BLI_path_split_dir_part(BKE_main_blendfile_path(bmain), filepath, sizeof(filepath));
     RNA_string_set(op->ptr, "filepath", filepath);
   }
 
@@ -215,7 +215,7 @@ void CACHEFILE_OT_layer_add(wmOperatorType *ot)
   ot->exec = cachefile_layer_add_exec;
 
   WM_operator_properties_filesel(ot,
-                                 FILE_TYPE_ALEMBIC | FILE_TYPE_FOLDER,
+                                 FILE_TYPE_ALEMBIC | FILE_TYPE_USD | FILE_TYPE_FOLDER,
                                  FILE_BLENDER,
                                  FILE_OPENFILE,
                                  WM_FILESEL_FILEPATH | WM_FILESEL_RELPATH,
-- 
2.30.2


From 6b5a97170c4c753442f657c80e169b4cd3ea5dd5 Mon Sep 17 00:00:00 2001
From: Jacques Lucke <jacques@blender.org>
Date: Mon, 25 Mar 2024 14:58:45 +0100
Subject: [PATCH 21/36] Geometry Nodes: allow overriding bake settings on
 overridden object

Previously, the bake settings in geometry nodes were not
editable even though they were stored on the overridden
object (instead of the not-overridden but linked node tree).

Pull Request: https://projects.blender.org/blender/blender/pulls/119874
---
 source/blender/makesrna/intern/rna_modifier.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/source/blender/makesrna/intern/rna_modifier.cc b/source/blender/makesrna/intern/rna_modifier.cc
index 862ede4a6d4..59cd6da07eb 100644
--- a/source/blender/makesrna/intern/rna_modifier.cc
+++ b/source/blender/makesrna/intern/rna_modifier.cc
@@ -7703,6 +7703,8 @@ static void rna_def_modifier_nodes_bake(BlenderRNA *brna)
   StructRNA *srna;
   PropertyRNA *prop;
 
+  RNA_define_lib_overridable(true);
+
   srna = RNA_def_struct(brna, "NodesModifierBake", nullptr);
   RNA_def_struct_ui_text(srna, "Nodes Modifier Bake", "");
 
@@ -7758,6 +7760,8 @@ static void rna_def_modifier_nodes_bake(BlenderRNA *brna)
   RNA_def_property_struct_type(prop, "NodesModifierDataBlock");
   RNA_def_property_collection_sdna(prop, nullptr, "data_blocks", "data_blocks_num");
   RNA_def_property_srna(prop, "NodesModifierBakeDataBlocks");
+
+  RNA_define_lib_overridable(false);
 }
 
 static void rna_def_modifier_nodes_bakes(BlenderRNA *brna)
-- 
2.30.2


From 5ff8738dce3e04b21f8ddcfcba78b192029e945a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sybren=20A=2E=20St=C3=BCvel?= <sybren@blender.org>
Date: Mon, 25 Mar 2024 15:26:27 +0100
Subject: [PATCH 22/36] Cleanup: Anim, reduce indentation in
 ANIM_animdata_filter

Return early if possible, reducing cognitive complexity.

No functional changes.
---
 .../blender/editors/animation/anim_filter.cc  | 228 +++++++++---------
 1 file changed, 113 insertions(+), 115 deletions(-)

diff --git a/source/blender/editors/animation/anim_filter.cc b/source/blender/editors/animation/anim_filter.cc
index 9c8d4699c7d..8f4820f4542 100644
--- a/source/blender/editors/animation/anim_filter.cc
+++ b/source/blender/editors/animation/anim_filter.cc
@@ -3674,138 +3674,136 @@ size_t ANIM_animdata_filter(bAnimContext *ac,
                             void *data,
                             eAnimCont_Types datatype)
 {
+  if (!data || !anim_data) {
+    return 0;
+  }
+
   size_t items = 0;
+  switch (datatype) {
+    /* Action-Editing Modes */
+    case ANIMCONT_ACTION: /* 'Action Editor' */
+    {
+      Object *obact = ac->obact;
+      SpaceAction *saction = (SpaceAction *)ac->sl;
+      bDopeSheet *ads = (saction) ? &saction->ads : nullptr;
 
-  /* only filter data if there's somewhere to put it */
-  if (data && anim_data) {
-    /* firstly filter the data */
-    switch (datatype) {
-      /* Action-Editing Modes */
-      case ANIMCONT_ACTION: /* 'Action Editor' */
-      {
-        Object *obact = ac->obact;
-        SpaceAction *saction = (SpaceAction *)ac->sl;
-        bDopeSheet *ads = (saction) ? &saction->ads : nullptr;
-
-        /* specially check for AnimData filter, see #36687. */
-        if (UNLIKELY(filter_mode & ANIMFILTER_ANIMDATA)) {
-          /* all channels here are within the same AnimData block, hence this special case */
-          if (LIKELY(obact->adt)) {
-            ANIMCHANNEL_NEW_CHANNEL(obact->adt, ANIMTYPE_ANIMDATA, (ID *)obact, nullptr);
-          }
+      /* specially check for AnimData filter, see #36687. */
+      if (UNLIKELY(filter_mode & ANIMFILTER_ANIMDATA)) {
+        /* all channels here are within the same AnimData block, hence this special case */
+        if (LIKELY(obact->adt)) {
+          ANIMCHANNEL_NEW_CHANNEL(obact->adt, ANIMTYPE_ANIMDATA, (ID *)obact, nullptr);
         }
-        else {
-          /* The check for the DopeSheet summary is included here
-           * since the summary works here too. */
-          if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) {
-            items += animfilter_action(
-                ac, anim_data, ads, static_cast<bAction *>(data), filter_mode, (ID *)obact);
-          }
-        }
-
-        break;
       }
-      case ANIMCONT_SHAPEKEY: /* 'ShapeKey Editor' */
-      {
-        Key *key = (Key *)data;
-
-        /* specially check for AnimData filter, see #36687. */
-        if (UNLIKELY(filter_mode & ANIMFILTER_ANIMDATA)) {
-          /* all channels here are within the same AnimData block, hence this special case */
-          if (LIKELY(key->adt)) {
-            ANIMCHANNEL_NEW_CHANNEL(key->adt, ANIMTYPE_ANIMDATA, (ID *)key, nullptr);
-          }
-        }
-        else {
-          /* The check for the DopeSheet summary is included here
-           * since the summary works here too. */
-          if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) {
-            items = animdata_filter_shapekey(ac, anim_data, key, filter_mode);
-          }
-        }
-
-        break;
-      }
-
-      /* Modes for Specialty Data Types (i.e. not keyframes) */
-      case ANIMCONT_GPENCIL: {
+      else {
+        /* The check for the DopeSheet summary is included here
+         * since the summary works here too. */
         if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) {
-          if (U.experimental.use_grease_pencil_version3) {
-            items = animdata_filter_grease_pencil(ac, anim_data, filter_mode);
-          }
-          else {
-            items = animdata_filter_gpencil_legacy(ac, anim_data, data, filter_mode);
-          }
+          items += animfilter_action(
+              ac, anim_data, ads, static_cast<bAction *>(data), filter_mode, (ID *)obact);
         }
-        break;
       }
-      case ANIMCONT_MASK: {
+
+      break;
+    }
+    case ANIMCONT_SHAPEKEY: /* 'ShapeKey Editor' */
+    {
+      Key *key = (Key *)data;
+
+      /* specially check for AnimData filter, see #36687. */
+      if (UNLIKELY(filter_mode & ANIMFILTER_ANIMDATA)) {
+        /* all channels here are within the same AnimData block, hence this special case */
+        if (LIKELY(key->adt)) {
+          ANIMCHANNEL_NEW_CHANNEL(key->adt, ANIMTYPE_ANIMDATA, (ID *)key, nullptr);
+        }
+      }
+      else {
+        /* The check for the DopeSheet summary is included here
+         * since the summary works here too. */
         if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) {
-          items = animdata_filter_mask(ac->bmain, anim_data, data, filter_mode);
+          items = animdata_filter_shapekey(ac, anim_data, key, filter_mode);
         }
-        break;
       }
 
-      /* DopeSheet Based Modes */
-      case ANIMCONT_DOPESHEET: /* 'DopeSheet Editor' */
-      {
-        /* the DopeSheet editor is the primary place where the DopeSheet summaries are useful */
-        if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) {
-          items += animdata_filter_dopesheet(
-              ac, anim_data, static_cast<bDopeSheet *>(data), filter_mode);
-        }
-        break;
-      }
-      case ANIMCONT_FCURVES: /* Graph Editor -> F-Curves/Animation Editing */
-      case ANIMCONT_DRIVERS: /* Graph Editor -> Drivers Editing */
-      case ANIMCONT_NLA:     /* NLA Editor */
-      {
-        /* all of these editors use the basic DopeSheet data for filtering options,
-         * but don't have all the same features */
-        items = animdata_filter_dopesheet(
-            ac, anim_data, static_cast<bDopeSheet *>(data), filter_mode);
-        break;
-      }
-
-      /* Timeline Mode - Basically the same as dopesheet,
-       * except we only have the summary for now */
-      case ANIMCONT_TIMELINE: {
-        /* the DopeSheet editor is the primary place where the DopeSheet summaries are useful */
-        if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) {
-          items += animdata_filter_dopesheet(
-              ac, anim_data, static_cast<bDopeSheet *>(data), filter_mode);
-        }
-        break;
-      }
-
-      /* Special/Internal Use */
-      case ANIMCONT_CHANNEL: /* animation channel */
-      {
-        bDopeSheet *ads = ac->ads;
-
-        /* based on the channel type, filter relevant data for this */
-        items = animdata_filter_animchan(
-            ac, anim_data, ads, static_cast<bAnimListElem *>(data), filter_mode);
-        break;
-      }
-
-      /* unhandled */
-      default: {
-        printf("ANIM_animdata_filter() - Invalid datatype argument %i\n", datatype);
-        break;
-      }
+      break;
     }
 
-    /* remove any 'weedy' entries */
-    items = animdata_filter_remove_invalid(anim_data);
+    /* Modes for Specialty Data Types (i.e. not keyframes) */
+    case ANIMCONT_GPENCIL: {
+      if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) {
+        if (U.experimental.use_grease_pencil_version3) {
+          items = animdata_filter_grease_pencil(ac, anim_data, filter_mode);
+        }
+        else {
+          items = animdata_filter_gpencil_legacy(ac, anim_data, data, filter_mode);
+        }
+      }
+      break;
+    }
+    case ANIMCONT_MASK: {
+      if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) {
+        items = animdata_filter_mask(ac->bmain, anim_data, data, filter_mode);
+      }
+      break;
+    }
 
-    /* remove duplicates (if required) */
-    if (filter_mode & ANIMFILTER_NODUPLIS) {
-      items = animdata_filter_remove_duplis(anim_data);
+    /* DopeSheet Based Modes */
+    case ANIMCONT_DOPESHEET: /* 'DopeSheet Editor' */
+    {
+      /* the DopeSheet editor is the primary place where the DopeSheet summaries are useful */
+      if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) {
+        items += animdata_filter_dopesheet(
+            ac, anim_data, static_cast<bDopeSheet *>(data), filter_mode);
+      }
+      break;
+    }
+    case ANIMCONT_FCURVES: /* Graph Editor -> F-Curves/Animation Editing */
+    case ANIMCONT_DRIVERS: /* Graph Editor -> Drivers Editing */
+    case ANIMCONT_NLA:     /* NLA Editor */
+    {
+      /* all of these editors use the basic DopeSheet data for filtering options,
+       * but don't have all the same features */
+      items = animdata_filter_dopesheet(
+          ac, anim_data, static_cast<bDopeSheet *>(data), filter_mode);
+      break;
+    }
+
+    /* Timeline Mode - Basically the same as dopesheet,
+     * except we only have the summary for now */
+    case ANIMCONT_TIMELINE: {
+      /* the DopeSheet editor is the primary place where the DopeSheet summaries are useful */
+      if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) {
+        items += animdata_filter_dopesheet(
+            ac, anim_data, static_cast<bDopeSheet *>(data), filter_mode);
+      }
+      break;
+    }
+
+    /* Special/Internal Use */
+    case ANIMCONT_CHANNEL: /* animation channel */
+    {
+      bDopeSheet *ads = ac->ads;
+
+      /* based on the channel type, filter relevant data for this */
+      items = animdata_filter_animchan(
+          ac, anim_data, ads, static_cast<bAnimListElem *>(data), filter_mode);
+      break;
+    }
+
+    /* unhandled */
+    default: {
+      printf("ANIM_animdata_filter() - Invalid datatype argument %i\n", datatype);
+      break;
     }
   }
 
-  /* return the number of items in the list */
+  /* remove any 'weedy' entries */
+  items = animdata_filter_remove_invalid(anim_data);
+
+  /* remove duplicates (if required) */
+  if (filter_mode & ANIMFILTER_NODUPLIS) {
+    items = animdata_filter_remove_duplis(anim_data);
+  }
+
   return items;
 }
 
-- 
2.30.2


From 5a9b292ac4dbd235e1c4cfa96c62bedc94456085 Mon Sep 17 00:00:00 2001
From: Aaron Carlisle <carlisle.b3d@gmail.com>
Date: Mon, 25 Mar 2024 12:50:01 -0400
Subject: [PATCH 23/36] Docs: Python API: Enable version switch menu

---
 doc/python_api/sphinx_doc_gen.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/python_api/sphinx_doc_gen.py b/doc/python_api/sphinx_doc_gen.py
index 02df13af1b9..347fe7c09fa 100644
--- a/doc/python_api/sphinx_doc_gen.py
+++ b/doc/python_api/sphinx_doc_gen.py
@@ -1949,7 +1949,7 @@ if html_theme == "furo":
             "sidebar/scroll-start.html",
             "sidebar/navigation.html",
             "sidebar/scroll-end.html",
-            # "sidebar/variant-selector.html",
+            "sidebar/variant-selector.html",
         ]
     }
 """)
-- 
2.30.2


From 26caa1817314bc49e9aaca02fc9bd98334f52883 Mon Sep 17 00:00:00 2001
From: Thomas Dinges <thomas@blender.org>
Date: Mon, 25 Mar 2024 18:08:02 +0100
Subject: [PATCH 24/36] Release: Bump to 4.1 release

---
 source/blender/blenkernel/BKE_blender_version.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/blender/blenkernel/BKE_blender_version.h b/source/blender/blenkernel/BKE_blender_version.h
index 28bd67e102d..f3dafd9531b 100644
--- a/source/blender/blenkernel/BKE_blender_version.h
+++ b/source/blender/blenkernel/BKE_blender_version.h
@@ -25,7 +25,7 @@ extern "C" {
 /** Blender patch version for bug-fix releases. */
 #define BLENDER_VERSION_PATCH 0
 /** Blender release cycle stage: alpha/beta/rc/release. */
-#define BLENDER_VERSION_CYCLE rc
+#define BLENDER_VERSION_CYCLE release
 
 /* Blender file format version. */
 #define BLENDER_FILE_VERSION BLENDER_VERSION
-- 
2.30.2


From 6c74d4af15a4bdc0d29c71e2f2f910842efd864c Mon Sep 17 00:00:00 2001
From: Sergey Sharybin <sergey@blender.org>
Date: Mon, 25 Mar 2024 18:13:38 +0100
Subject: [PATCH 25/36] Fix #118555: Occasional incorrect compositor result
 with relative transform

In the tiled compositor ensure_delta() can be called from multiple threads,
but without any threading synchronization. This worked fine when the node
only supported absolute transform: multiple threads would do the same work
and assign delta to the same values.

With the addition of relative transform in #115947, code which adjusts the
previously calculated delta was added, which could lead to the relative
transform being applied twice.

The solution is to avoid multiple threads modifying the same data by using
double-checked locking.

This issue does not happen in 4.2 (main branch) because it switched to full
frame compositor, which works differently.

Pull Request: https://projects.blender.org/blender/blender/pulls/119883
---
 .../compositor/operations/COM_TranslateOperation.h       | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/source/blender/compositor/operations/COM_TranslateOperation.h b/source/blender/compositor/operations/COM_TranslateOperation.h
index 82333c57dd6..fe0c4dae4f6 100644
--- a/source/blender/compositor/operations/COM_TranslateOperation.h
+++ b/source/blender/compositor/operations/COM_TranslateOperation.h
@@ -7,6 +7,8 @@
 #include "COM_ConstantOperation.h"
 #include "COM_MultiThreadedOperation.h"
 
+#include <mutex>
+
 namespace blender::compositor {
 
 class TranslateOperation : public MultiThreadedOperation {
@@ -24,6 +26,8 @@ class TranslateOperation : public MultiThreadedOperation {
   bool is_delta_set_;
   bool is_relative_;
 
+  std::mutex mutex_;
+
  protected:
   MemoryBufferExtend x_extend_mode_;
   MemoryBufferExtend y_extend_mode_;
@@ -60,6 +64,11 @@ class TranslateOperation : public MultiThreadedOperation {
   inline void ensure_delta()
   {
     if (!is_delta_set_) {
+      std::unique_lock lock(mutex_);
+      if (is_delta_set_) {
+        return;
+      }
+
       if (execution_model_ == eExecutionModel::Tiled) {
         float temp_delta[4];
         input_xoperation_->read_sampled(temp_delta, 0, 0, PixelSampler::Nearest);
-- 
2.30.2


From fdc2962bebb47e5d55564fcf30b4586f57f4311a Mon Sep 17 00:00:00 2001
From: Weizhen Huang <weizhen@blender.org>
Date: Mon, 25 Mar 2024 18:50:52 +0100
Subject: [PATCH 26/36] Fix #114634: correlated samples in volume when using
 equiangular sampling and light tree

The same random number was used when sampling from the volume segment
and from the direct scattering position, causing correlation issues with
light tree.

To solve this problem, we ensure the same light is picked for
volume segment/direct scattering, equiangular/distance sampling by
sampling the light tree only once in volume segment. From the direct
scattering position in volume, we sample a position on the picked light
as usual. If sampling from the light tree fails, we continue with
indirect scattering.
For unbiased MIS weight for forward sampling, we retrieve the `P`, `D`
and `t` used in volume segment for traversing the light tree.

The main changes are:
1. `light_tree_sample()` and `light_distribution_sample()` now only pick
lights. Sampling a position on light is done separately via
`light_sample()`.
2. `light_tree_sample()` is now called only once, from the volume
segment. For direct lighting we call `light_sample()`.
3. `light_tree_pdf()` now has a template `<in_volume_segment>`.
4. A new field `emitter_id` is added to struct `LightSample`, which just
stores the picked emitter index.
5. Additional field `previous_dt = ray->tmax - ray->tmin` is added to
`state->ray`, because we need this quantity for computing the pdf.
6. Distant/Background lights are also picked by light tree in volume
segment now, because we have no way to pick them afterwards. The direct
sample event for these lights will be handled by
`VOLUME_SAMPLE_DISTANCE`.
7. The original paper suggests using the maximal importance, but this
results in a very poor sampling probability for distant and point lights
and therefore excessive noise. For surfaces we have a minimal importance
to balance this; we could do the same for volumes, but I do not want to
spend much time on this now. Just setting `min_importance = 0.0f` seems
to do the job reasonably well. This way we still won't sample a light
with zero `max_importance`.

The current solution might perform worse with distance sampling, because
the light tree measure is biased towards equiangular sampling. However,
it is difficult to perform MIS between equiangular and distance sampling
if different lights are picked for each method. This is something we can
look into in the future if it proves to be a serious regression.

Pull Request: https://projects.blender.org/blender/blender/pulls/119389
---
 .../cycles/kernel/integrator/shade_volume.h   | 62 +++++--------
 .../cycles/kernel/integrator/state_template.h |  3 +
 intern/cycles/kernel/light/area.h             |  3 +-
 intern/cycles/kernel/light/common.h           |  5 +-
 intern/cycles/kernel/light/distribution.h     | 30 +------
 intern/cycles/kernel/light/light.h            | 18 ++--
 intern/cycles/kernel/light/sample.h           | 40 ++++++---
 intern/cycles/kernel/light/tree.h             | 88 +++++++++++--------
 intern/cycles/kernel/light/triangle.h         |  6 +-
 intern/cycles/kernel/types.h                  |  1 +
 tests/data                                    |  2 +-
 11 files changed, 127 insertions(+), 131 deletions(-)

diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h
index 594396d987e..ad94e46ebbb 100644
--- a/intern/cycles/kernel/integrator/shade_volume.h
+++ b/intern/cycles/kernel/integrator/shade_volume.h
@@ -324,13 +324,6 @@ ccl_device_inline bool volume_equiangular_valid_ray_segment(KernelGlobals kg,
                                                             ccl_private float2 *t_range,
                                                             const ccl_private LightSample *ls)
 {
-#  ifdef __LIGHT_TREE__
-  /* Do not compute ray segment until #119389 is landed. */
-  if (kernel_data.integrator.use_light_tree) {
-    return true;
-  }
-#  endif
-
   if (ls->type == LIGHT_SPOT) {
     ccl_global const KernelLight *klight = &kernel_data_fetch(lights, ls->lamp);
     return spot_light_valid_ray_segment(klight, ray_P, ray_D, t_range);
@@ -708,7 +701,8 @@ ccl_device_forceinline bool integrate_volume_equiangular_sample_light(
     ccl_private const Ray *ccl_restrict ray,
     ccl_private const ShaderData *ccl_restrict sd,
     ccl_private const RNGState *ccl_restrict rng_state,
-    ccl_private EquiangularCoefficients *ccl_restrict equiangular_coeffs)
+    ccl_private EquiangularCoefficients *ccl_restrict equiangular_coeffs,
+    ccl_private LightSample &ccl_restrict ls)
 {
   /* Test if there is a light or BSDF that needs direct light. */
   if (!kernel_data.integrator.use_direct_light) {
@@ -720,7 +714,6 @@ ccl_device_forceinline bool integrate_volume_equiangular_sample_light(
   const uint bounce = INTEGRATOR_STATE(state, path, bounce);
   const float3 rand_light = path_state_rng_3D(kg, rng_state, PRNG_LIGHT);
 
-  LightSample ls ccl_optional_struct_init;
   if (!light_sample_from_volume_segment(kg,
                                         rand_light,
                                         sd->time,
@@ -761,41 +754,26 @@ ccl_device_forceinline void integrate_volume_direct_light(
 #  ifdef __PATH_GUIDING__
     ccl_private const Spectrum unlit_throughput,
 #  endif
-    ccl_private const Spectrum throughput)
+    ccl_private const Spectrum throughput,
+    ccl_private LightSample &ccl_restrict ls)
 {
   PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_DIRECT_LIGHT);
 
-  if (!kernel_data.integrator.use_direct_light) {
+  if (!kernel_data.integrator.use_direct_light || ls.emitter_id == EMITTER_NONE) {
     return;
   }
 
-  /* Sample position on the same light again, now from the shading point where we scattered.
-   *
-   * Note that this means we sample the light tree twice when equiangular sampling is used.
-   * We could consider sampling the light tree just once and use the same light position again.
-   *
-   * This would make the PDFs for MIS weights more complicated due to having to account for
-   * both distance/equiangular and direct/indirect light sampling, but could be more accurate.
-   * Additionally we could end up behind the light or outside a spot light cone, which might
-   * waste a sample. Though on the other hand it would be possible to prevent that with
-   * equiangular sampling restricted to a smaller sub-segment where the light has influence. */
-  LightSample ls ccl_optional_struct_init;
+  /* Sample position on the same light again, now from the shading point where we scattered. */
   {
     const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
     const uint bounce = INTEGRATOR_STATE(state, path, bounce);
     const float3 rand_light = path_state_rng_3D(kg, rng_state, PRNG_LIGHT);
+    const float3 N = zero_float3();
+    const int object_receiver = light_link_receiver_nee(kg, sd);
+    const int shader_flags = SD_BSDF_HAS_TRANSMISSION;
 
-    if (!light_sample_from_position(kg,
-                                    rng_state,
-                                    rand_light,
-                                    sd->time,
-                                    P,
-                                    zero_float3(),
-                                    light_link_receiver_nee(kg, sd),
-                                    SD_BSDF_HAS_TRANSMISSION,
-                                    bounce,
-                                    path_flag,
-                                    &ls))
+    if (!light_sample<false>(
+            kg, rand_light, sd->time, P, N, object_receiver, shader_flags, bounce, path_flag, &ls))
     {
       return;
     }
@@ -913,6 +891,7 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
     KernelGlobals kg,
     IntegratorState state,
     ccl_private ShaderData *sd,
+    ccl_private const Ray *ray,
     ccl_private const RNGState *rng_state,
     ccl_private const ShaderVolumePhases *phases)
 {
@@ -965,6 +944,7 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
   INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
   INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(phase_wo);
   INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f;
+  INTEGRATOR_STATE_WRITE(state, ray, previous_dt) = ray->tmax - ray->tmin;
   INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX;
 #  ifdef __RAY_DIFFERENTIALS__
   INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
@@ -993,7 +973,8 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
 
   /* Update path state */
   INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = phase_pdf;
-  INTEGRATOR_STATE_WRITE(state, path, mis_origin_n) = zero_float3();
+  const float3 previous_P = ray->P + ray->D * ray->tmin;
+  INTEGRATOR_STATE_WRITE(state, path, mis_origin_n) = sd->P - previous_P;
   INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf(
       unguided_phase_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf));
 
@@ -1025,13 +1006,15 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
 
   /* Sample light ahead of volume stepping, for equiangular sampling. */
   /* TODO: distant lights are ignored now, but could instead use even distribution. */
+  LightSample ls ccl_optional_struct_init;
+  ls.emitter_id = EMITTER_NONE;
   const bool need_light_sample = !(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_TERMINATE);
 
   EquiangularCoefficients equiangular_coeffs = {zero_float3(), make_float2(ray->tmin, ray->tmax)};
 
-  const bool have_equiangular_sample = need_light_sample &&
-                                       integrate_volume_equiangular_sample_light(
-                                           kg, state, ray, &sd, &rng_state, &equiangular_coeffs);
+  const bool have_equiangular_sample =
+      need_light_sample && integrate_volume_equiangular_sample_light(
+                               kg, state, ray, &sd, &rng_state, &equiangular_coeffs, ls);
 
   VolumeSampleMethod direct_sample_method = (have_equiangular_sample) ?
                                                 volume_stack_sample_method(kg, state) :
@@ -1129,7 +1112,8 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
 #  ifdef __PATH_GUIDING__
                                   unlit_throughput,
 #  endif
-                                  result.direct_throughput);
+                                  result.direct_throughput,
+                                  ls);
   }
 
   /* Indirect light.
@@ -1168,7 +1152,7 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
 #    endif
 #  endif
 
-    if (integrate_volume_phase_scatter(kg, state, &sd, &rng_state, &result.indirect_phases)) {
+    if (integrate_volume_phase_scatter(kg, state, &sd, ray, &rng_state, &result.indirect_phases)) {
       return VOLUME_PATH_SCATTERED;
     }
     else {
diff --git a/intern/cycles/kernel/integrator/state_template.h b/intern/cycles/kernel/integrator/state_template.h
index e8683ed9179..34154d1c7fa 100644
--- a/intern/cycles/kernel/integrator/state_template.h
+++ b/intern/cycles/kernel/integrator/state_template.h
@@ -75,6 +75,9 @@ KERNEL_STRUCT_MEMBER(ray, float, tmax, KERNEL_FEATURE_PATH_TRACING)
 KERNEL_STRUCT_MEMBER(ray, float, time, KERNEL_FEATURE_PATH_TRACING)
 KERNEL_STRUCT_MEMBER(ray, float, dP, KERNEL_FEATURE_PATH_TRACING)
 KERNEL_STRUCT_MEMBER(ray, float, dD, KERNEL_FEATURE_PATH_TRACING)
+#ifdef __LIGHT_TREE__
+KERNEL_STRUCT_MEMBER(ray, float, previous_dt, KERNEL_FEATURE_PATH_TRACING)
+#endif
 KERNEL_STRUCT_END(ray)
 
 /*************************** Intersection result ******************************/
diff --git a/intern/cycles/kernel/light/area.h b/intern/cycles/kernel/light/area.h
index 51a66265546..d9188355984 100644
--- a/intern/cycles/kernel/light/area.h
+++ b/intern/cycles/kernel/light/area.h
@@ -518,9 +518,8 @@ ccl_device_forceinline bool area_light_tree_parameters(const ccl_global KernelLi
   const bool shape_above_surface = dot(N, centroid - P) + fabsf(dot(N, extentu)) +
                                        fabsf(dot(N, extentv)) >
                                    0;
-  const bool in_volume = is_zero(N);
 
-  return (front_facing && shape_above_surface) || in_volume;
+  return front_facing && shape_above_surface;
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/light/common.h b/intern/cycles/kernel/light/common.h
index ea724991817..7ea7519f94f 100644
--- a/intern/cycles/kernel/light/common.h
+++ b/intern/cycles/kernel/light/common.h
@@ -12,9 +12,9 @@ CCL_NAMESPACE_BEGIN
 
 typedef struct LightSample {
   float3 P;            /* position on light, or direction for distant light */
-  float3 Ng;           /* normal on light */
-  float3 D;            /* direction from shading point to light */
+  packed_float3 Ng;    /* normal on light */
   float t;             /* distance to light (FLT_MAX for distant light) */
+  float3 D;            /* direction from shading point to light */
   float u, v;          /* parametric coordinate on primitive */
   float pdf;           /* pdf for selecting light and point on light */
   float pdf_selection; /* pdf for selecting light */
@@ -25,6 +25,7 @@ typedef struct LightSample {
   int lamp;            /* lamp id */
   int group;           /* lightgroup */
   LightType type;      /* type of light */
+  int emitter_id;      /* index in the emitter array */
 } LightSample;
 
 /* Utilities */
diff --git a/intern/cycles/kernel/light/distribution.h b/intern/cycles/kernel/light/distribution.h
index 23cdaa6dff5..b6b9f2e035c 100644
--- a/intern/cycles/kernel/light/distribution.h
+++ b/intern/cycles/kernel/light/distribution.h
@@ -41,36 +41,14 @@ ccl_device int light_distribution_sample(KernelGlobals kg, const float rand)
   return index;
 }
 
-template<bool in_volume_segment>
 ccl_device_noinline bool light_distribution_sample(KernelGlobals kg,
-                                                   const float3 rand,
-                                                   const float time,
-                                                   const float3 P,
-                                                   const float3 N,
-                                                   const int object_receiver,
-                                                   const int shader_flags,
-                                                   const int bounce,
-                                                   const uint32_t path_flag,
+                                                   const float rand,
                                                    ccl_private LightSample *ls)
 {
   /* Sample light index from distribution. */
-  /* The first two dimensions of the Sobol sequence have better stratification. */
-  const int index = light_distribution_sample(kg, rand.z);
-  const float pdf_selection = kernel_data.integrator.distribution_pdf_lights;
-  const float2 rand_uv = float3_to_float2(rand);
-  return light_sample<in_volume_segment>(kg,
-                                         rand_uv,
-                                         time,
-                                         P,
-                                         N,
-                                         object_receiver,
-                                         shader_flags,
-                                         bounce,
-                                         path_flag,
-                                         index,
-                                         0,
-                                         pdf_selection,
-                                         ls);
+  ls->emitter_id = light_distribution_sample(kg, rand);
+  ls->pdf_selection = kernel_data.integrator.distribution_pdf_lights;
+  return true;
 }
 
 ccl_device_inline float light_distribution_pdf_lamp(KernelGlobals kg)
diff --git a/intern/cycles/kernel/light/light.h b/intern/cycles/kernel/light/light.h
index 8f18e5ce7a5..39a22f92d9d 100644
--- a/intern/cycles/kernel/light/light.h
+++ b/intern/cycles/kernel/light/light.h
@@ -177,7 +177,7 @@ ccl_device_inline bool light_sample(KernelGlobals kg,
 
 template<bool in_volume_segment>
 ccl_device_noinline bool light_sample(KernelGlobals kg,
-                                      const float2 rand,
+                                      const float3 rand_light,
                                       const float time,
                                       const float3 P,
                                       const float3 N,
@@ -185,33 +185,31 @@ ccl_device_noinline bool light_sample(KernelGlobals kg,
                                       const int shader_flags,
                                       const int bounce,
                                       const uint32_t path_flag,
-                                      const int emitter_index,
-                                      const int object_id,
-                                      const float pdf_selection,
                                       ccl_private LightSample *ls)
 {
+  /* The first two dimensions of the Sobol sequence have better stratification, use them to sample
+   * position on the light. */
+  const float2 rand = float3_to_float2(rand_light);
+
   int prim;
   MeshLight mesh_light;
 #ifdef __LIGHT_TREE__
   if (kernel_data.integrator.use_light_tree) {
     ccl_global const KernelLightTreeEmitter *kemitter = &kernel_data_fetch(light_tree_emitters,
-                                                                           emitter_index);
+                                                                           ls->emitter_id);
     prim = kemitter->light.id;
     mesh_light.shader_flag = kemitter->mesh_light.shader_flag;
-    mesh_light.object_id = object_id;
+    mesh_light.object_id = ls->object;
   }
   else
 #endif
   {
     ccl_global const KernelLightDistribution *kdistribution = &kernel_data_fetch(
-        light_distribution, emitter_index);
+        light_distribution, ls->emitter_id);
     prim = kdistribution->prim;
     mesh_light = kdistribution->mesh_light;
   }
 
-  /* A different value would be assigned in `triangle_light_sample()` if `!use_light_tree`. */
-  ls->pdf_selection = pdf_selection;
-
   if (prim >= 0) {
     /* Mesh light. */
     const int object = mesh_light.object_id;
diff --git a/intern/cycles/kernel/light/sample.h b/intern/cycles/kernel/light/sample.h
index afc4537c671..434383ebc2b 100644
--- a/intern/cycles/kernel/light/sample.h
+++ b/intern/cycles/kernel/light/sample.h
@@ -329,17 +329,25 @@ ccl_device_inline bool light_sample_from_volume_segment(KernelGlobals kg,
                                                         const uint32_t path_flag,
                                                         ccl_private LightSample *ls)
 {
+  const int shader_flags = SD_BSDF_HAS_TRANSMISSION;
+
 #ifdef __LIGHT_TREE__
   if (kernel_data.integrator.use_light_tree) {
-    return light_tree_sample<true>(
-        kg, rand, time, P, D, t, object_receiver, SD_BSDF_HAS_TRANSMISSION, bounce, path_flag, ls);
+    if (!light_tree_sample<true>(kg, rand.z, P, D, t, object_receiver, shader_flags, ls)) {
+      return false;
+    }
   }
   else
 #endif
   {
-    return light_distribution_sample<true>(
-        kg, rand, time, P, D, object_receiver, SD_BSDF_HAS_TRANSMISSION, bounce, path_flag, ls);
+    if (!light_distribution_sample(kg, rand.z, ls)) {
+      return false;
+    }
   }
+
+  /* Sample position on the selected light. */
+  return light_sample<true>(
+      kg, rand, time, P, D, object_receiver, shader_flags, bounce, path_flag, ls);
 }
 
 ccl_device bool light_sample_from_position(KernelGlobals kg,
@@ -354,17 +362,24 @@ ccl_device bool light_sample_from_position(KernelGlobals kg,
                                            const uint32_t path_flag,
                                            ccl_private LightSample *ls)
 {
+  /* Randomly select a light. */
 #ifdef __LIGHT_TREE__
   if (kernel_data.integrator.use_light_tree) {
-    return light_tree_sample<false>(
-        kg, rand, time, P, N, 0.0f, object_receiver, shader_flags, bounce, path_flag, ls);
+    if (!light_tree_sample<false>(kg, rand.z, P, N, 0.0f, object_receiver, shader_flags, ls)) {
+      return false;
+    }
   }
   else
 #endif
   {
-    return light_distribution_sample<false>(
-        kg, rand, time, P, N, object_receiver, shader_flags, bounce, path_flag, ls);
+    if (!light_distribution_sample(kg, rand.z, ls)) {
+      return false;
+    }
   }
+
+  /* Sample position on the selected light. */
+  return light_sample<false>(
+      kg, rand, time, P, N, object_receiver, shader_flags, bounce, path_flag, ls);
 }
 
 /* Update light sample with new shading point position for MNEE. The position on the light is fixed
@@ -415,13 +430,15 @@ ccl_device_inline float light_sample_mis_weight_forward_surface(KernelGlobals kg
 #ifdef __LIGHT_TREE__
   if (kernel_data.integrator.use_light_tree) {
     float3 ray_P = INTEGRATOR_STATE(state, ray, P);
+    const float dt = INTEGRATOR_STATE(state, ray, previous_dt);
     const float3 N = INTEGRATOR_STATE(state, path, mis_origin_n);
+
     uint lookup_offset = kernel_data_fetch(object_lookup_offset, sd->object);
     uint prim_offset = kernel_data_fetch(object_prim_offset, sd->object);
     uint triangle = kernel_data_fetch(triangle_to_tree, sd->prim - prim_offset + lookup_offset);
 
     pdf *= light_tree_pdf(
-        kg, ray_P, N, path_flag, sd->object, triangle, light_link_receiver_forward(kg, state));
+        kg, ray_P, N, dt, path_flag, sd->object, triangle, light_link_receiver_forward(kg, state));
   }
   else
 #endif
@@ -445,9 +462,11 @@ ccl_device_inline float light_sample_mis_weight_forward_lamp(KernelGlobals kg,
 #ifdef __LIGHT_TREE__
   if (kernel_data.integrator.use_light_tree) {
     const float3 N = INTEGRATOR_STATE(state, path, mis_origin_n);
+    const float dt = INTEGRATOR_STATE(state, ray, previous_dt);
     pdf *= light_tree_pdf(kg,
                           P,
                           N,
+                          dt,
                           path_flag,
                           0,
                           kernel_data_fetch(light_to_tree, ls->lamp),
@@ -485,9 +504,10 @@ ccl_device_inline float light_sample_mis_weight_forward_background(KernelGlobals
 #ifdef __LIGHT_TREE__
   if (kernel_data.integrator.use_light_tree) {
     const float3 N = INTEGRATOR_STATE(state, path, mis_origin_n);
+    const float dt = INTEGRATOR_STATE(state, ray, previous_dt);
     uint light = kernel_data_fetch(light_to_tree, kernel_data.background.light_index);
     pdf *= light_tree_pdf(
-        kg, ray_P, N, path_flag, 0, light, light_link_receiver_forward(kg, state));
+        kg, ray_P, N, dt, path_flag, 0, light, light_link_receiver_forward(kg, state));
   }
   else
 #endif
diff --git a/intern/cycles/kernel/light/tree.h b/intern/cycles/kernel/light/tree.h
index 06391ce221e..bbca17e5f75 100644
--- a/intern/cycles/kernel/light/tree.h
+++ b/intern/cycles/kernel/light/tree.h
@@ -148,10 +148,7 @@ ccl_device void light_tree_importance(const float3 N_or_D,
   float cos_min_incidence_angle = 1.0f;
   float cos_max_incidence_angle = 1.0f;
 
-  /* When sampling the light tree for the second time in `shade_volume.h` and when query the pdf in
-   * `sample.h`. */
-  const bool in_volume = is_zero(N_or_D);
-  if (!in_volume_segment && !in_volume) {
+  if (!in_volume_segment) {
     const float3 N = N_or_D;
     const float cos_theta_i = has_transmission ? fabsf(dot(point_to_centroid, N)) :
                                                  dot(point_to_centroid, N);
@@ -221,9 +218,9 @@ ccl_device void light_tree_importance(const float3 N_or_D,
   max_importance = fabsf(f_a * cos_min_incidence_angle * energy * cos_min_outgoing_angle /
                          (in_volume_segment ? min_distance : sqr(min_distance)));
 
-  /* TODO: also min importance for volume? */
+  /* TODO: compute proper min importance for volume. */
   if (in_volume_segment) {
-    min_importance = max_importance;
+    min_importance = 0.0f;
     return;
   }
 
@@ -270,10 +267,10 @@ ccl_device bool compute_emitter_centroid_and_dir(KernelGlobals kg,
         /* Arbitrary centroid and direction. */
         centroid = make_float3(0.0f, 0.0f, 1.0f);
         dir = make_float3(0.0f, 0.0f, -1.0f);
-        return !in_volume_segment;
+        break;
       case LIGHT_DISTANT:
         dir = centroid;
-        return !in_volume_segment;
+        break;
       default:
         return false;
     }
@@ -323,12 +320,13 @@ ccl_device void light_tree_node_importance(KernelGlobals kg,
   float cos_theta_u;
   float distance;
   if (knode->type == LIGHT_TREE_DISTANT) {
-    if (in_volume_segment) {
-      return;
-    }
     point_to_centroid = -bcone.axis;
     cos_theta_u = fast_cosf(bcone.theta_o + bcone.theta_e);
     distance = 1.0f;
+    if (t == FLT_MAX) {
+      /* In world volume, distant light has no contribution. */
+      return;
+    }
   }
   else {
     const float3 centroid = 0.5f * (bbox.min + bbox.max);
@@ -339,6 +337,9 @@ ccl_device void light_tree_node_importance(KernelGlobals kg,
       /* Minimal distance of the ray to the cluster. */
       distance = len(centroid - closest_point);
       point_to_centroid = -compute_v(centroid, P, D, bcone.axis, t);
+      /* FIXME(weizhen): it is not clear from which point the `cos_theta_u` should be computed in
+       * volume segment. We could use `closest_point` as a conservative measure, but then
+       * `point_to_centroid` should also use `closest_point`. */
       cos_theta_u = light_tree_cos_bounding_box_angle(bbox, closest_point, point_to_centroid);
     }
     else {
@@ -697,17 +698,16 @@ ccl_device int light_tree_root_node_index(KernelGlobals kg, const int object_rec
   return 0;
 }
 
+/* Pick a random light from the light tree from a given shading point P, write to the picked light
+ * index and the probability of picking the light. */
 template<bool in_volume_segment>
 ccl_device_noinline bool light_tree_sample(KernelGlobals kg,
-                                           const float3 rand,
-                                           const float time,
+                                           const float rand,
                                            const float3 P,
                                            float3 N_or_D,
                                            float t,
                                            const int object_receiver,
                                            const int shader_flags,
-                                           const int bounce,
-                                           const uint32_t path_flag,
                                            ccl_private LightSample *ls)
 {
   if (!kernel_data.integrator.use_direct_light) {
@@ -718,10 +718,8 @@ ccl_device_noinline bool light_tree_sample(KernelGlobals kg,
   float pdf_leaf = 1.0f;
   float pdf_selection = 1.0f;
   int selected_emitter = -1;
-  int object_emitter = 0;
   int node_index = light_tree_root_node_index(kg, object_receiver);
-  /* The first two dimensions of the Sobol sequence have better stratification. */
-  float rand_selection = rand.z;
+  float rand_selection = rand;
 
   float3 local_P = P;
 
@@ -743,7 +741,7 @@ ccl_device_noinline bool light_tree_sample(KernelGlobals kg,
       }
 
       /* Continue with the picked mesh light. */
-      object_emitter = kernel_data_fetch(light_tree_emitters, selected_emitter).mesh.object_id;
+      ls->object = kernel_data_fetch(light_tree_emitters, selected_emitter).mesh.object_id;
       continue;
     }
 
@@ -766,27 +764,18 @@ ccl_device_noinline bool light_tree_sample(KernelGlobals kg,
     pdf_leaf *= (node_index == left_index) ? left_prob : (1.0f - left_prob);
   }
 
-  pdf_selection *= pdf_leaf;
+  ls->emitter_id = selected_emitter;
+  ls->pdf_selection = pdf_selection * pdf_leaf;
 
-  return light_sample<in_volume_segment>(kg,
-                                         float3_to_float2(rand),
-                                         time,
-                                         P,
-                                         N_or_D,
-                                         object_receiver,
-                                         shader_flags,
-                                         bounce,
-                                         path_flag,
-                                         selected_emitter,
-                                         object_emitter,
-                                         pdf_selection,
-                                         ls);
+  return true;
 }
 
 /* We need to be able to find the probability of selecting a given light for MIS. */
+template<bool in_volume_segment>
 ccl_device float light_tree_pdf(KernelGlobals kg,
                                 float3 P,
                                 float3 N,
+                                const float dt,
                                 const int path_flag,
                                 const int object_emitter,
                                 const uint index_emitter,
@@ -837,8 +826,8 @@ ccl_device float light_tree_pdf(KernelGlobals kg,
       for (int i = 0; i < knode->num_emitters; i++) {
         const int emitter = knode->leaf.first_emitter + i;
         float max_importance, min_importance;
-        light_tree_emitter_importance<false>(
-            kg, P, N, 0, has_transmission, emitter, max_importance, min_importance);
+        light_tree_emitter_importance<in_volume_segment>(
+            kg, P, N, dt, has_transmission, emitter, max_importance, min_importance);
         num_has_importance += (max_importance > 0);
         if (emitter == target_emitter) {
           target_max_importance = max_importance;
@@ -860,7 +849,7 @@ ccl_device float light_tree_pdf(KernelGlobals kg,
       if (subtree_root_index != -1) {
         /* Arrived at the mesh light. Continue with the subtree. */
         float unused;
-        light_tree_to_local_space<false>(kg, object_emitter, P, N, unused);
+        light_tree_to_local_space<in_volume_segment>(kg, object_emitter, P, N, unused);
 
         node_index = subtree_root_index;
         subtree_root_index = -1;
@@ -878,8 +867,8 @@ ccl_device float light_tree_pdf(KernelGlobals kg,
     const int right_index = knode->inner.right_child;
 
     float left_prob;
-    if (!get_left_probability<false>(
-            kg, P, N, 0, has_transmission, left_index, right_index, left_prob))
+    if (!get_left_probability<in_volume_segment>(
+            kg, P, N, dt, has_transmission, left_index, right_index, left_prob))
     {
       return 0.0f;
     }
@@ -897,4 +886,27 @@ ccl_device float light_tree_pdf(KernelGlobals kg,
   }
 }
 
+/* If the function is called in volume, retrieve the previous point in volume segment, and compute
+ * pdf from there. Otherwise compute from the current shading point. */
+ccl_device_inline float light_tree_pdf(KernelGlobals kg,
+                                       float3 P,
+                                       float3 N,
+                                       const float dt,
+                                       const int path_flag,
+                                       const int emitter_object,
+                                       const uint emitter_id,
+                                       const int object_receiver)
+{
+  if (path_flag & PATH_RAY_VOLUME_SCATTER) {
+    const float3 D_times_t = N;
+    const float3 D = normalize(D_times_t);
+    P = P - D_times_t;
+    return light_tree_pdf<true>(
+        kg, P, D, dt, path_flag, emitter_object, emitter_id, object_receiver);
+  }
+
+  return light_tree_pdf<false>(
+      kg, P, N, 0.0f, path_flag, emitter_object, emitter_id, object_receiver);
+}
+
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/light/triangle.h b/intern/cycles/kernel/light/triangle.h
index 16834555f1a..8007c8d6123 100644
--- a/intern/cycles/kernel/light/triangle.h
+++ b/intern/cycles/kernel/light/triangle.h
@@ -285,7 +285,8 @@ ccl_device_inline bool triangle_light_valid_ray_segment(KernelGlobals kg,
 
   /* Only one side is sampled, intersect the ray and the triangle light plane to find the visible
    * ray segment. Flip normal if Emission Sampling is set to back. */
-  return ray_plane_intersect((shader_flag & SD_MIS_BACK) ? -ls->Ng : ls->Ng, P, D, t_range);
+  const float3 N = ls->Ng;
+  return ray_plane_intersect((shader_flag & SD_MIS_BACK) ? -N : N, P, D, t_range);
 }
 
 template<bool in_volume_segment>
@@ -326,9 +327,8 @@ ccl_device_forceinline bool triangle_light_tree_parameters(
   }
 
   const bool front_facing = bcone.theta_o != 0.0f || dot(bcone.axis, point_to_centroid) < 0;
-  const bool in_volume = is_zero(N);
 
-  return (front_facing && shape_above_surface) || in_volume;
+  return front_facing && shape_above_surface;
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h
index b4978744cc7..ce1c1da5168 100644
--- a/intern/cycles/kernel/types.h
+++ b/intern/cycles/kernel/types.h
@@ -45,6 +45,7 @@ CCL_NAMESPACE_BEGIN
 #define OBJECT_NONE (~0)
 #define PRIM_NONE (~0)
 #define LAMP_NONE (~0)
+#define EMITTER_NONE (~0)
 #define ID_NONE (0.0f)
 #define PASS_UNUSED (~0)
 #define LIGHTGROUP_NONE (~0)
diff --git a/tests/data b/tests/data
index 00af9c65712..9ce3adf54da 160000
--- a/tests/data
+++ b/tests/data
@@ -1 +1 @@
-Subproject commit 00af9c65712b6aa78ce6eb0c62c5aafb7a867f18
+Subproject commit 9ce3adf54dae89a9daaa4dcd04cc3a566aed3aaf
-- 
2.30.2


From 2c301c921fc40224c0d9e8cd5cdb6c3ba254cadb Mon Sep 17 00:00:00 2001
From: Weizhen Huang <weizhen@blender.org>
Date: Mon, 25 Mar 2024 19:43:14 +0100
Subject: [PATCH 27/36] Tests: temporarily disable light tree in the new test
 file

It seems to cause problems on other platforms.
---
 tests/data | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/data b/tests/data
index 9ce3adf54da..bf5c7083054 160000
--- a/tests/data
+++ b/tests/data
@@ -1 +1 @@
-Subproject commit 9ce3adf54dae89a9daaa4dcd04cc3a566aed3aaf
+Subproject commit bf5c70830540b215a3b1df21f28e0e80ead230f7
-- 
2.30.2


From 73cc27b988420d1ee10aaab808e57947dad55bbd Mon Sep 17 00:00:00 2001
From: Hans Goudey <hans@blender.org>
Date: Mon, 25 Mar 2024 15:49:15 -0400
Subject: [PATCH 28/36] Fix #119886: Set curve node missing normals update tag

The change to use generic "capture field on geometry" utilities for this
node and other nodes like it means `AttributeWriter` with its update
tagging isn't being used anymore; the attribute is just being created
with the new values (in some cases anyway). To fix this, call the
attribute provider's update function when creating the attribute too.
This was noted as useful in 130701763bdb9e104486 too.

The initialization of curve and point cloud runtime structs is moved
because they now have to be allocated before any attributes are added.
---
 .../blenkernel/intern/attribute_access.cc     | 24 +++++++++++++++----
 .../blenkernel/intern/curves_geometry.cc      |  4 ++--
 .../blender/blenkernel/intern/pointcloud.cc   |  4 ++--
 3 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/source/blender/blenkernel/intern/attribute_access.cc b/source/blender/blenkernel/intern/attribute_access.cc
index 6b947417e91..c1f6354c606 100644
--- a/source/blender/blenkernel/intern/attribute_access.cc
+++ b/source/blender/blenkernel/intern/attribute_access.cc
@@ -510,22 +510,38 @@ bool BuiltinCustomDataLayerProvider::try_create(void *owner,
     return false;
   }
 
+  auto update = [&]() {
+    if (update_on_change_ != nullptr) {
+      update_on_change_(owner);
+    }
+  };
+
   const int element_num = custom_data_access_.get_element_num(owner);
   if (stored_as_named_attribute_) {
     if (CustomData_has_layer_named(custom_data, data_type_, name_)) {
       /* Exists already. */
       return false;
     }
-    return add_custom_data_layer_from_attribute_init(
-        name_, *custom_data, stored_type_, element_num, initializer);
+    if (add_custom_data_layer_from_attribute_init(
+            name_, *custom_data, stored_type_, element_num, initializer))
+    {
+      update();
+      return true;
+    }
+    return false;
   }
 
   if (CustomData_get_layer(custom_data, stored_type_) != nullptr) {
     /* Exists already. */
     return false;
   }
-  return add_builtin_type_custom_data_layer_from_init(
-      *custom_data, stored_type_, element_num, initializer);
+  if (add_builtin_type_custom_data_layer_from_init(
+          *custom_data, stored_type_, element_num, initializer))
+  {
+    update();
+    return true;
+  }
+  return false;
 }
 
 bool BuiltinCustomDataLayerProvider::exists(const void *owner) const
diff --git a/source/blender/blenkernel/intern/curves_geometry.cc b/source/blender/blenkernel/intern/curves_geometry.cc
index ccb67ec91d0..a4faa76d98e 100644
--- a/source/blender/blenkernel/intern/curves_geometry.cc
+++ b/source/blender/blenkernel/intern/curves_geometry.cc
@@ -58,6 +58,8 @@ CurvesGeometry::CurvesGeometry() : CurvesGeometry(0, 0) {}
 
 CurvesGeometry::CurvesGeometry(const int point_num, const int curve_num)
 {
+  this->runtime = MEM_new<CurvesGeometryRuntime>(__func__);
+
   this->point_num = point_num;
   this->curve_num = curve_num;
   CustomData_reset(&this->point_data);
@@ -67,8 +69,6 @@ CurvesGeometry::CurvesGeometry(const int point_num, const int curve_num)
   this->attributes_for_write().add<float3>(
       "position", AttrDomain::Point, AttributeInitConstruct());
 
-  this->runtime = MEM_new<CurvesGeometryRuntime>(__func__);
-
   if (curve_num > 0) {
     this->curve_offsets = static_cast<int *>(
         MEM_malloc_arrayN(this->curve_num + 1, sizeof(int), __func__));
diff --git a/source/blender/blenkernel/intern/pointcloud.cc b/source/blender/blenkernel/intern/pointcloud.cc
index aa4a3c1d58e..141ac807479 100644
--- a/source/blender/blenkernel/intern/pointcloud.cc
+++ b/source/blender/blenkernel/intern/pointcloud.cc
@@ -62,11 +62,11 @@ static void pointcloud_init_data(ID *id)
 
   MEMCPY_STRUCT_AFTER(pointcloud, DNA_struct_default_get(PointCloud), id);
 
+  pointcloud->runtime = new blender::bke::PointCloudRuntime();
+
   CustomData_reset(&pointcloud->pdata);
   pointcloud->attributes_for_write().add<float3>(
       "position", blender::bke::AttrDomain::Point, blender::bke::AttributeInitConstruct());
-
-  pointcloud->runtime = new blender::bke::PointCloudRuntime();
 }
 
 static void pointcloud_copy_data(Main * /*bmain*/,
-- 
2.30.2


From 11afa153618f0d812cf39b4690a30e958cb4a96d Mon Sep 17 00:00:00 2001
From: Hans Goudey <hans@blender.org>
Date: Mon, 25 Mar 2024 15:50:27 -0400
Subject: [PATCH 29/36] Revert "Fix #119508: Missing update after "Shade Flat"
 operator"

This reverts commit 130701763bdb9e104486c9fd789cf128af68d9d5.

73cc27b988420d1ee10aaab808e57947dad55bbd made this unnecessary.
---
 source/blender/editors/object/object_edit.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/source/blender/editors/object/object_edit.cc b/source/blender/editors/object/object_edit.cc
index 93fbd07f300..9805b9fc286 100644
--- a/source/blender/editors/object/object_edit.cc
+++ b/source/blender/editors/object/object_edit.cc
@@ -1613,7 +1613,6 @@ static int shade_smooth_exec(bContext *C, wmOperator *op)
         const float angle = RNA_float_get(op->ptr, "angle");
         bke::mesh_sharp_edges_set_from_angle(mesh, angle, keep_sharp_edges);
       }
-      mesh.tag_sharpness_changed();
       BKE_mesh_batch_cache_dirty_tag(static_cast<Mesh *>(ob->data), BKE_MESH_BATCH_DIRTY_ALL);
       changed = true;
     }
-- 
2.30.2


From 04bcd17dc81835b94e71d3c620124387c9d60ada Mon Sep 17 00:00:00 2001
From: Hans Goudey <hans@blender.org>
Date: Mon, 25 Mar 2024 16:01:36 -0400
Subject: [PATCH 30/36] Cleanup: Remove unnecessary builtin attribute provider
 logic

All builtin attributes are now stored as named attributes, so the old
code path from where they were stored with non-generic types can be
removed. The stored type and attribute type don't have to be tracked
separately anymore either.
---
 .../blenkernel/intern/attribute_access.cc     | 122 ++----------------
 .../intern/attribute_access_intern.hh         |  13 +-
 .../intern/geometry_component_curves.cc       |  16 ---
 .../intern/geometry_component_instances.cc    |   3 -
 .../intern/geometry_component_mesh.cc         |   8 --
 .../intern/geometry_component_pointcloud.cc   |   3 -
 6 files changed, 16 insertions(+), 149 deletions(-)

diff --git a/source/blender/blenkernel/intern/attribute_access.cc b/source/blender/blenkernel/intern/attribute_access.cc
index c1f6354c606..b9e45831562 100644
--- a/source/blender/blenkernel/intern/attribute_access.cc
+++ b/source/blender/blenkernel/intern/attribute_access.cc
@@ -253,54 +253,6 @@ static AttributeIDRef attribute_id_from_custom_data_layer(const CustomDataLayer
   return layer.name;
 }
 
-static bool add_builtin_type_custom_data_layer_from_init(CustomData &custom_data,
-                                                         const eCustomDataType data_type,
-                                                         const int domain_num,
-                                                         const AttributeInit &initializer)
-{
-  switch (initializer.type) {
-    case AttributeInit::Type::Construct: {
-      void *data = CustomData_add_layer(&custom_data, data_type, CD_CONSTRUCT, domain_num);
-      return data != nullptr;
-    }
-    case AttributeInit::Type::DefaultValue: {
-      void *data = CustomData_add_layer(&custom_data, data_type, CD_SET_DEFAULT, domain_num);
-      return data != nullptr;
-    }
-    case AttributeInit::Type::VArray: {
-      void *data = CustomData_add_layer(&custom_data, data_type, CD_CONSTRUCT, domain_num);
-      if (data == nullptr) {
-        return false;
-      }
-      const GVArray &varray = static_cast<const AttributeInitVArray &>(initializer).varray;
-      varray.materialize_to_uninitialized(varray.index_range(), data);
-      return true;
-    }
-    case AttributeInit::Type::MoveArray: {
-      void *src_data = static_cast<const AttributeInitMoveArray &>(initializer).data;
-      const void *stored_data = CustomData_add_layer_with_data(
-          &custom_data, data_type, src_data, domain_num, nullptr);
-      if (stored_data == nullptr) {
-        return false;
-      }
-      if (stored_data != src_data) {
-        MEM_freeN(src_data);
-        return true;
-      }
-      return true;
-    }
-    case AttributeInit::Type::Shared: {
-      const AttributeInitShared &init = static_cast<const AttributeInitShared &>(initializer);
-      const void *stored_data = CustomData_add_layer_with_data(
-          &custom_data, data_type, const_cast<void *>(init.data), domain_num, init.sharing_info);
-      return stored_data != nullptr;
-    }
-  }
-
-  BLI_assert_unreachable();
-  return false;
-}
-
 static void *add_generic_custom_data_layer(CustomData &custom_data,
                                            const eCustomDataType data_type,
                                            const eCDAllocType alloctype,
@@ -393,10 +345,7 @@ static bool custom_data_layer_matches_attribute_id(const CustomDataLayer &layer,
 
 bool BuiltinCustomDataLayerProvider::layer_exists(const CustomData &custom_data) const
 {
-  if (stored_as_named_attribute_) {
-    return CustomData_get_named_layer_index(&custom_data, stored_type_, name_) != -1;
-  }
-  return CustomData_has_layer(&custom_data, stored_type_);
+  return CustomData_get_named_layer_index(&custom_data, data_type_, name_) != -1;
 }
 
 GAttributeReader BuiltinCustomDataLayerProvider::try_get_for_read(const void *owner) const
@@ -416,13 +365,7 @@ GAttributeReader BuiltinCustomDataLayerProvider::try_get_for_read(const void *ow
     return {};
   }
 
-  int index;
-  if (stored_as_named_attribute_) {
-    index = CustomData_get_named_layer_index(custom_data, stored_type_, name_);
-  }
-  else {
-    index = CustomData_get_layer_index(custom_data, stored_type_);
-  }
+  const int index = CustomData_get_named_layer_index(custom_data, data_type_, name_);
   if (index == -1) {
     return {};
   }
@@ -452,13 +395,7 @@ GAttributeWriter BuiltinCustomDataLayerProvider::try_get_for_write(void *owner)
     return {};
   }
 
-  void *data = nullptr;
-  if (stored_as_named_attribute_) {
-    data = CustomData_get_layer_named_for_write(custom_data, stored_type_, name_, element_num);
-  }
-  else {
-    data = CustomData_get_layer_for_write(custom_data, stored_type_, element_num);
-  }
+  void *data = CustomData_get_layer_named_for_write(custom_data, data_type_, name_, element_num);
   if (data == nullptr) {
     return {};
   }
@@ -475,27 +412,13 @@ bool BuiltinCustomDataLayerProvider::try_delete(void *owner) const
     return {};
   }
 
-  auto update = [&]() {
+  const int element_num = custom_data_access_.get_element_num(owner);
+  if (CustomData_free_layer_named(custom_data, name_, element_num)) {
     if (update_on_change_ != nullptr) {
       update_on_change_(owner);
     }
-  };
-
-  const int element_num = custom_data_access_.get_element_num(owner);
-  if (stored_as_named_attribute_) {
-    if (CustomData_free_layer_named(custom_data, name_, element_num)) {
-      update();
-      return true;
-    }
-    return false;
-  }
-
-  const int layer_index = CustomData_get_layer_index(custom_data, stored_type_);
-  if (CustomData_free_layer(custom_data, stored_type_, element_num, layer_index)) {
-    update();
     return true;
   }
-
   return false;
 }
 
@@ -510,35 +433,17 @@ bool BuiltinCustomDataLayerProvider::try_create(void *owner,
     return false;
   }
 
-  auto update = [&]() {
-    if (update_on_change_ != nullptr) {
-      update_on_change_(owner);
-    }
-  };
-
   const int element_num = custom_data_access_.get_element_num(owner);
-  if (stored_as_named_attribute_) {
-    if (CustomData_has_layer_named(custom_data, data_type_, name_)) {
-      /* Exists already. */
-      return false;
-    }
-    if (add_custom_data_layer_from_attribute_init(
-            name_, *custom_data, stored_type_, element_num, initializer))
-    {
-      update();
-      return true;
-    }
-    return false;
-  }
-
-  if (CustomData_get_layer(custom_data, stored_type_) != nullptr) {
+  if (CustomData_has_layer_named(custom_data, data_type_, name_)) {
     /* Exists already. */
     return false;
   }
-  if (add_builtin_type_custom_data_layer_from_init(
-          *custom_data, stored_type_, element_num, initializer))
+  if (add_custom_data_layer_from_attribute_init(
+          name_, *custom_data, data_type_, element_num, initializer))
   {
-    update();
+    if (update_on_change_ != nullptr) {
+      update_on_change_(owner);
+    }
     return true;
   }
   return false;
@@ -550,10 +455,7 @@ bool BuiltinCustomDataLayerProvider::exists(const void *owner) const
   if (custom_data == nullptr) {
     return false;
   }
-  if (stored_as_named_attribute_) {
-    return CustomData_has_layer_named(custom_data, stored_type_, name_);
-  }
-  return CustomData_get_layer(custom_data, stored_type_) != nullptr;
+  return CustomData_has_layer_named(custom_data, data_type_, name_);
 }
 
 GAttributeReader CustomDataAttributeProvider::try_get_for_read(
diff --git a/source/blender/blenkernel/intern/attribute_access_intern.hh b/source/blender/blenkernel/intern/attribute_access_intern.hh
index 663aab7f69c..3f9810bf89f 100644
--- a/source/blender/blenkernel/intern/attribute_access_intern.hh
+++ b/source/blender/blenkernel/intern/attribute_access_intern.hh
@@ -174,27 +174,22 @@ class CustomDataAttributeProvider final : public DynamicAttributesProvider {
  */
 class BuiltinCustomDataLayerProvider final : public BuiltinAttributeProvider {
   using UpdateOnChange = void (*)(void *owner);
-  const eCustomDataType stored_type_;
   const CustomDataAccessInfo custom_data_access_;
   const UpdateOnChange update_on_change_;
-  bool stored_as_named_attribute_;
 
  public:
   BuiltinCustomDataLayerProvider(std::string attribute_name,
                                  const AttrDomain domain,
-                                 const eCustomDataType attribute_type,
-                                 const eCustomDataType stored_type,
+                                 const eCustomDataType data_type,
                                  const CreatableEnum creatable,
                                  const DeletableEnum deletable,
                                  const CustomDataAccessInfo custom_data_access,
-                                 const UpdateOnChange update_on_write,
+                                 const UpdateOnChange update_on_change,
                                  const AttributeValidator validator = {})
       : BuiltinAttributeProvider(
-            std::move(attribute_name), domain, attribute_type, creatable, deletable, validator),
-        stored_type_(stored_type),
+            std::move(attribute_name), domain, data_type, creatable, deletable, validator),
         custom_data_access_(custom_data_access),
-        update_on_change_(update_on_write),
-        stored_as_named_attribute_(data_type_ == stored_type_)
+        update_on_change_(update_on_change)
   {
   }
 
diff --git a/source/blender/blenkernel/intern/geometry_component_curves.cc b/source/blender/blenkernel/intern/geometry_component_curves.cc
index cc59acddf5d..65994dab4dc 100644
--- a/source/blender/blenkernel/intern/geometry_component_curves.cc
+++ b/source/blender/blenkernel/intern/geometry_component_curves.cc
@@ -476,7 +476,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider position("position",
                                                  AttrDomain::Point,
                                                  CD_PROP_FLOAT3,
-                                                 CD_PROP_FLOAT3,
                                                  BuiltinAttributeProvider::Creatable,
                                                  BuiltinAttributeProvider::NonDeletable,
                                                  point_access,
@@ -485,7 +484,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider radius("radius",
                                                AttrDomain::Point,
                                                CD_PROP_FLOAT,
-                                               CD_PROP_FLOAT,
                                                BuiltinAttributeProvider::Creatable,
                                                BuiltinAttributeProvider::Deletable,
                                                point_access,
@@ -494,7 +492,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider id("id",
                                            AttrDomain::Point,
                                            CD_PROP_INT32,
-                                           CD_PROP_INT32,
                                            BuiltinAttributeProvider::Creatable,
                                            BuiltinAttributeProvider::Deletable,
                                            point_access,
@@ -503,7 +500,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider tilt("tilt",
                                              AttrDomain::Point,
                                              CD_PROP_FLOAT,
-                                             CD_PROP_FLOAT,
                                              BuiltinAttributeProvider::Creatable,
                                              BuiltinAttributeProvider::Deletable,
                                              point_access,
@@ -512,7 +508,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider handle_right("handle_right",
                                                      AttrDomain::Point,
                                                      CD_PROP_FLOAT3,
-                                                     CD_PROP_FLOAT3,
                                                      BuiltinAttributeProvider::Creatable,
                                                      BuiltinAttributeProvider::Deletable,
                                                      point_access,
@@ -521,7 +516,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider handle_left("handle_left",
                                                     AttrDomain::Point,
                                                     CD_PROP_FLOAT3,
-                                                    CD_PROP_FLOAT3,
                                                     BuiltinAttributeProvider::Creatable,
                                                     BuiltinAttributeProvider::Deletable,
                                                     point_access,
@@ -536,7 +530,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider handle_type_right("handle_type_right",
                                                           AttrDomain::Point,
                                                           CD_PROP_INT8,
-                                                          CD_PROP_INT8,
                                                           BuiltinAttributeProvider::Creatable,
                                                           BuiltinAttributeProvider::Deletable,
                                                           point_access,
@@ -546,7 +539,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider handle_type_left("handle_type_left",
                                                          AttrDomain::Point,
                                                          CD_PROP_INT8,
-                                                         CD_PROP_INT8,
                                                          BuiltinAttributeProvider::Creatable,
                                                          BuiltinAttributeProvider::Deletable,
                                                          point_access,
@@ -556,7 +548,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider nurbs_weight("nurbs_weight",
                                                      AttrDomain::Point,
                                                      CD_PROP_FLOAT,
-                                                     CD_PROP_FLOAT,
                                                      BuiltinAttributeProvider::Creatable,
                                                      BuiltinAttributeProvider::Deletable,
                                                      point_access,
@@ -569,7 +560,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider nurbs_order("nurbs_order",
                                                     AttrDomain::Curve,
                                                     CD_PROP_INT8,
-                                                    CD_PROP_INT8,
                                                     BuiltinAttributeProvider::Creatable,
                                                     BuiltinAttributeProvider::Deletable,
                                                     curve_access,
@@ -585,7 +575,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider normal_mode("normal_mode",
                                                     AttrDomain::Curve,
                                                     CD_PROP_INT8,
-                                                    CD_PROP_INT8,
                                                     BuiltinAttributeProvider::Creatable,
                                                     BuiltinAttributeProvider::Deletable,
                                                     curve_access,
@@ -595,7 +584,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider custom_normal("custom_normal",
                                                       AttrDomain::Point,
                                                       CD_PROP_FLOAT3,
-                                                      CD_PROP_FLOAT3,
                                                       BuiltinAttributeProvider::Creatable,
                                                       BuiltinAttributeProvider::Deletable,
                                                       point_access,
@@ -610,7 +598,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider nurbs_knots_mode("knots_mode",
                                                          AttrDomain::Curve,
                                                          CD_PROP_INT8,
-                                                         CD_PROP_INT8,
                                                          BuiltinAttributeProvider::Creatable,
                                                          BuiltinAttributeProvider::Deletable,
                                                          curve_access,
@@ -626,7 +613,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider curve_type("curve_type",
                                                    AttrDomain::Curve,
                                                    CD_PROP_INT8,
-                                                   CD_PROP_INT8,
                                                    BuiltinAttributeProvider::Creatable,
                                                    BuiltinAttributeProvider::Deletable,
                                                    curve_access,
@@ -640,7 +626,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider resolution("resolution",
                                                    AttrDomain::Curve,
                                                    CD_PROP_INT32,
-                                                   CD_PROP_INT32,
                                                    BuiltinAttributeProvider::Creatable,
                                                    BuiltinAttributeProvider::Deletable,
                                                    curve_access,
@@ -650,7 +635,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider cyclic("cyclic",
                                                AttrDomain::Curve,
                                                CD_PROP_BOOL,
-                                               CD_PROP_BOOL,
                                                BuiltinAttributeProvider::Creatable,
                                                BuiltinAttributeProvider::Deletable,
                                                curve_access,
diff --git a/source/blender/blenkernel/intern/geometry_component_instances.cc b/source/blender/blenkernel/intern/geometry_component_instances.cc
index 8137526d7f8..3123a9f91f5 100644
--- a/source/blender/blenkernel/intern/geometry_component_instances.cc
+++ b/source/blender/blenkernel/intern/geometry_component_instances.cc
@@ -133,7 +133,6 @@ static ComponentAttributeProviders create_attribute_providers_for_instances()
   static BuiltinCustomDataLayerProvider id("id",
                                            AttrDomain::Instance,
                                            CD_PROP_INT32,
-                                           CD_PROP_INT32,
                                            BuiltinAttributeProvider::Creatable,
                                            BuiltinAttributeProvider::Deletable,
                                            instance_custom_data_access,
@@ -142,7 +141,6 @@ static ComponentAttributeProviders create_attribute_providers_for_instances()
   static BuiltinCustomDataLayerProvider instance_transform("instance_transform",
                                                            AttrDomain::Instance,
                                                            CD_PROP_FLOAT4X4,
-                                                           CD_PROP_FLOAT4X4,
                                                            BuiltinAttributeProvider::Creatable,
                                                            BuiltinAttributeProvider::NonDeletable,
                                                            instance_custom_data_access,
@@ -152,7 +150,6 @@ static ComponentAttributeProviders create_attribute_providers_for_instances()
   static BuiltinCustomDataLayerProvider reference_index(".reference_index",
                                                         AttrDomain::Instance,
                                                         CD_PROP_INT32,
-                                                        CD_PROP_INT32,
                                                         BuiltinAttributeProvider::Creatable,
                                                         BuiltinAttributeProvider::NonDeletable,
                                                         instance_custom_data_access,
diff --git a/source/blender/blenkernel/intern/geometry_component_mesh.cc b/source/blender/blenkernel/intern/geometry_component_mesh.cc
index 9486615ed4f..93888867e63 100644
--- a/source/blender/blenkernel/intern/geometry_component_mesh.cc
+++ b/source/blender/blenkernel/intern/geometry_component_mesh.cc
@@ -1010,7 +1010,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
   static BuiltinCustomDataLayerProvider position("position",
                                                  AttrDomain::Point,
                                                  CD_PROP_FLOAT3,
-                                                 CD_PROP_FLOAT3,
                                                  BuiltinAttributeProvider::Creatable,
                                                  BuiltinAttributeProvider::NonDeletable,
                                                  point_access,
@@ -1019,7 +1018,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
   static BuiltinCustomDataLayerProvider id("id",
                                            AttrDomain::Point,
                                            CD_PROP_INT32,
-                                           CD_PROP_INT32,
                                            BuiltinAttributeProvider::Creatable,
                                            BuiltinAttributeProvider::Deletable,
                                            point_access,
@@ -1035,7 +1033,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
   static BuiltinCustomDataLayerProvider material_index("material_index",
                                                        AttrDomain::Face,
                                                        CD_PROP_INT32,
-                                                       CD_PROP_INT32,
                                                        BuiltinAttributeProvider::Creatable,
                                                        BuiltinAttributeProvider::Deletable,
                                                        face_access,
@@ -1049,7 +1046,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
   static BuiltinCustomDataLayerProvider edge_verts(".edge_verts",
                                                    AttrDomain::Edge,
                                                    CD_PROP_INT32_2D,
-                                                   CD_PROP_INT32_2D,
                                                    BuiltinAttributeProvider::Creatable,
                                                    BuiltinAttributeProvider::NonDeletable,
                                                    edge_access,
@@ -1065,7 +1061,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
   static BuiltinCustomDataLayerProvider corner_vert(".corner_vert",
                                                     AttrDomain::Corner,
                                                     CD_PROP_INT32,
-                                                    CD_PROP_INT32,
                                                     BuiltinAttributeProvider::Creatable,
                                                     BuiltinAttributeProvider::NonDeletable,
                                                     corner_access,
@@ -1074,7 +1069,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
   static BuiltinCustomDataLayerProvider corner_edge(".corner_edge",
                                                     AttrDomain::Corner,
                                                     CD_PROP_INT32,
-                                                    CD_PROP_INT32,
                                                     BuiltinAttributeProvider::Creatable,
                                                     BuiltinAttributeProvider::NonDeletable,
                                                     corner_access,
@@ -1084,7 +1078,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
   static BuiltinCustomDataLayerProvider sharp_face("sharp_face",
                                                    AttrDomain::Face,
                                                    CD_PROP_BOOL,
-                                                   CD_PROP_BOOL,
                                                    BuiltinAttributeProvider::Creatable,
                                                    BuiltinAttributeProvider::Deletable,
                                                    face_access,
@@ -1093,7 +1086,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
   static BuiltinCustomDataLayerProvider sharp_edge("sharp_edge",
                                                    AttrDomain::Edge,
                                                    CD_PROP_BOOL,
-                                                   CD_PROP_BOOL,
                                                    BuiltinAttributeProvider::Creatable,
                                                    BuiltinAttributeProvider::Deletable,
                                                    edge_access,
diff --git a/source/blender/blenkernel/intern/geometry_component_pointcloud.cc b/source/blender/blenkernel/intern/geometry_component_pointcloud.cc
index 03a8ee2521d..996d915855d 100644
--- a/source/blender/blenkernel/intern/geometry_component_pointcloud.cc
+++ b/source/blender/blenkernel/intern/geometry_component_pointcloud.cc
@@ -147,7 +147,6 @@ static ComponentAttributeProviders create_attribute_providers_for_point_cloud()
   static BuiltinCustomDataLayerProvider position("position",
                                                  AttrDomain::Point,
                                                  CD_PROP_FLOAT3,
-                                                 CD_PROP_FLOAT3,
                                                  BuiltinAttributeProvider::Creatable,
                                                  BuiltinAttributeProvider::NonDeletable,
                                                  point_access,
@@ -155,7 +154,6 @@ static ComponentAttributeProviders create_attribute_providers_for_point_cloud()
   static BuiltinCustomDataLayerProvider radius("radius",
                                                AttrDomain::Point,
                                                CD_PROP_FLOAT,
-                                               CD_PROP_FLOAT,
                                                BuiltinAttributeProvider::Creatable,
                                                BuiltinAttributeProvider::Deletable,
                                                point_access,
@@ -163,7 +161,6 @@ static ComponentAttributeProviders create_attribute_providers_for_point_cloud()
   static BuiltinCustomDataLayerProvider id("id",
                                            AttrDomain::Point,
                                            CD_PROP_INT32,
-                                           CD_PROP_INT32,
                                            BuiltinAttributeProvider::Creatable,
                                            BuiltinAttributeProvider::Deletable,
                                            point_access,
-- 
2.30.2


From 40a5e739e2701ee7434dcbd8acf6b740622ddf50 Mon Sep 17 00:00:00 2001
From: Hans Goudey <hans@blender.org>
Date: Mon, 25 Mar 2024 21:42:07 +0100
Subject: [PATCH 31/36] Fix #119873: Bake node crash with instanced light
 objects

4d0936c7d74bee32cd50 explicitly avoids turning non-geometry object
instances into geometry instances. This code is called to prepare
geometry sets for baking, and baking currently assumes that the baked
instances are always geometry sets.

To fix this, check the instance type and serialize an empty geometry
set for the object instance case that crashed. Compared to before the
crash-causing commit, there is no change in behavior, since the old
code would have created empty geometry sets for those instances too.

Pull Request: https://projects.blender.org/blender/blender/pulls/119892
---
 .../blender/blenkernel/intern/bake_items_serialize.cc | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/source/blender/blenkernel/intern/bake_items_serialize.cc b/source/blender/blenkernel/intern/bake_items_serialize.cc
index 28edca63a09..4bc745f4233 100644
--- a/source/blender/blenkernel/intern/bake_items_serialize.cc
+++ b/source/blender/blenkernel/intern/bake_items_serialize.cc
@@ -968,9 +968,14 @@ static std::shared_ptr<DictionaryValue> serialize_geometry_set(const GeometrySet
 
     auto io_references = io_instances->append_array("references");
     for (const InstanceReference &reference : instances.references()) {
-      BLI_assert(reference.type() == InstanceReference::Type::GeometrySet);
-      io_references->append(
-          serialize_geometry_set(reference.geometry_set(), blob_writer, blob_sharing));
+      if (reference.type() == InstanceReference::Type::GeometrySet) {
+        const GeometrySet &geometry = reference.geometry_set();
+        io_references->append(serialize_geometry_set(geometry, blob_writer, blob_sharing));
+      }
+      else {
+        /* TODO: Support serializing object and collection references. */
+        io_references->append(serialize_geometry_set({}, blob_writer, blob_sharing));
+      }
     }
 
     io_instances->append(
-- 
2.30.2


From aa9bfc1b2af65286746472c1dbebdb56d28d64b8 Mon Sep 17 00:00:00 2001
From: Hans Goudey <hans@blender.org>
Date: Mon, 25 Mar 2024 16:58:21 -0400
Subject: [PATCH 32/36] Fix: Uninitialized attribute memory read after recent
 commit

update_on_change_ shouldn't be called when an attribute is created
without initializing its array values. In that case it is undefined
behavior not to set the values elsewhere anyway, and setting them will
cause its own update tag.
---
 source/blender/blenkernel/intern/attribute_access.cc | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/source/blender/blenkernel/intern/attribute_access.cc b/source/blender/blenkernel/intern/attribute_access.cc
index b9e45831562..3bd191a9548 100644
--- a/source/blender/blenkernel/intern/attribute_access.cc
+++ b/source/blender/blenkernel/intern/attribute_access.cc
@@ -441,8 +441,12 @@ bool BuiltinCustomDataLayerProvider::try_create(void *owner,
   if (add_custom_data_layer_from_attribute_init(
           name_, *custom_data, data_type_, element_num, initializer))
   {
-    if (update_on_change_ != nullptr) {
-      update_on_change_(owner);
+    if (initializer.type != AttributeInit::Type::Construct) {
+      /* Avoid calling update function when values are not initialized. In that case
+       * values must be set elsewhere anyway, which will cause a separate update tag. */
+      if (update_on_change_ != nullptr) {
+        update_on_change_(owner);
+      }
     }
     return true;
   }
-- 
2.30.2


From ccb416322cc727250656a3c29f483b7b48a1de07 Mon Sep 17 00:00:00 2001
From: Hans Goudey <hans@blender.org>
Date: Mon, 25 Mar 2024 16:58:29 -0400
Subject: [PATCH 33/36] UI: Add XYZ labels to Set Curve Normal node vector
 input

---
 .../nodes/geometry/nodes/node_geo_set_curve_normal.cc        | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/source/blender/nodes/geometry/nodes/node_geo_set_curve_normal.cc b/source/blender/nodes/geometry/nodes/node_geo_set_curve_normal.cc
index d97c8561b69..ded077ee86f 100644
--- a/source/blender/nodes/geometry/nodes/node_geo_set_curve_normal.cc
+++ b/source/blender/nodes/geometry/nodes/node_geo_set_curve_normal.cc
@@ -21,7 +21,10 @@ static void node_declare(NodeDeclarationBuilder &b)
   b.add_input<decl::Geometry>("Curve").supported_type(
       {GeometryComponent::Type::Curve, GeometryComponent::Type::GreasePencil});
   b.add_input<decl::Bool>("Selection").default_value(true).hide_value().field_on_all();
-  b.add_input<decl::Vector>("Normal").default_value({0.0f, 0.0f, 1.0f}).field_on_all();
+  b.add_input<decl::Vector>("Normal")
+      .default_value({0.0f, 0.0f, 1.0f})
+      .subtype(PROP_XYZ)
+      .field_on_all();
   b.add_output<decl::Geometry>("Curve").propagate_all();
 }
 
-- 
2.30.2


From ef22826a3cf755de12dceface7751e75460d3c5b Mon Sep 17 00:00:00 2001
From: Hans Goudey <hans@blender.org>
Date: Mon, 25 Mar 2024 17:05:39 -0400
Subject: [PATCH 34/36] Cleanup: Remove non-creatable option for builtin
 attributes

The non-creatable option was necessary when attributes were stored
embedded in legacy structs like `MPoly`. That isn't the case anymore,
and there doesn't seem to be a reason to restrict the creation of
attributes.
---
 .../blenkernel/intern/attribute_access.cc        |  3 ---
 .../blenkernel/intern/attribute_access_intern.hh | 14 +++-----------
 .../intern/geometry_component_curves.cc          | 16 ----------------
 .../intern/geometry_component_instances.cc       |  3 ---
 .../blenkernel/intern/geometry_component_mesh.cc |  8 --------
 .../intern/geometry_component_pointcloud.cc      |  3 ---
 6 files changed, 3 insertions(+), 44 deletions(-)

diff --git a/source/blender/blenkernel/intern/attribute_access.cc b/source/blender/blenkernel/intern/attribute_access.cc
index 3bd191a9548..f69184175a3 100644
--- a/source/blender/blenkernel/intern/attribute_access.cc
+++ b/source/blender/blenkernel/intern/attribute_access.cc
@@ -425,9 +425,6 @@ bool BuiltinCustomDataLayerProvider::try_delete(void *owner) const
 bool BuiltinCustomDataLayerProvider::try_create(void *owner,
                                                 const AttributeInit &initializer) const
 {
-  if (createable_ != Creatable) {
-    return false;
-  }
   CustomData *custom_data = custom_data_access_.get_custom_data(owner);
   if (custom_data == nullptr) {
     return false;
diff --git a/source/blender/blenkernel/intern/attribute_access_intern.hh b/source/blender/blenkernel/intern/attribute_access_intern.hh
index 3f9810bf89f..ae372a00089 100644
--- a/source/blender/blenkernel/intern/attribute_access_intern.hh
+++ b/source/blender/blenkernel/intern/attribute_access_intern.hh
@@ -31,14 +31,10 @@ struct CustomDataAccessInfo {
  * A #BuiltinAttributeProvider is responsible for exactly one attribute on a geometry component.
  * The attribute is identified by its name and has a fixed domain and type. Builtin attributes do
  * not follow the same loose rules as other attributes, because they are mapped to internal
- * "legacy" data structures. For example, some builtin attributes cannot be deleted. */
+ * "legacy" data structures. For example, some builtin attributes cannot be deleted.
+ */
 class BuiltinAttributeProvider {
  public:
-  /* Some utility enums to avoid hard to read booleans in function calls. */
-  enum CreatableEnum {
-    Creatable,
-    NonCreatable,
-  };
   enum DeletableEnum {
     Deletable,
     NonDeletable,
@@ -48,7 +44,6 @@ class BuiltinAttributeProvider {
   const std::string name_;
   const AttrDomain domain_;
   const eCustomDataType data_type_;
-  const CreatableEnum createable_;
   const DeletableEnum deletable_;
   const AttributeValidator validator_;
 
@@ -56,13 +51,11 @@ class BuiltinAttributeProvider {
   BuiltinAttributeProvider(std::string name,
                            const AttrDomain domain,
                            const eCustomDataType data_type,
-                           const CreatableEnum createable,
                            const DeletableEnum deletable,
                            AttributeValidator validator = {})
       : name_(std::move(name)),
         domain_(domain),
         data_type_(data_type),
-        createable_(createable),
         deletable_(deletable),
         validator_(validator)
   {
@@ -181,13 +174,12 @@ class BuiltinCustomDataLayerProvider final : public BuiltinAttributeProvider {
   BuiltinCustomDataLayerProvider(std::string attribute_name,
                                  const AttrDomain domain,
                                  const eCustomDataType data_type,
-                                 const CreatableEnum creatable,
                                  const DeletableEnum deletable,
                                  const CustomDataAccessInfo custom_data_access,
                                  const UpdateOnChange update_on_change,
                                  const AttributeValidator validator = {})
       : BuiltinAttributeProvider(
-            std::move(attribute_name), domain, data_type, creatable, deletable, validator),
+            std::move(attribute_name), domain, data_type, deletable, validator),
         custom_data_access_(custom_data_access),
         update_on_change_(update_on_change)
   {
diff --git a/source/blender/blenkernel/intern/geometry_component_curves.cc b/source/blender/blenkernel/intern/geometry_component_curves.cc
index 65994dab4dc..a59e14fe566 100644
--- a/source/blender/blenkernel/intern/geometry_component_curves.cc
+++ b/source/blender/blenkernel/intern/geometry_component_curves.cc
@@ -476,7 +476,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider position("position",
                                                  AttrDomain::Point,
                                                  CD_PROP_FLOAT3,
-                                                 BuiltinAttributeProvider::Creatable,
                                                  BuiltinAttributeProvider::NonDeletable,
                                                  point_access,
                                                  tag_component_positions_changed);
@@ -484,7 +483,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider radius("radius",
                                                AttrDomain::Point,
                                                CD_PROP_FLOAT,
-                                               BuiltinAttributeProvider::Creatable,
                                                BuiltinAttributeProvider::Deletable,
                                                point_access,
                                                tag_component_radii_changed);
@@ -492,7 +490,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider id("id",
                                            AttrDomain::Point,
                                            CD_PROP_INT32,
-                                           BuiltinAttributeProvider::Creatable,
                                            BuiltinAttributeProvider::Deletable,
                                            point_access,
                                            nullptr);
@@ -500,7 +497,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider tilt("tilt",
                                              AttrDomain::Point,
                                              CD_PROP_FLOAT,
-                                             BuiltinAttributeProvider::Creatable,
                                              BuiltinAttributeProvider::Deletable,
                                              point_access,
                                              tag_component_normals_changed);
@@ -508,7 +504,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider handle_right("handle_right",
                                                      AttrDomain::Point,
                                                      CD_PROP_FLOAT3,
-                                                     BuiltinAttributeProvider::Creatable,
                                                      BuiltinAttributeProvider::Deletable,
                                                      point_access,
                                                      tag_component_positions_changed);
@@ -516,7 +511,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider handle_left("handle_left",
                                                     AttrDomain::Point,
                                                     CD_PROP_FLOAT3,
-                                                    BuiltinAttributeProvider::Creatable,
                                                     BuiltinAttributeProvider::Deletable,
                                                     point_access,
                                                     tag_component_positions_changed);
@@ -530,7 +524,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider handle_type_right("handle_type_right",
                                                           AttrDomain::Point,
                                                           CD_PROP_INT8,
-                                                          BuiltinAttributeProvider::Creatable,
                                                           BuiltinAttributeProvider::Deletable,
                                                           point_access,
                                                           tag_component_topology_changed,
@@ -539,7 +532,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider handle_type_left("handle_type_left",
                                                          AttrDomain::Point,
                                                          CD_PROP_INT8,
-                                                         BuiltinAttributeProvider::Creatable,
                                                          BuiltinAttributeProvider::Deletable,
                                                          point_access,
                                                          tag_component_topology_changed,
@@ -548,7 +540,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider nurbs_weight("nurbs_weight",
                                                      AttrDomain::Point,
                                                      CD_PROP_FLOAT,
-                                                     BuiltinAttributeProvider::Creatable,
                                                      BuiltinAttributeProvider::Deletable,
                                                      point_access,
                                                      tag_component_positions_changed);
@@ -560,7 +551,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider nurbs_order("nurbs_order",
                                                     AttrDomain::Curve,
                                                     CD_PROP_INT8,
-                                                    BuiltinAttributeProvider::Creatable,
                                                     BuiltinAttributeProvider::Deletable,
                                                     curve_access,
                                                     tag_component_topology_changed,
@@ -575,7 +565,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider normal_mode("normal_mode",
                                                     AttrDomain::Curve,
                                                     CD_PROP_INT8,
-                                                    BuiltinAttributeProvider::Creatable,
                                                     BuiltinAttributeProvider::Deletable,
                                                     curve_access,
                                                     tag_component_normals_changed,
@@ -584,7 +573,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider custom_normal("custom_normal",
                                                       AttrDomain::Point,
                                                       CD_PROP_FLOAT3,
-                                                      BuiltinAttributeProvider::Creatable,
                                                       BuiltinAttributeProvider::Deletable,
                                                       point_access,
                                                       tag_component_normals_changed);
@@ -598,7 +586,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider nurbs_knots_mode("knots_mode",
                                                          AttrDomain::Curve,
                                                          CD_PROP_INT8,
-                                                         BuiltinAttributeProvider::Creatable,
                                                          BuiltinAttributeProvider::Deletable,
                                                          curve_access,
                                                          tag_component_topology_changed,
@@ -613,7 +600,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider curve_type("curve_type",
                                                    AttrDomain::Curve,
                                                    CD_PROP_INT8,
-                                                   BuiltinAttributeProvider::Creatable,
                                                    BuiltinAttributeProvider::Deletable,
                                                    curve_access,
                                                    tag_component_curve_types_changed,
@@ -626,7 +612,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider resolution("resolution",
                                                    AttrDomain::Curve,
                                                    CD_PROP_INT32,
-                                                   BuiltinAttributeProvider::Creatable,
                                                    BuiltinAttributeProvider::Deletable,
                                                    curve_access,
                                                    tag_component_topology_changed,
@@ -635,7 +620,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider cyclic("cyclic",
                                                AttrDomain::Curve,
                                                CD_PROP_BOOL,
-                                               BuiltinAttributeProvider::Creatable,
                                                BuiltinAttributeProvider::Deletable,
                                                curve_access,
                                                tag_component_topology_changed);
diff --git a/source/blender/blenkernel/intern/geometry_component_instances.cc b/source/blender/blenkernel/intern/geometry_component_instances.cc
index 3123a9f91f5..c3a4892ad57 100644
--- a/source/blender/blenkernel/intern/geometry_component_instances.cc
+++ b/source/blender/blenkernel/intern/geometry_component_instances.cc
@@ -133,7 +133,6 @@ static ComponentAttributeProviders create_attribute_providers_for_instances()
   static BuiltinCustomDataLayerProvider id("id",
                                            AttrDomain::Instance,
                                            CD_PROP_INT32,
-                                           BuiltinAttributeProvider::Creatable,
                                            BuiltinAttributeProvider::Deletable,
                                            instance_custom_data_access,
                                            nullptr);
@@ -141,7 +140,6 @@ static ComponentAttributeProviders create_attribute_providers_for_instances()
   static BuiltinCustomDataLayerProvider instance_transform("instance_transform",
                                                            AttrDomain::Instance,
                                                            CD_PROP_FLOAT4X4,
-                                                           BuiltinAttributeProvider::Creatable,
                                                            BuiltinAttributeProvider::NonDeletable,
                                                            instance_custom_data_access,
                                                            nullptr);
@@ -150,7 +148,6 @@ static ComponentAttributeProviders create_attribute_providers_for_instances()
   static BuiltinCustomDataLayerProvider reference_index(".reference_index",
                                                         AttrDomain::Instance,
                                                         CD_PROP_INT32,
-                                                        BuiltinAttributeProvider::Creatable,
                                                         BuiltinAttributeProvider::NonDeletable,
                                                         instance_custom_data_access,
                                                         tag_component_reference_index_changed);
diff --git a/source/blender/blenkernel/intern/geometry_component_mesh.cc b/source/blender/blenkernel/intern/geometry_component_mesh.cc
index 93888867e63..e649853123f 100644
--- a/source/blender/blenkernel/intern/geometry_component_mesh.cc
+++ b/source/blender/blenkernel/intern/geometry_component_mesh.cc
@@ -1010,7 +1010,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
   static BuiltinCustomDataLayerProvider position("position",
                                                  AttrDomain::Point,
                                                  CD_PROP_FLOAT3,
-                                                 BuiltinAttributeProvider::Creatable,
                                                  BuiltinAttributeProvider::NonDeletable,
                                                  point_access,
                                                  tag_component_positions_changed);
@@ -1018,7 +1017,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
   static BuiltinCustomDataLayerProvider id("id",
                                            AttrDomain::Point,
                                            CD_PROP_INT32,
-                                           BuiltinAttributeProvider::Creatable,
                                            BuiltinAttributeProvider::Deletable,
                                            point_access,
                                            nullptr);
@@ -1033,7 +1031,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
   static BuiltinCustomDataLayerProvider material_index("material_index",
                                                        AttrDomain::Face,
                                                        CD_PROP_INT32,
-                                                       BuiltinAttributeProvider::Creatable,
                                                        BuiltinAttributeProvider::Deletable,
                                                        face_access,
                                                        nullptr,
@@ -1046,7 +1043,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
   static BuiltinCustomDataLayerProvider edge_verts(".edge_verts",
                                                    AttrDomain::Edge,
                                                    CD_PROP_INT32_2D,
-                                                   BuiltinAttributeProvider::Creatable,
                                                    BuiltinAttributeProvider::NonDeletable,
                                                    edge_access,
                                                    nullptr,
@@ -1061,7 +1057,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
   static BuiltinCustomDataLayerProvider corner_vert(".corner_vert",
                                                     AttrDomain::Corner,
                                                     CD_PROP_INT32,
-                                                    BuiltinAttributeProvider::Creatable,
                                                     BuiltinAttributeProvider::NonDeletable,
                                                     corner_access,
                                                     nullptr,
@@ -1069,7 +1064,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
   static BuiltinCustomDataLayerProvider corner_edge(".corner_edge",
                                                     AttrDomain::Corner,
                                                     CD_PROP_INT32,
-                                                    BuiltinAttributeProvider::Creatable,
                                                     BuiltinAttributeProvider::NonDeletable,
                                                     corner_access,
                                                     nullptr,
@@ -1078,7 +1072,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
   static BuiltinCustomDataLayerProvider sharp_face("sharp_face",
                                                    AttrDomain::Face,
                                                    CD_PROP_BOOL,
-                                                   BuiltinAttributeProvider::Creatable,
                                                    BuiltinAttributeProvider::Deletable,
                                                    face_access,
                                                    tag_component_sharpness_changed);
@@ -1086,7 +1079,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
   static BuiltinCustomDataLayerProvider sharp_edge("sharp_edge",
                                                    AttrDomain::Edge,
                                                    CD_PROP_BOOL,
-                                                   BuiltinAttributeProvider::Creatable,
                                                    BuiltinAttributeProvider::Deletable,
                                                    edge_access,
                                                    tag_component_sharpness_changed);
diff --git a/source/blender/blenkernel/intern/geometry_component_pointcloud.cc b/source/blender/blenkernel/intern/geometry_component_pointcloud.cc
index 996d915855d..4895f2d4323 100644
--- a/source/blender/blenkernel/intern/geometry_component_pointcloud.cc
+++ b/source/blender/blenkernel/intern/geometry_component_pointcloud.cc
@@ -147,21 +147,18 @@ static ComponentAttributeProviders create_attribute_providers_for_point_cloud()
   static BuiltinCustomDataLayerProvider position("position",
                                                  AttrDomain::Point,
                                                  CD_PROP_FLOAT3,
-                                                 BuiltinAttributeProvider::Creatable,
                                                  BuiltinAttributeProvider::NonDeletable,
                                                  point_access,
                                                  tag_component_positions_changed);
   static BuiltinCustomDataLayerProvider radius("radius",
                                                AttrDomain::Point,
                                                CD_PROP_FLOAT,
-                                               BuiltinAttributeProvider::Creatable,
                                                BuiltinAttributeProvider::Deletable,
                                                point_access,
                                                tag_component_radius_changed);
   static BuiltinCustomDataLayerProvider id("id",
                                            AttrDomain::Point,
                                            CD_PROP_INT32,
-                                           BuiltinAttributeProvider::Creatable,
                                            BuiltinAttributeProvider::Deletable,
                                            point_access,
                                            nullptr);
-- 
2.30.2


From 8bdb437601047e2fefc583d19d4d160cf5a12000 Mon Sep 17 00:00:00 2001
From: David-Haver <davidnhaver@gmail.com>
Date: Mon, 25 Mar 2024 23:41:20 +0200
Subject: [PATCH 35/36] initial commit for the new branch

---
 .../blender/geometry/GEO_join_geometries.hh   |  4 +-
 .../blender/geometry/GEO_realize_instances.hh | 12 +---
 .../geometry/intern/join_geometries.cc        |  6 +-
 .../geometry/intern/realize_instances.cc      | 60 +++++++++----------
 .../nodes/node_geo_realize_instances.cc       | 10 ++--
 5 files changed, 40 insertions(+), 52 deletions(-)

diff --git a/source/blender/geometry/GEO_join_geometries.hh b/source/blender/geometry/GEO_join_geometries.hh
index 8003cf5028b..0bdb179c5db 100644
--- a/source/blender/geometry/GEO_join_geometries.hh
+++ b/source/blender/geometry/GEO_join_geometries.hh
@@ -13,6 +13,6 @@ bke::GeometrySet join_geometries(Span<bke::GeometrySet> geometries,
                                  const bke::AnonymousAttributePropagationInfo &propagation_info);
 
 void join_attributes(Span<const bke::GeometryComponent *> src_components,
-                     bke::GeometryComponent &r_result,
-                     Span<StringRef> ignored_attributes = {});
+                     Span<StringRef> ignored_attributes,
+                     bke::GeometryComponent &r_result);
 }  // namespace blender::geometry
diff --git a/source/blender/geometry/GEO_realize_instances.hh b/source/blender/geometry/GEO_realize_instances.hh
index 08a0d8a6da1..fe69ecddb5a 100644
--- a/source/blender/geometry/GEO_realize_instances.hh
+++ b/source/blender/geometry/GEO_realize_instances.hh
@@ -62,16 +62,8 @@ bke::GeometrySet realize_instances(bke::GeometrySet geometry_set,
                                    const RealizeInstancesOptions &options);
 
 /**
- * Join all instances into a single geometry component for each geometry type. For example, all
- * mesh instances (including the already realized mesh) are joined into a single mesh. The output
- * geometry set does not contain any instances. If the input did not contain any instances, it is
- * returned directly.
- *
- * The `id` attribute has special handling. If there is an id attribute on any component, the
- * output will contain an `id` attribute as well. The output id is generated by mixing/hashing ids
- * of instances and of the instanced geometry data.
- *
- * Will realize only the instances chosen by varied_depth_option to there chosen depth.
+ * Same as #realize_instances, but will realize only the instances chosen by
+ * varied_depth_option to their chosen depth.
  */
 bke::GeometrySet realize_instances(bke::GeometrySet geometry_set,
                                    const RealizeInstancesOptions &options,
diff --git a/source/blender/geometry/intern/join_geometries.cc b/source/blender/geometry/intern/join_geometries.cc
index 92912e9fa1a..1b1ac578dc8 100644
--- a/source/blender/geometry/intern/join_geometries.cc
+++ b/source/blender/geometry/intern/join_geometries.cc
@@ -74,8 +74,8 @@ static void fill_new_attribute(const Span<const GeometryComponent *> src_compone
 }
 
 void join_attributes(const Span<const GeometryComponent *> src_components,
-                     GeometryComponent &r_result,
-                     const Span<StringRef> ignored_attributes)
+                     const Span<StringRef> ignored_attributes,
+                     GeometryComponent &r_result)
 {
   const Map<AttributeIDRef, AttributeMetaData> info = get_final_attribute_info(src_components,
                                                                                ignored_attributes);
@@ -129,7 +129,7 @@ static void join_instances(const Span<const GeometryComponent *> src_components,
 
   r_result.replace_instances(dst_instances.release());
   auto &dst_component = r_result.get_component_for_write<bke::InstancesComponent>();
-  join_attributes(src_components, dst_component, {".reference_index"});
+  join_attributes(src_components, {".reference_index"}, dst_component);
 }
 
 static void join_volumes(const Span<const GeometryComponent *> /*src_components*/,
diff --git a/source/blender/geometry/intern/realize_instances.cc b/source/blender/geometry/intern/realize_instances.cc
index 45b6d3c1285..27a735a5516 100644
--- a/source/blender/geometry/intern/realize_instances.cc
+++ b/source/blender/geometry/intern/realize_instances.cc
@@ -298,7 +298,7 @@ struct InstanceContext {
         curves(gather_info.curves.attributes.size()),
         instances(gather_info.instances_attriubutes.size())
   {
-    //empty
+    // empty
   }
 };
 
@@ -753,7 +753,7 @@ static void gather_realize_tasks_recursive(GatherTasksInfo &gather_info,
  * is an instance, the condition is true only when the depth is exactly 0. Additionally, the
  * function extends its operation to instances if any of their nested children meet the first
  * condition.
- * 
+ *
  * Based on bke::GeometrySet::attribute_foreach
  */
 static bool attribute_foreach(const bke::GeometrySet &geometry_set,
@@ -823,12 +823,11 @@ static bool attribute_foreach(const bke::GeometrySet &geometry_set,
   return is_relevant;
 }
 
-
 /**
  * Based on bke::GeometrySet::gather_attributes_for_propagation.
  * Specialized for Specialized attribute_foreach to get:
  * current_depth, depth_target, instance_depth and selection.
-*/
+ */
 void static gather_attributes_for_propagation(
     bke::GeometrySet re_geometry_set,
     const Span<bke::GeometryComponent::Type> component_types,
@@ -869,7 +868,8 @@ void static gather_attributes_for_propagation(
 
                       AttrDomain domain = meta_data.domain;
                       if (dst_component_type != bke::GeometryComponent::Type::Instance &&
-                          domain == AttrDomain::Instance) {
+                          domain == AttrDomain::Instance)
+                      {
                         domain = AttrDomain::Point;
                       }
 
@@ -983,7 +983,7 @@ static void execute_instances_tasks(
         continue;
       }
 
-      const void *attribute_ptr;  
+      const void *attribute_ptr;
       if (attribute_fallback_array[attribute_index] != nullptr) {
         attribute_ptr = attribute_fallback_array[attribute_index];
       }
@@ -1014,7 +1014,7 @@ static void execute_instances_tasks(
   }
 
   join_attributes(
-      for_join_attributes, dst_component, {"position", ".reference_index", "instance_transform"});
+      for_join_attributes, {"position", ".reference_index", "instance_transform"}, dst_component);
 }
 
 /** \} */
@@ -1990,9 +1990,7 @@ bke::GeometrySet realize_instances(bke::GeometrySet geometry_set,
   VariedDepthOption all_instances;
   all_instances.depths = VArray<int>::ForSingle(VariedDepthOption::MAX_DEPTH,
                                                 geometry_set.get_instances()->instances_num());
-  IndexMaskMemory memory;
-  all_instances.selection = IndexMask::from_bools(
-      VArray<bool>::ForSingle(true, geometry_set.get_instances()->instances_num()), memory);
+  all_instances.selection = IndexMask(geometry_set.get_instances()->instances_num());
   return realize_instances(geometry_set, options, all_instances);
 }
 
@@ -2049,12 +2047,8 @@ bke::GeometrySet realize_instances(bke::GeometrySet geometry_set,
   const float4x4 transform = float4x4::identity();
   InstanceContext attribute_fallbacks(gather_info);
 
-  gather_realize_tasks_recursive(gather_info,
-                                 0,
-                                 VariedDepthOption::MAX_DEPTH,
-                                 geometry_set,
-                                 transform,
-                                 attribute_fallbacks);
+  gather_realize_tasks_recursive(
+      gather_info, 0, VariedDepthOption::MAX_DEPTH, geometry_set, transform, attribute_fallbacks);
 
   bke::GeometrySet new_geometry_set;
   execute_instances_tasks(gather_info.instances.instances_components_to_merge,
@@ -2062,28 +2056,28 @@ bke::GeometrySet realize_instances(bke::GeometrySet geometry_set,
                           all_instance_attributes,
                           gather_info.instances.attribute_fallback,
                           new_geometry_set);
-                          
+
   const int64_t total_points_num = get_final_points_num(gather_info.r_tasks);
   /* This doesn't have to be exact at all, it's just a rough estimate ot make decisions about
    * multi-threading (overhead). */
   const int64_t approximate_used_bytes_num = total_points_num * 32;
   threading::memory_bandwidth_bound_task(approximate_used_bytes_num, [&]() {
-  execute_realize_pointcloud_tasks(options.keep_original_ids,
-                                   all_pointclouds_info,
-                                   gather_info.r_tasks.pointcloud_tasks,
-                                   all_pointclouds_info.attributes,
-                                   new_geometry_set);
-  execute_realize_mesh_tasks(options.keep_original_ids,
-                             all_meshes_info,
-                             gather_info.r_tasks.mesh_tasks,
-                             all_meshes_info.attributes,
-                             all_meshes_info.materials,
-                             new_geometry_set);
-  execute_realize_curve_tasks(options.keep_original_ids,
-                              all_curves_info,
-                              gather_info.r_tasks.curve_tasks,
-                              all_curves_info.attributes,
-                              new_geometry_set);
+    execute_realize_pointcloud_tasks(options.keep_original_ids,
+                                     all_pointclouds_info,
+                                     gather_info.r_tasks.pointcloud_tasks,
+                                     all_pointclouds_info.attributes,
+                                     new_geometry_set);
+    execute_realize_mesh_tasks(options.keep_original_ids,
+                               all_meshes_info,
+                               gather_info.r_tasks.mesh_tasks,
+                               all_meshes_info.attributes,
+                               all_meshes_info.materials,
+                               new_geometry_set);
+    execute_realize_curve_tasks(options.keep_original_ids,
+                                all_curves_info,
+                                gather_info.r_tasks.curve_tasks,
+                                all_curves_info.attributes,
+                                new_geometry_set);
   });
   if (gather_info.r_tasks.first_volume) {
     new_geometry_set.add(*gather_info.r_tasks.first_volume);
diff --git a/source/blender/nodes/geometry/nodes/node_geo_realize_instances.cc b/source/blender/nodes/geometry/nodes/node_geo_realize_instances.cc
index 2fd19b7b9df..6b11b32eec0 100644
--- a/source/blender/nodes/geometry/nodes/node_geo_realize_instances.cc
+++ b/source/blender/nodes/geometry/nodes/node_geo_realize_instances.cc
@@ -18,13 +18,13 @@ static void node_declare(NodeDeclarationBuilder &b)
   b.add_input<decl::Bool>("Selection")
       .default_value(true)
       .hide_value()
-      .supports_field()
+      .field_on_all()
       .description("Which top-level instances to realize");
   b.add_input<decl::Bool>("Realize All")
       .default_value(true)
-      .supports_field()
+      .field_on_all()
       .description("Determine wether to realize nested instances completly");
-  b.add_input<decl::Int>("Depth").default_value(0).min(0).supports_field().description(
+  b.add_input<decl::Int>("Depth").default_value(0).min(0).field_on_all().description(
       "Number of levels of nested instances to realize for each top-level instance");
   b.add_output<decl::Geometry>("Geometry").propagate_all();
 }
@@ -44,7 +44,9 @@ static void node_geo_exec(GeoNodeExecParams params)
 
   static auto depth_override = mf::build::SI2_SO<int, bool, int>(
       "depth_override",
-      [](int value, bool realize) { return realize ? -1 : std::max(value, 0); },
+      [](int value, bool realize_all_filed) {
+        return realize_all_filed ? geometry::VariedDepthOption::MAX_DEPTH : std::max(value, 0);
+      },
       mf::build::exec_presets::AllSpanOrSingle());
 
   static auto selection_override = mf::build::SI2_SO<int, bool, bool>(
-- 
2.30.2


From 0bc454422f1faa3e8eacba8a2aa03e13e4a99983 Mon Sep 17 00:00:00 2001
From: David-Haver <davidnhaver@gmail.com>
Date: Wed, 27 Mar 2024 22:11:55 +0200
Subject: [PATCH 36/36] moved fields where possible instead of giving them by
 value

---
 .../nodes/node_geo_realize_instances.cc       | 21 +++++++++++--------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/source/blender/nodes/geometry/nodes/node_geo_realize_instances.cc b/source/blender/nodes/geometry/nodes/node_geo_realize_instances.cc
index 6b11b32eec0..3f9f64e6a47 100644
--- a/source/blender/nodes/geometry/nodes/node_geo_realize_instances.cc
+++ b/source/blender/nodes/geometry/nodes/node_geo_realize_instances.cc
@@ -38,26 +38,29 @@ static void node_geo_exec(GeoNodeExecParams params)
   }
 
   GeometryComponentEditData::remember_deformed_positions_if_necessary(geometry_set);
-  Field<bool> selection_field = params.extract_input<Field<bool>>("Selection");
-  Field<bool> realize_all_filed = params.extract_input<Field<bool>>("Realize All");
+
+  Field<bool> realize_all_field = params.extract_input<Field<bool>>("Realize All");
   Field<int> depth_field = params.extract_input<Field<int>>("Depth");
 
   static auto depth_override = mf::build::SI2_SO<int, bool, int>(
       "depth_override",
-      [](int value, bool realize_all_filed) {
-        return realize_all_filed ? geometry::VariedDepthOption::MAX_DEPTH : std::max(value, 0);
+      [](int depth, bool realize_all_field) {
+        return realize_all_field ? geometry::VariedDepthOption::MAX_DEPTH : std::max(depth, 0);
       },
       mf::build::exec_presets::AllSpanOrSingle());
 
+  Field<int> depth_field_overrided(FieldOperation::Create(
+      depth_override, {std::move(depth_field), std::move(realize_all_field)}));
+
+  Field<bool> selection_field = params.extract_input<Field<bool>>("Selection");
+
   static auto selection_override = mf::build::SI2_SO<int, bool, bool>(
       "selection_override",
-      [](int value, bool selection) { return value == 0 ? false : selection; },
+      [](int depth_override, bool selection) { return depth_override == 0 ? false : selection; },
       mf::build::exec_presets::AllSpanOrSingle());
 
-  Field<int> depth_field_overrided(
-      FieldOperation::Create(depth_override, {depth_field, realize_all_filed}));
-  Field<bool> selection_field_overrided(
-      FieldOperation::Create(selection_override, {depth_field_overrided, selection_field}));
+  Field<bool> selection_field_overrided(FieldOperation::Create(
+      selection_override, {depth_field_overrided, std::move(selection_field)}));
 
   const bke::Instances &instances = *geometry_set.get_instances();
   const bke::InstancesFieldContext field_context(instances);
-- 
2.30.2