diff --git a/doc/python_api/sphinx_doc_gen.py b/doc/python_api/sphinx_doc_gen.py
index e87815821c5..a4d66f5c5fe 100644
--- a/doc/python_api/sphinx_doc_gen.py
+++ b/doc/python_api/sphinx_doc_gen.py
@@ -1949,7 +1949,7 @@ if html_theme == "furo":
             "sidebar/scroll-start.html",
             "sidebar/navigation.html",
             "sidebar/scroll-end.html",
-            # "sidebar/variant-selector.html",
+            "sidebar/variant-selector.html",
         ]
     }
 """)
diff --git a/extern/CMakeLists.txt b/extern/CMakeLists.txt
index 30842f32441..c9b8a2deea2 100644
--- a/extern/CMakeLists.txt
+++ b/extern/CMakeLists.txt
@@ -104,10 +104,6 @@ if(WITH_MOD_FLUID)
   add_subdirectory(mantaflow)
 endif()
 
-if(WITH_COMPOSITOR_CPU)
-  add_subdirectory(smaa_areatex)
-endif()
-
 if(WITH_VULKAN_BACKEND)
   add_subdirectory(vulkan_memory_allocator)
 endif()
diff --git a/extern/smaa_areatex/CMakeLists.txt b/extern/smaa_areatex/CMakeLists.txt
deleted file mode 100644
index ace1406c37f..00000000000
--- a/extern/smaa_areatex/CMakeLists.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-# SPDX-FileCopyrightText: 2017 Blender Foundation
-#
-# SPDX-License-Identifier: GPL-2.0-or-later
-
-add_executable(smaa_areatex smaa_areatex.cpp)
diff --git a/extern/smaa_areatex/README.blender b/extern/smaa_areatex/README.blender
deleted file mode 100644
index 9c409142ae8..00000000000
--- a/extern/smaa_areatex/README.blender
+++ /dev/null
@@ -1,5 +0,0 @@
-Project: smaa-cpp
-URL: https://github.com/iRi-E/smaa-cpp
-License: MIT
-Upstream version: 0.4.0
-Local modifications:
diff --git a/extern/smaa_areatex/smaa_areatex.cpp b/extern/smaa_areatex/smaa_areatex.cpp
deleted file mode 100644
index c61543e10a0..00000000000
--- a/extern/smaa_areatex/smaa_areatex.cpp
+++ /dev/null
@@ -1,1210 +0,0 @@
-/**
- * Copyright (C) 2016-2017 IRIE Shinsuke
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * smaa_areatex.cpp  version 0.4.0
- *
- * This is a part of smaa-cpp that is an implementation of
- * Enhanced Subpixel Morphological Antialiasing (SMAA) written in C++.
- *
- * This program is C++ rewrite of AreaTex.py included in the original
- * SMAA ditribution:
- *
- *   https://github.com/iryoku/smaa/tree/master/Scripts
- */
-
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-
-#include <cmath>
-
-/*------------------------------------------------------------------------------*/
-/* Type Definitions */
-
-class Int2;
-class Dbl2;
-
-class Int2 {
-public:
-	int x, y;
-
-	Int2() { this->x = this->y = 0; }
-	Int2(int x) { this->x = this->y = x; }
-	Int2(int x, int y) { this->x = x; this->y = y; }
-
-	operator Dbl2();
-
-	Int2 operator + (Int2 other) { return Int2(x + other.x, y + other.y); }
-	Int2 operator * (Int2 other) { return Int2(x * other.x, y * other.y); }
-};
-
-class Dbl2 {
-public:
-	double x, y;
-
-	Dbl2() { this->x = this->y = 0.0; }
-	Dbl2(double x) { this->x = this->y = x; }
-	Dbl2(double x, double y) { this->x = x; this->y = y; }
-
-	Dbl2 apply(double (* func)(double)) { return Dbl2(func(x), func(y)); }
-
-	operator Int2();
-
-	Dbl2 operator + (Dbl2 other) { return Dbl2(x + other.x, y + other.y); }
-	Dbl2 operator - (Dbl2 other) { return Dbl2(x - other.x, y - other.y); }
-	Dbl2 operator * (Dbl2 other) { return Dbl2(x * other.x, y * other.y); }
-	Dbl2 operator / (Dbl2 other) { return Dbl2(x / other.x, y / other.y); }
-	Dbl2 operator += (Dbl2 other) { return Dbl2(x += other.x, y += other.y); }
-	bool operator == (Dbl2 other) { return (x == other.x && y == other.y); }
-};
-
-Int2::operator Dbl2() { return Dbl2((double)x, (double)y); }
-Dbl2::operator Int2() { return Int2((int)x, (int)y); }
-
-/*------------------------------------------------------------------------------*/
-/* Data to Calculate Areatex */
-
-/* Texture sizes: */
-/* (it's quite possible that this is not easily configurable) */
-static const int SUBSAMPLES_ORTHO = 7;
-static const int SUBSAMPLES_DIAG  = 5;
-static const int MAX_DIST_ORTHO_COMPAT = 16;
-static const int MAX_DIST_ORTHO = 20;
-static const int MAX_DIST_DIAG  = 20;
-static const int TEX_SIZE_ORTHO = 80; /* 16 * 5 slots = 80 */
-static const int TEX_SIZE_DIAG  = 80; /* 20 * 4 slots = 80 */
-
-/* Number of samples for calculating areas in the diagonal textures: */
-/* (diagonal areas are calculated using brute force sampling) */
-static const int SAMPLES_DIAG = 30;
-
-/* Maximum distance for smoothing u-shapes: */
-static const int SMOOTH_MAX_DISTANCE = 32;
-
-/*------------------------------------------------------------------------------*/
-/* Offset Tables */
-
-/* Offsets for subsample rendering */
-static const double subsample_offsets_ortho[SUBSAMPLES_ORTHO] = {
-	0.0,    /* 0 */
-	-0.25,  /* 1 */
-	0.25,   /* 2 */
-	-0.125, /* 3 */
-	0.125,  /* 4 */
-	-0.375, /* 5 */
-	0.375   /* 6 */
-};
-
-static const Dbl2 subsample_offsets_diag[SUBSAMPLES_DIAG] = {
-	{ 0.00,   0.00},  /* 0 */
-	{ 0.25,  -0.25},  /* 1 */
-	{-0.25,   0.25},  /* 2 */
-	{ 0.125, -0.125}, /* 3 */
-	{-0.125,  0.125}  /* 4 */
-};
-
-/* Mapping offsets for placing each pattern subtexture into its place */
-enum edgesorthoIndices
-{
-	EDGESORTHO_NONE_NONE = 0,
-	EDGESORTHO_NONE_NEGA = 1,
-	EDGESORTHO_NONE_POSI = 2,
-	EDGESORTHO_NONE_BOTH = 3,
-	EDGESORTHO_NEGA_NONE = 4,
-	EDGESORTHO_NEGA_NEGA = 5,
-	EDGESORTHO_NEGA_POSI = 6,
-	EDGESORTHO_NEGA_BOTH = 7,
-	EDGESORTHO_POSI_NONE = 8,
-	EDGESORTHO_POSI_NEGA = 9,
-	EDGESORTHO_POSI_POSI = 10,
-	EDGESORTHO_POSI_BOTH = 11,
-	EDGESORTHO_BOTH_NONE = 12,
-	EDGESORTHO_BOTH_NEGA = 13,
-	EDGESORTHO_BOTH_POSI = 14,
-	EDGESORTHO_BOTH_BOTH = 15,
-};
-
-static const Int2 edgesortho_compat[16] = {
-	{0, 0}, {0, 1}, {0, 3}, {0, 4}, {1, 0}, {1, 1}, {1, 3}, {1, 4},
-	{3, 0}, {3, 1}, {3, 3}, {3, 4}, {4, 0}, {4, 1}, {4, 3}, {4, 4}
-};
-
-static const Int2 edgesortho[16] = {
-	{0, 0}, {0, 1}, {0, 2}, {0, 3}, {1, 0}, {1, 1}, {1, 2}, {1, 3},
-	{2, 0}, {2, 1}, {2, 2}, {2, 3}, {3, 0}, {3, 1}, {3, 2}, {3, 3}
-};
-
-enum edgesdiagIndices
-{
-	EDGESDIAG_NONE_NONE = 0,
-	EDGESDIAG_NONE_VERT = 1,
-	EDGESDIAG_NONE_HORZ = 2,
-	EDGESDIAG_NONE_BOTH = 3,
-	EDGESDIAG_VERT_NONE = 4,
-	EDGESDIAG_VERT_VERT = 5,
-	EDGESDIAG_VERT_HORZ = 6,
-	EDGESDIAG_VERT_BOTH = 7,
-	EDGESDIAG_HORZ_NONE = 8,
-	EDGESDIAG_HORZ_VERT = 9,
-	EDGESDIAG_HORZ_HORZ = 10,
-	EDGESDIAG_HORZ_BOTH = 11,
-	EDGESDIAG_BOTH_NONE = 12,
-	EDGESDIAG_BOTH_VERT = 13,
-	EDGESDIAG_BOTH_HORZ = 14,
-	EDGESDIAG_BOTH_BOTH = 15,
-};
-
-static const Int2 edgesdiag[16] = {
-	{0, 0}, {0, 1}, {0, 2}, {0, 3}, {1, 0}, {1, 1}, {1, 2}, {1, 3},
-	{2, 0}, {2, 1}, {2, 2}, {2, 3}, {3, 0}, {3, 1}, {3, 2}, {3, 3}
-};
-
-/*------------------------------------------------------------------------------*/
-/* Miscellaneous Utility Functions */
-
-/* Linear interpolation: */
-static Dbl2 lerp(Dbl2 a, Dbl2 b, double p)
-{
-	return a + (b - a) * Dbl2(p);
-}
-
-/* Saturates a value to [0..1] range: */
-static double saturate(double x)
-{
-	return 0.0 < x ? (x < 1.0 ? x : 1.0) : 0.0;
-}
-
-/*------------------------------------------------------------------------------*/
-/* Horizontal/Vertical Areas */
-
-class AreaOrtho {
-	double m_data[SUBSAMPLES_ORTHO][TEX_SIZE_ORTHO][TEX_SIZE_ORTHO][2];
-	bool m_compat;
-	bool m_orig_u;
-public:
-	AreaOrtho(bool compat, bool orig_u) : m_compat(compat), m_orig_u(orig_u) {}
-
-	double *getData() { return (double *)&m_data; }
-	Dbl2 getPixel(int offset_index, Int2 coords) {
-		return Dbl2(m_data[offset_index][coords.y][coords.x][0],
-			    m_data[offset_index][coords.y][coords.x][1]);
-	}
-
-	void areaTex(int offset_index);
-private:
-	void putPixel(int offset_index, Int2 coords, Dbl2 pixel) {
-		m_data[offset_index][coords.y][coords.x][0] = pixel.x;
-		m_data[offset_index][coords.y][coords.x][1] = pixel.y;
-	}
-
-	Dbl2 smoothArea(double d, Dbl2 a1, Dbl2 a2);
-	Dbl2 makeQuad(int x, double d, double o);
-	Dbl2 area(Dbl2 p1, Dbl2 p2, int x);
-	Dbl2 calculate(int pattern, int left, int right, double offset);
-};
-
-/* Smoothing function for small u-patterns: */
-Dbl2 AreaOrtho::smoothArea(double d, Dbl2 a1, Dbl2 a2)
-{
-	Dbl2 b1 = (a1 * Dbl2(2.0)).apply(sqrt) * Dbl2(0.5);
-	Dbl2 b2 = (a2 * Dbl2(2.0)).apply(sqrt) * Dbl2(0.5);
-	double p = saturate(d / (double)SMOOTH_MAX_DISTANCE);
-	return lerp(b1, a1, p) + lerp(b2, a2, p);
-}
-
-/* Smoothing u-patterns by quadratic function: */
-Dbl2 AreaOrtho::makeQuad(int x, double d, double o)
-{
-	double r = (double)x;
-
-	/* fmin() below is a trick to smooth tiny u-patterns: */
-	return Dbl2(r, (1.0 - fmin(4.0, d) * r * (d - r) / (d * d)) * o);
-}
-
-/* Calculates the area under the line p1->p2, for the pixel x..x+1: */
-Dbl2 AreaOrtho::area(Dbl2 p1, Dbl2 p2, int x)
-{
-	Dbl2 d = p2 - p1;
-	double x1 = (double)x;
-	double x2 = x1 + 1.0;
-
-	if ((x1 >= p1.x && x1 < p2.x) || (x2 > p1.x && x2 <= p2.x)) { /* inside? */
-		double y1 = p1.y + (x1 - p1.x) * d.y / d.x;
-		double y2 = p1.y + (x2 - p1.x) * d.y / d.x;
-
-		if ((copysign(1.0, y1) == copysign(1.0, y2) ||
-		     fabs(y1) < 1e-4 || fabs(y2) < 1e-4)) { /* trapezoid? */
-			double a = (y1 + y2) / 2.0;
-			if (a < 0.0)
-				return Dbl2(fabs(a), 0.0);
-			else
-				return Dbl2(0.0, fabs(a));
-		}
-		else { /* Then, we got two triangles: */
-			double x = p1.x - p1.y * d.x / d.y, xi;
-			double a1 = x > p1.x ? y1 * modf(x, &xi) / 2.0 : 0.0;
-			double a2 = x < p2.x ? y2 * (1.0 - modf(x, &xi)) / 2.0 : 0.0;
-			double a = fabs(a1) > fabs(a2) ? a1 : -a2;
-			if (a < 0.0)
-				return Dbl2(fabs(a1), fabs(a2));
-			else
-				return Dbl2(fabs(a2), fabs(a1));
-		}
-	}
-	else
-		return Dbl2(0.0, 0.0);
-}
-
-/* Calculates the area for a given pattern and distances to the left and to the */
-/* right, biased by an offset: */
-Dbl2 AreaOrtho::calculate(int pattern, int left, int right, double offset)
-{
-	Dbl2 a1, a2;
-
-	/*
-	 * o1           |
-	 *      .-------´
-	 * o2   |
-	 *
-	 *      <---d--->
-	 */
-	double d = (double)(left + right + 1);
-
-	double o1 = 0.5 + offset;
-	double o2 = 0.5 + offset - 1.0;
-
-	switch (pattern) {
-		case EDGESORTHO_NONE_NONE:
-		{
-			/*
-			 *
-			 *    ------
-			 *
-			 */
-			return Dbl2(0.0, 0.0);
-			break;
-		}
-		case EDGESORTHO_POSI_NONE:
-		{
-			/*
-			 *
-			 *   .------
-			 *   |
-			 *
-			 * We only offset L patterns in the crossing edge side, to make it
-			 * converge with the unfiltered pattern 0 (we don't want to filter the
-			 * pattern 0 to avoid artifacts).
-			 */
-			if (left <= right)
-				return area(Dbl2(0.0, o2), Dbl2(d / 2.0, 0.0), left);
-			else
-				return Dbl2(0.0, 0.0);
-			break;
-		}
-		case EDGESORTHO_NONE_POSI:
-		{
-			/*
-			 *
-			 *    ------.
-			 *          |
-			 */
-			if (left >= right)
-				return area(Dbl2(d / 2.0, 0.0), Dbl2(d, o2), left);
-			else
-				return Dbl2(0.0, 0.0);
-			break;
-		}
-		case EDGESORTHO_POSI_POSI:
-		{
-			/*
-			 *
-			 *   .------.
-			 *   |      |
-			 */
-			if (m_orig_u) {
-				a1 = area(Dbl2(0.0, o2), Dbl2(d / 2.0, 0.0), left);
-				a2 = area(Dbl2(d / 2.0, 0.0), Dbl2(d, o2), left);
-				return smoothArea(d, a1, a2);
-			}
-			else
-				return area(makeQuad(left, d, o2), makeQuad(left + 1, d, o2), left);
-			break;
-		}
-		case EDGESORTHO_NEGA_NONE:
-		{
-			/*
-			 *   |
-			 *   `------
-			 *
-			 */
-			if (left <= right)
-				return area(Dbl2(0.0, o1), Dbl2(d / 2.0, 0.0), left);
-			else
-				return Dbl2(0.0, 0.0);
-			break;
-		}
-		case EDGESORTHO_BOTH_NONE:
-		{
-			/*
-			 *   |
-			 *   +------
-			 *   |
-			 */
-			return Dbl2(0.0, 0.0);
-			break;
-		}
-		case EDGESORTHO_NEGA_POSI:
-		{
-			/*
-			 *   |
-			 *   `------.
-			 *          |
-			 *
-			 * A problem of not offseting L patterns (see above), is that for certain
-			 * max search distances, the pixels in the center of a Z pattern will
-			 * detect the full Z pattern, while the pixels in the sides will detect a
-			 * L pattern. To avoid discontinuities, we blend the full offsetted Z
-			 * revectorization with partially offsetted L patterns.
-			 */
-			if (fabs(offset) > 0.0) {
-				a1 = area(Dbl2(0.0, o1), Dbl2(d, o2), left);
-				a2 = area(Dbl2(0.0, o1), Dbl2(d / 2.0, 0.0), left);
-				a2 += area(Dbl2(d / 2.0, 0.0), Dbl2(d, o2), left);
-				return (a1 + a2) / Dbl2(2.0);
-			}
-			else
-				return area(Dbl2(0.0, o1), Dbl2(d, o2), left);
-			break;
-		}
-		case EDGESORTHO_BOTH_POSI:
-		{
-			/*
-			 *   |
-			 *   +------.
-			 *   |      |
-			 */
-			return area(Dbl2(0.0, o1), Dbl2(d, o2), left);
-			break;
-		}
-		case EDGESORTHO_NONE_NEGA:
-		{
-			/*
-			 *          |
-			 *    ------´
-			 *
-			 */
-			if (left >= right)
-				return area(Dbl2(d / 2.0, 0.0), Dbl2(d, o1), left);
-			else
-				return Dbl2(0.0, 0.0);
-			break;
-		}
-		case EDGESORTHO_POSI_NEGA:
-		{
-			/*
-			 *          |
-			 *   .------´
-			 *   |
-			 */
-			if (fabs(offset) > 0.0) {
-				a1 = area(Dbl2(0.0, o2), Dbl2(d, o1), left);
-				a2 = area(Dbl2(0.0, o2), Dbl2(d / 2.0, 0.0), left);
-				a2 += area(Dbl2(d / 2.0, 0.0), Dbl2(d, o1), left);
-				return (a1 + a2) / Dbl2(2.0);
-			}
-			else
-				return area(Dbl2(0.0, o2), Dbl2(d, o1), left);
-			break;
-		}
-		case EDGESORTHO_NONE_BOTH:
-		{
-			/*
-			 *          |
-			 *    ------+
-			 *          |
-			 */
-			return Dbl2(0.0, 0.0);
-			break;
-		}
-		case EDGESORTHO_POSI_BOTH:
-		{
-			/*
-			 *          |
-			 *   .------+
-			 *   |      |
-			 */
-			return area(Dbl2(0.0, o2), Dbl2(d, o1), left);
-			break;
-		}
-		case EDGESORTHO_NEGA_NEGA:
-		{
-			/*
-			 *   |      |
-			 *   `------´
-			 *
-			 */
-			if (m_orig_u) {
-				a1 = area(Dbl2(0.0, o1), Dbl2(d / 2.0, 0.0), left);
-				a2 = area(Dbl2(d / 2.0, 0.0), Dbl2(d, o1), left);
-				return smoothArea(d, a1, a2);
-			}
-			else
-				return area(makeQuad(left, d, o1), makeQuad(left + 1, d, o1), left);
-			break;
-		}
-		case EDGESORTHO_BOTH_NEGA:
-		{
-			/*
-			 *   |      |
-			 *   +------´
-			 *   |
-			 */
-			return area(Dbl2(0.0, o2), Dbl2(d, o1), left);
-			break;
-		}
-		case EDGESORTHO_NEGA_BOTH:
-		{
-			/*
-			 *   |      |
-			 *   `------+
-			 *          |
-			 */
-			return area(Dbl2(0.0, o1), Dbl2(d, o2), left);
-			break;
-		}
-		case EDGESORTHO_BOTH_BOTH:
-		{
-			/*
-			 *   |      |
-			 *   +------+
-			 *   |      |
-			 */
-			return Dbl2(0.0, 0.0);
-			break;
-		}
-	}
-
-	return Dbl2(0.0, 0.0);
-}
-
-/*------------------------------------------------------------------------------*/
-/* Diagonal Areas */
-
-class AreaDiag {
-	double m_data[SUBSAMPLES_DIAG][TEX_SIZE_DIAG][TEX_SIZE_DIAG][2];
-	bool m_numeric;
-	bool m_orig_u;
-public:
-	AreaDiag(bool numeric, bool orig_u) : m_numeric(numeric), m_orig_u(orig_u) {}
-
-	double *getData() { return (double *)&m_data; }
-	Dbl2 getPixel(int offset_index, Int2 coords) {
-		return Dbl2(m_data[offset_index][coords.y][coords.x][0],
-			    m_data[offset_index][coords.y][coords.x][1]);
-	}
-
-	void areaTex(int offset_index);
-private:
-	void putPixel(int offset_index, Int2 coords, Dbl2 pixel) {
-		m_data[offset_index][coords.y][coords.x][0] = pixel.x;
-		m_data[offset_index][coords.y][coords.x][1] = pixel.y;
-	}
-
-	double area1(Dbl2 p1, Dbl2 p2, Int2 p);
-	Dbl2 area(Dbl2 p1, Dbl2 p2, int left);
-	Dbl2 areaTriangle(Dbl2 p1L, Dbl2 p2L, Dbl2 p1R, Dbl2 p2R, int left);
-	Dbl2 calculate(int pattern, int left, int right, Dbl2 offset);
-};
-
-/* Calculates the area under the line p1->p2 for the pixel 'p' using brute */
-/* force sampling: */
-/* (quick and dirty solution, but it works) */
-double AreaDiag::area1(Dbl2 p1, Dbl2 p2, Int2 p)
-{
-	if (p1 == p2)
-		return 1.0;
-
-	double xm = (p1.x + p2.x) / 2.0, ym = (p1.y + p2.y) / 2.0;
-	double a = p2.y - p1.y;
-	double b = p1.x - p2.x;
-	int count = 0;
-
-	for (int ix = 0; ix < SAMPLES_DIAG; ix++) {
-		double x = (double)p.x + (double)ix / (double)(SAMPLES_DIAG - 1);
-		for (int iy = 0; iy < SAMPLES_DIAG; iy++) {
-			double y = (double)p.y + (double)iy / (double)(SAMPLES_DIAG - 1);
-			if (a * (x - xm) + b * (y - ym) > 0.0) /* inside? */
-				count++;
-		}
-	}
-	return (double)count / (double)(SAMPLES_DIAG * SAMPLES_DIAG);
-}
-
-/* Calculates the area under the line p1->p2: */
-/* (includes the pixel and its opposite) */
-Dbl2 AreaDiag::area(Dbl2 p1, Dbl2 p2, int left)
-{
-	if (m_numeric) {
-		double a1 = area1(p1, p2, Int2(1, 0) + Int2(left));
-		double a2 = area1(p1, p2, Int2(1, 1) + Int2(left));
-		return Dbl2(1.0 - a1, a2);
-	}
-
-	/* Calculates the area under the line p1->p2 for the pixel 'p' analytically */
-	Dbl2 d = p2 - p1;
-	if (d.x == 0.0)
-		return Dbl2(0.0, 1.0);
-	if (d.y == 0.0)
-		return Dbl2(1.0, 0.0);
-
-	double x1 = (double)(1 + left);
-	double x2 = x1 + 1.0;
-	double ymid = x1;
-	double xtop = p1.x + (ymid + 1.0 - p1.y) * d.x / d.y;
-	double xmid = p1.x + (ymid       - p1.y) * d.x / d.y;
-	double xbot = p1.x + (ymid - 1.0 - p1.y) * d.x / d.y;
-
-	double y1 = p1.y + (x1 - p1.x) * d.y / d.x;
-	double y2 = p1.y + (x2 - p1.x) * d.y / d.x;
-	double fy1 = y1 - floor(y1);
-	double fy2 = y2 - floor(y2);
-	int iy1 = (int)floor(y1 - ymid);
-	int iy2 = (int)floor(y2 - ymid);
-
-	if (iy1 <= -2) {
-		if (iy2 == -1)
-			return Dbl2(1.0 - (x2 - xbot) * fy2 * 0.5, 0.0);
-		else if (iy2 == 0)
-			return Dbl2((xmid + xbot) * 0.5 - x1, (x2 - xmid) * fy2 * 0.5);
-		else if (iy2 >= 1)
-			return Dbl2((xmid + xbot) * 0.5 - x1, x2 -  (xtop + xmid) * 0.5);
-		else /* iy2 < -1 */
-			return Dbl2(1.0, 0.0);
-	}
-	else if (iy1 == -1) {
-		if (iy2 == -1)
-			return Dbl2(1.0 - (fy1 + fy2) * 0.5, 0.0);
-		else if (iy2 == 0)
-			return Dbl2((xmid - x1) * (1.0 - fy1) * 0.5, (x2 - xmid) * fy2 * 0.5);
-		else if (iy2 >= 1)
-			return Dbl2((xmid - x1) * (1.0 - fy1) * 0.5, x2 - (xtop + xmid) * 0.5);
-		else /* iy2 < -1 */
-			return Dbl2(1.0 - (xbot - x1) * fy1 * 0.5, 0.0);
-	}
-	else if (iy1 == 0) {
-		if (iy2 == -1)
-			return Dbl2((x2 - xmid) * (1.0 - fy2) * 0.5, (xmid - x1) * fy1 * 0.5);
-		else if (iy2 == 0)
-			return Dbl2(0.0, (fy1 + fy2) * 0.5);
-		else if (iy2 >= 1)
-			return Dbl2(0.0, 1.0 - (xtop - x1) * (1.0 - fy1) * 0.5);
-		else /* iy2 < -1 */
-			return Dbl2(x2 - (xmid + xbot) * 0.5, (xmid - x1) * fy1 * 0.5);
-	}
-	else { /* iy1 > 0 */
-		if (iy2 == -1)
-			return Dbl2((x2 - xtop) * (1.0 - fy2) * 0.5, (xtop + xmid) * 0.5 - x1);
-		else if (iy2 == 0)
-			return Dbl2(0.0, 1.0 - (x1 - xtop) * (1.0 - fy2) * 0.5);
-		else if (iy2 >= 1)
-			return Dbl2(0.0, 1.0);
-		else /* iy2 < -1 */
-			return Dbl2(x2 - (xmid + xbot) * 0.5, (xtop + xmid) * 0.5 - x1);
-	}
-}
-
-/* Calculate u-patterns using a triangle: */
-Dbl2 AreaDiag::areaTriangle(Dbl2 p1L, Dbl2 p2L, Dbl2 p1R, Dbl2 p2R, int left)
-{
-	double x1 = (double)(1 + left);
-	double x2 = x1 + 1.0;
-
-	Dbl2 dL = p2L - p1L;
-	Dbl2 dR = p2R - p1R;
-	double xm = ((p1L.x * dL.y / dL.x - p1L.y) - (p1R.x * dR.y / dR.x - p1R.y)) / (dL.y / dL.x - dR.y / dR.x);
-
-	double y1 = (x1 < xm) ? p1L.y + (x1 - p1L.x) * dL.y / dL.x : p1R.y + (x1 - p1R.x) * dR.y / dR.x;
-	double y2 = (x2 < xm) ? p1L.y + (x2 - p1L.x) * dL.y / dL.x : p1R.y + (x2 - p1R.x) * dR.y / dR.x;
-
-	return area(Dbl2(x1, y1), Dbl2(x2, y2), left);
-}
-
-/* Calculates the area for a given pattern and distances to the left and to the */
-/* right, biased by an offset: */
-Dbl2 AreaDiag::calculate(int pattern, int left, int right, Dbl2 offset)
-{
-	Dbl2 a1, a2;
-
-	double d = (double)(left + right + 1);
-
-	/*
-	 * There is some Black Magic around diagonal area calculations. Unlike
-	 * orthogonal patterns, the 'null' pattern (one without crossing edges) must be
-	 * filtered, and the ends of both the 'null' and L patterns are not known: L
-	 * and U patterns have different endings, and we don't know what is the
-	 * adjacent pattern. So, what we do is calculate a blend of both possibilites.
-	 */
-	switch (pattern) {
-		case EDGESDIAG_NONE_NONE:
-		{
-			/*
-			 *
-			 *         .-´
-			 *       .-´
-			 *     .-´
-			 *   .-´
-			 *   ´
-			 *
-			 */
-			a1 = area(Dbl2(1.0, 1.0), Dbl2(1.0, 1.0) + Dbl2(d), left); /* 1st possibility */
-			a2 = area(Dbl2(1.0, 0.0), Dbl2(1.0, 0.0) + Dbl2(d), left); /* 2nd possibility */
-			return (a1 + a2) / Dbl2(2.0); /* Blend them */
-			break;
-		}
-		case EDGESDIAG_VERT_NONE:
-		{
-			/*
-			 *
-			 *         .-´
-			 *       .-´
-			 *     .-´
-			 *   .-´
-			 *   |
-			 *   |
-			 */
-			a1 = area(Dbl2(1.0, 0.0) + offset, Dbl2(0.0, 0.0) + Dbl2(d), left);
-			a2 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d), left);
-			return (a1 + a2) / Dbl2(2.0);
-			break;
-		}
-		case EDGESDIAG_NONE_HORZ:
-		{
-			/*
-			 *
-			 *         .----
-			 *       .-´
-			 *     .-´
-			 *   .-´
-			 *   ´
-			 *
-			 */
-			a1 = area(Dbl2(0.0, 0.0), Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
-			a2 = area(Dbl2(1.0, 0.0), Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
-			return (a1 + a2) / Dbl2(2.0);
-			break;
-		}
-		case EDGESDIAG_VERT_HORZ:
-		{
-			/*
-			 *
-			 *         .----
-			 *       .-´
-			 *     .-´
-			 *   .-´
-			 *   |
-			 *   |
-			 */
-			if (m_orig_u)
-				return area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
-			else
-				return areaTriangle(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d),
-						    Dbl2(0.0, 0.0), Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
-			break;
-		}
-		case EDGESDIAG_HORZ_NONE:
-		{
-			/*
-			 *
-			 *         .-´
-			 *       .-´
-			 *     .-´
-			 * ----´
-			 *
-			 *
-			 */
-			a1 = area(Dbl2(1.0, 1.0) + offset, Dbl2(0.0, 0.0) + Dbl2(d), left);
-			a2 = area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d), left);
-			return (a1 + a2) / Dbl2(2.0);
-			break;
-		}
-		case EDGESDIAG_BOTH_NONE:
-		{
-			/*
-			 *
-			 *         .-´
-			 *       .-´
-			 *     .-´
-			 * --.-´
-			 *   |
-			 *   |
-			 */
-			a1 = area(Dbl2(1.0, 1.0) + offset, Dbl2(0.0, 0.0) + Dbl2(d), left);
-			a2 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d), left);
-			return (a1 + a2) / Dbl2(2.0);
-			break;
-		}
-		case EDGESDIAG_HORZ_HORZ:
-		{
-			/*
-			 *
-			 *         .----
-			 *       .-´
-			 *     .-´
-			 * ----´
-			 *
-			 *
-			 */
-			return area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
-			break;
-		}
-		case EDGESDIAG_BOTH_HORZ:
-		{
-			/*
-			 *
-			 *         .----
-			 *       .-´
-			 *     .-´
-			 * --.-´
-			 *   |
-			 *   |
-			 */
-			a1 = area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
-			a2 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
-			return (a1 + a2) / Dbl2(2.0);
-			break;
-		}
-		case EDGESDIAG_NONE_VERT:
-		{
-			/*
-			 *         |
-			 *         |
-			 *       .-´
-			 *     .-´
-			 *   .-´
-			 *   ´
-			 *
-			 */
-			a1 = area(Dbl2(0.0, 0.0), Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
-			a2 = area(Dbl2(1.0, 0.0), Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
-			return (a1 + a2) / Dbl2(2.0);
-			break;
-		}
-		case EDGESDIAG_VERT_VERT:
-		{
-			/*
-			 *         |
-			 *         |
-			 *       .-´
-			 *     .-´
-			 *   .-´
-			 *   |
-			 *   |
-			 */
-			return area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
-			break;
-		}
-		case EDGESDIAG_NONE_BOTH:
-		{
-			/*
-			 *         |
-			 *         .----
-			 *       .-´
-			 *     .-´
-			 *   .-´
-			 *   ´
-			 *
-			 */
-			a1 = area(Dbl2(0.0, 0.0), Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
-			a2 = area(Dbl2(1.0, 0.0), Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
-			return (a1 + a2) / Dbl2(2.0);
-			break;
-		}
-		case EDGESDIAG_VERT_BOTH:
-		{
-			/*
-			 *         |
-			 *         .----
-			 *       .-´
-			 *     .-´
-			 *   .-´
-			 *   |
-			 *   |
-			 */
-			a1 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
-			a2 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
-			return (a1 + a2) / Dbl2(2.0);
-			break;
-		}
-		case EDGESDIAG_HORZ_VERT:
-		{
-			/*
-			 *         |
-			 *         |
-			 *       .-´
-			 *     .-´
-			 * ----´
-			 *
-			 *
-			 */
-			if (m_orig_u)
-				return area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
-			else
-				return areaTriangle(Dbl2(1.0, 1.0) + offset, Dbl2(2.0, 1.0) + Dbl2(d),
-						    Dbl2(1.0, 0.0), Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
-			break;
-		}
-		case EDGESDIAG_BOTH_VERT:
-		{
-			/*
-			 *         |
-			 *         |
-			 *       .-´
-			 *     .-´
-			 * --.-´
-			 *   |
-			 *   |
-			 */
-			a1 = area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
-			a2 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
-			return (a1 + a2) / Dbl2(2.0);
-			break;
-		}
-		case EDGESDIAG_HORZ_BOTH:
-		{
-			/*
-			 *         |
-			 *         .----
-			 *       .-´
-			 *     .-´
-			 * ----´
-			 *
-			 *
-			 */
-			a1 = area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
-			a2 = area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
-			return (a1 + a2) / Dbl2(2.0);
-			break;
-		}
-		case EDGESDIAG_BOTH_BOTH:
-		{
-			/*
-			 *         |
-			 *         .----
-			 *       .-´
-			 *     .-´
-			 * --.-´
-			 *   |
-			 *   |
-			 */
-			a1 = area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left);
-			a2 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left);
-			return (a1 + a2) / Dbl2(2.0);
-			break;
-		}
-	}
-
-	return Dbl2(0.0, 0.0);
-}
-
-/*------------------------------------------------------------------------------*/
-/* Main Loops */
-
-void AreaOrtho::areaTex(int offset_index)
-{
-	double offset = subsample_offsets_ortho[offset_index];
-	int max_dist = m_compat ? MAX_DIST_ORTHO_COMPAT : MAX_DIST_ORTHO;
-
-	for (int pattern = 0; pattern < 16; pattern++) {
-		Int2 e = Int2(max_dist) * (m_compat ? edgesortho_compat : edgesortho)[pattern];
-		for (int left = 0; left < max_dist; left++) {
-			for (int right = 0; right < max_dist; right++) {
-				Dbl2 p = calculate(pattern, left * left, right * right, offset);
-				Int2 coords = e + Int2(left, right);
-
-				putPixel(offset_index, coords, p);
-			}
-		}
-	}
-	return;
-}
-
-void AreaDiag::areaTex(int offset_index)
-{
-	Dbl2 offset = subsample_offsets_diag[offset_index];
-
-	for (int pattern = 0; pattern < 16; pattern++) {
-		Int2 e = Int2(MAX_DIST_DIAG) * edgesdiag[pattern];
-		for (int left = 0; left < MAX_DIST_DIAG; left++) {
-			for (int right = 0; right < MAX_DIST_DIAG; right++) {
-				Dbl2 p = calculate(pattern, left, right, offset);
-				Int2 coords = e + Int2(left, right);
-
-				putPixel(offset_index, coords, p);
-			}
-		}
-	}
-	return;
-}
-
-/*------------------------------------------------------------------------------*/
-/* Write File to Specified Location on Disk */
-
-/* C/C++ source code (arrays of floats) */
-static void write_double_array(FILE *fp, const double *ptr, int length, const char *array_name, bool quantize)
-{
-	fprintf(fp, "static const float %s[%d] = {", array_name, length);
-
-	for (int n = 0; n < length; n++) {
-		if (n > 0)
-			fprintf(fp, ",");
-		fprintf(fp, (n % 8 != 0) ? " " : "\n\t");
-
-		if (quantize)
-			fprintf(fp, "%3d / 255.0", (int)(*(ptr++) * 255.0));
-		else
-			fprintf(fp, "%1.8lf", *(ptr++));
-	}
-
-	fprintf(fp, "\n};\n");
-}
-
-static void write_csource(AreaOrtho *ortho, AreaDiag *diag, FILE *fp, bool subsampling, bool quantize)
-{
-	fprintf(fp, "/* This file was generated by smaa_areatex.cpp */\n");
-
-	fprintf(fp, "\n/* Horizontal/Vertical Areas */\n");
-	write_double_array(fp, ortho->getData(),
-			   TEX_SIZE_ORTHO * TEX_SIZE_ORTHO * 2 * (subsampling ? SUBSAMPLES_ORTHO : 1),
-			   "areatex", quantize);
-
-	fprintf(fp, "\n/* Diagonal Areas */\n");
-	write_double_array(fp, diag->getData(),
-			   TEX_SIZE_DIAG * TEX_SIZE_DIAG * 2 * (subsampling ? SUBSAMPLES_DIAG : 1),
-			   "areatex_diag", quantize);
-}
-
-/* .tga File (RGBA 32bit uncompressed) */
-static void write_tga(AreaOrtho *ortho, AreaDiag *diag, FILE *fp, bool subsampling)
-{
-	int subsamples = subsampling ? SUBSAMPLES_ORTHO : 1;
-	unsigned char header[18] = {0, 0,
-				    2,   /* uncompressed RGB */
-				    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-				    32,  /* 32bit */
-				    8};  /* 8bit alpha, left to right, bottom to top */
-
-	/* Set width and height */
-	header[12] = (TEX_SIZE_ORTHO + TEX_SIZE_DIAG)      & 0xff;
-	header[13] = ((TEX_SIZE_ORTHO + TEX_SIZE_DIAG) >> 8) & 0xff;
-	header[14] = (subsamples * TEX_SIZE_ORTHO)      & 0xff;
-	header[15] = ((subsamples * TEX_SIZE_ORTHO) >> 8) & 0xff;
-
-	/* Write .tga header */
-	fwrite(header, sizeof(unsigned char), sizeof(header) / sizeof(unsigned char), fp);
-
-	/* Write pixel data  */
-	for (int i = subsamples - 1; i >= 0; i--) {
-		for (int y = TEX_SIZE_ORTHO - 1; y >= 0; y--) {
-			for (int x = 0; x < TEX_SIZE_ORTHO; x++) {
-				Dbl2 p = ortho->getPixel(i, Int2(x, y));
-				fputc(0, fp);                            /* B */
-				fputc((unsigned char)(p.y * 255.0), fp); /* G */
-				fputc((unsigned char)(p.x * 255.0), fp); /* R */
-				fputc(0, fp);                            /* A */
-			}
-
-			for (int x = 0; x < TEX_SIZE_DIAG; x++) {
-				if (i < SUBSAMPLES_DIAG) {
-					Dbl2 p = diag->getPixel(i, Int2(x, y));
-					fputc(0, fp);                            /* B */
-					fputc((unsigned char)(p.y * 255.0), fp); /* G */
-					fputc((unsigned char)(p.x * 255.0), fp); /* R */
-					fputc(0, fp);                            /* A */
-				}
-				else {
-					fputc(0, fp);
-					fputc(0, fp);
-					fputc(0, fp);
-					fputc(0, fp);
-				}
-			}
-		}
-	}
-}
-
-/* .raw File (R8G8 raw data) */
-static void write_raw(AreaOrtho *ortho, AreaDiag *diag, FILE *fp, bool subsampling)
-{
-	int subsamples = subsampling ? SUBSAMPLES_ORTHO : 1;
-
-	/* Write pixel data  */
-	for (int i = 0; i < subsamples; i++) {
-		for (int y = 0; y < TEX_SIZE_ORTHO; y++) {
-			for (int x = 0; x < TEX_SIZE_ORTHO; x++) {
-				Dbl2 p = ortho->getPixel(i, Int2(x, y));
-				fputc((unsigned char)(p.x * 255.0), fp); /* R */
-				fputc((unsigned char)(p.y * 255.0), fp); /* G */
-			}
-
-			for (int x = 0; x < TEX_SIZE_DIAG; x++) {
-				if (i < SUBSAMPLES_DIAG) {
-					Dbl2 p = diag->getPixel(i, Int2(x, y));
-					fputc((unsigned char)(p.x * 255.0), fp); /* R */
-					fputc((unsigned char)(p.y * 255.0), fp); /* G */
-				}
-				else {
-					fputc(0, fp);
-					fputc(0, fp);
-				}
-			}
-		}
-	}
-}
-
-static int generate_file(AreaOrtho *ortho, AreaDiag *diag, const char *path, bool subsampling, bool quantize, bool tga, bool raw)
-{
-	FILE *fp = fopen(path, tga ? "wb" : "w");
-
-	if (!fp) {
-		fprintf(stderr, "Unable to open file: %s\n", path);
-		return 1;
-	}
-
-	// fprintf(stderr, "Generating %s\n", path);
-
-	if (tga)
-		write_tga(ortho, diag, fp, subsampling);
-	else if (raw)
-		write_raw(ortho, diag, fp, subsampling);
-	else
-		write_csource(ortho, diag, fp, subsampling, quantize);
-
-	fclose(fp);
-
-	return 0;
-}
-
-int main(int argc, char **argv)
-{
-	bool subsampling = false;
-	bool quantize = false;
-	bool tga = false;
-	bool raw = false;
-	bool compat = false;
-	bool numeric = false;
-	bool orig_u = false;
-	bool help = false;
-	char *outfile = NULL;
-	int status = 0;
-
-	for (int i = 1; i < argc; i++) {
-		char *ptr = argv[i];
-		if (*ptr++ == '-' && *ptr != '\0') {
-			char c;
-			while ((c = *ptr++) != '\0') {
-				if (c == 's')
-					subsampling = true;
-				else if (c == 'q')
-					quantize = true;
-				else if (c == 't')
-					tga = true;
-				else if (c == 'r')
-					raw = true;
-				else if (c == 'c')
-					compat = true;
-				else if (c == 'n')
-					numeric = true;
-				else if (c == 'u')
-					orig_u = true;
-				else if (c == 'h')
-					help = true;
-				else {
-					fprintf(stderr, "Unknown option: -%c\n", c);
-					status = 1;
-					break;
-				}
-			}
-		}
-		else if (outfile) {
-			fprintf(stderr, "Too much file names: %s, %s\n", outfile, argv[i]);
-			status = 1;
-		}
-		else
-			outfile = argv[i];
-
-		if (status != 0)
-			break;
-	}
-
-	if (status == 0 && !help && !outfile) {
-		fprintf(stderr, "File name was not specified.\n");
-		status = 1;
-	}
-
-	if (status != 0 || help) {
-		fprintf(stderr, "Usage: %s [OPTION]... OUTFILE\n", argv[0]);
-		fprintf(stderr, "Options:\n");
-		fprintf(stderr, "    -s    Calculate data for subpixel rendering\n");
-		fprintf(stderr, "    -q    Quantize data to 256 levels\n");
-		fprintf(stderr, "    -t    Write TGA image instead of C/C++ source\n");
-		fprintf(stderr, "    -r    Write R8G8 raw image instead of C/C++ source\n");
-		fprintf(stderr, "    -c    Generate compatible orthogonal data that subtexture size is 16\n");
-		fprintf(stderr, "    -n    Numerically calculate diagonal data using brute force sampling\n");
-		fprintf(stderr, "    -u    Process orthogonal / diagonal U patterns in older ways\n");
-		fprintf(stderr, "    -h    Print this help and exit\n");
-		fprintf(stderr, "File name OUTFILE usually should have an extension such as .c, .h, or .tga,\n");
-		fprintf(stderr, "except for a special name '-' that means standard output.\n\n");
-		fprintf(stderr, "Example:\n");
-		fprintf(stderr, "  Generate TGA file exactly same as AreaTexDX10.tga bundled with the\n");
-		fprintf(stderr, "  original implementation:\n\n");
-		fprintf(stderr, "  $ smaa_areatex -stcnu AreaTexDX10.tga\n\n");
-		return status;
-	}
-
-	AreaOrtho *ortho = new AreaOrtho(compat, orig_u);
-	AreaDiag *diag = new AreaDiag(numeric, orig_u);
-
-	/* Calculate areatex data */
-	for (int i = 0; i < (subsampling ? SUBSAMPLES_ORTHO : 1); i++)
-		ortho->areaTex(i);
-
-	for (int i = 0; i < (subsampling ? SUBSAMPLES_DIAG : 1); i++)
-		diag->areaTex(i);
-
-	/* Generate .tga, .raw, or C/C++ source file, or write the data to stdout */
-	if (strcmp(outfile, "-") != 0)
-		status = generate_file(ortho, diag, outfile, subsampling, quantize, tga, raw);
-	else if (tga)
-		write_tga(ortho, diag, stdout, subsampling);
-	else if (raw)
-		write_raw(ortho, diag, stdout, subsampling);
-	else
-		write_csource(ortho, diag, stdout, subsampling, quantize);
-
-	delete ortho;
-	delete diag;
-
-	return status;
-}
-
-/* smaa_areatex.cpp ends here */
diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm
index b9da74e2ff3..125c7129de0 100644
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -106,7 +106,7 @@ struct ShaderCache {
 
   friend ShaderCache *get_shader_cache(id<MTLDevice> mtlDevice);
 
-  void compile_thread_func(int thread_index);
+  void compile_thread_func();
 
   using PipelineCollection = std::vector<unique_ptr<MetalKernelPipeline>>;
 
@@ -174,7 +174,7 @@ void ShaderCache::wait_for_all()
   }
 }
 
-void ShaderCache::compile_thread_func(int /*thread_index*/)
+void ShaderCache::compile_thread_func()
 {
   while (running) {
 
@@ -309,7 +309,7 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
 
       metal_printf("Spawning %d Cycles kernel compilation threads\n", max_mtlcompiler_threads);
       for (int i = 0; i < max_mtlcompiler_threads; i++) {
-        compile_threads.push_back(std::thread([&] { compile_thread_func(i); }));
+        compile_threads.push_back(std::thread([this] { this->compile_thread_func(); }));
       }
     }
   }
diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h
index d94a29b7f49..ad94e46ebbb 100644
--- a/intern/cycles/kernel/integrator/shade_volume.h
+++ b/intern/cycles/kernel/integrator/shade_volume.h
@@ -64,6 +64,11 @@ typedef struct VolumeShaderCoefficients {
   Spectrum emission;
 } VolumeShaderCoefficients;
 
+typedef struct EquiangularCoefficients {
+  float3 P;       /* Position of the light sample that equiangular sampling is centered on. */
+  float2 t_range; /* Ray segment to sample, stored as (tmin, tmax). */
+} EquiangularCoefficients;
+
 /* Evaluate shader to get extinction coefficient at P. */
 ccl_device_inline bool shadow_volume_shader_sample(KernelGlobals kg,
                                                    IntegratorShadowState state,
@@ -264,18 +269,18 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
 #  define VOLUME_SAMPLE_PDF_CUTOFF 1e-8f
 
 ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict ray,
-                                           const float3 light_P,
+                                           ccl_private const EquiangularCoefficients &coeffs,
                                            const float xi,
                                            ccl_private float *pdf)
 {
-  const float tmin = ray->tmin;
-  const float tmax = ray->tmax;
-  const float delta = dot((light_P - ray->P), ray->D);
-  const float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
+  const float delta = dot((coeffs.P - ray->P), ray->D);
+  const float D = safe_sqrtf(len_squared(coeffs.P - ray->P) - delta * delta);
   if (UNLIKELY(D == 0.0f)) {
     *pdf = 0.0f;
     return 0.0f;
   }
+  const float tmin = coeffs.t_range.x;
+  const float tmax = coeffs.t_range.y;
   const float theta_a = atan2f(tmin - delta, D);
   const float theta_b = atan2f(tmax - delta, D);
   const float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
@@ -289,17 +294,17 @@ ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict r
 }
 
 ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray,
-                                        const float3 light_P,
+                                        ccl_private const EquiangularCoefficients &coeffs,
                                         const float sample_t)
 {
-  const float delta = dot((light_P - ray->P), ray->D);
-  const float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
+  const float delta = dot((coeffs.P - ray->P), ray->D);
+  const float D = safe_sqrtf(len_squared(coeffs.P - ray->P) - delta * delta);
   if (UNLIKELY(D == 0.0f)) {
     return 0.0f;
   }
 
-  const float tmin = ray->tmin;
-  const float tmax = ray->tmax;
+  const float tmin = coeffs.t_range.x;
+  const float tmax = coeffs.t_range.y;
   const float t_ = sample_t - delta;
 
   const float theta_a = atan2f(tmin - delta, D);
@@ -313,6 +318,29 @@ ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray,
   return pdf;
 }
 
+ccl_device_inline bool volume_equiangular_valid_ray_segment(KernelGlobals kg,
+                                                            const float3 ray_P,
+                                                            const float3 ray_D,
+                                                            ccl_private float2 *t_range,
+                                                            const ccl_private LightSample *ls)
+{
+  switch (ls->type) {
+    case LIGHT_SPOT: {
+      ccl_global const KernelLight *spot = &kernel_data_fetch(lights, ls->lamp);
+      return spot_light_valid_ray_segment(spot, ray_P, ray_D, t_range);
+    }
+    case LIGHT_AREA: {
+      ccl_global const KernelLight *area = &kernel_data_fetch(lights, ls->lamp);
+      return area_light_valid_ray_segment(&area->area, ray_P - area->co, ray_D, t_range);
+    }
+    case LIGHT_TRIANGLE:
+      return triangle_light_valid_ray_segment(kg, ray_P - ls->P, ray_D, t_range, ls);
+    default: /* Point light: the whole range of the ray is visible, `t_range` is kept as is. */
+      kernel_assert(ls->type == LIGHT_POINT);
+      return true;
+  }
+}
+
 /* Distance sampling */
 
 ccl_device float volume_distance_sample(float max_t,
@@ -403,7 +431,7 @@ typedef struct VolumeIntegrateState {
 ccl_device_forceinline void volume_integrate_step_scattering(
     ccl_private const ShaderData *sd,
     ccl_private const Ray *ray,
-    const float3 equiangular_light_P,
+    ccl_private const EquiangularCoefficients &equiangular_coeffs,
     ccl_private const VolumeShaderCoefficients &ccl_restrict coeff,
     const Spectrum transmittance,
     ccl_private VolumeIntegrateState &ccl_restrict vstate,
@@ -474,7 +502,7 @@ ccl_device_forceinline void volume_integrate_step_scattering(
 
           /* Multiple importance sampling. */
           if (vstate.use_mis) {
-            const float equiangular_pdf = volume_equiangular_pdf(ray, equiangular_light_P, new_t);
+            const float equiangular_pdf = volume_equiangular_pdf(ray, equiangular_coeffs, new_t);
             const float mis_weight = power_heuristic(vstate.distance_pdf * distance_pdf,
                                                      equiangular_pdf);
             result.direct_throughput *= 2.0f * mis_weight;
@@ -509,7 +537,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
     ccl_global float *ccl_restrict render_buffer,
     const float object_step_size,
     const VolumeSampleMethod direct_sample_method,
-    const float3 equiangular_light_P,
+    ccl_private const EquiangularCoefficients &equiangular_coeffs,
     ccl_private VolumeIntegrateResult &result)
 {
   PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INTEGRATE);
@@ -560,7 +588,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
   /* Equiangular sampling: compute distance and PDF in advance. */
   if (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR) {
     result.direct_t = volume_equiangular_sample(
-        ray, equiangular_light_P, vstate.rscatter, &vstate.equiangular_pdf);
+        ray, equiangular_coeffs, vstate.rscatter, &vstate.equiangular_pdf);
   }
 #  ifdef __PATH_GUIDING__
   result.direct_sample_method = vstate.direct_sample_method;
@@ -614,7 +642,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
 
           /* Scattering and absorption. */
           volume_integrate_step_scattering(
-              sd, ray, equiangular_light_P, coeff, transmittance, vstate, result);
+              sd, ray, equiangular_coeffs, coeff, transmittance, vstate, result);
         }
         else {
           /* Absorption only. */
@@ -673,7 +701,8 @@ ccl_device_forceinline bool integrate_volume_equiangular_sample_light(
     ccl_private const Ray *ccl_restrict ray,
     ccl_private const ShaderData *ccl_restrict sd,
     ccl_private const RNGState *ccl_restrict rng_state,
-    ccl_private float3 *ccl_restrict P)
+    ccl_private EquiangularCoefficients *ccl_restrict equiangular_coeffs,
+    ccl_private LightSample &ccl_restrict ls)
 {
   /* Test if there is a light or BSDF that needs direct light. */
   if (!kernel_data.integrator.use_direct_light) {
@@ -685,7 +714,6 @@ ccl_device_forceinline bool integrate_volume_equiangular_sample_light(
   const uint bounce = INTEGRATOR_STATE(state, path, bounce);
   const float3 rand_light = path_state_rng_3D(kg, rng_state, PRNG_LIGHT);
 
-  LightSample ls ccl_optional_struct_init;
   if (!light_sample_from_volume_segment(kg,
                                         rand_light,
                                         sd->time,
@@ -708,9 +736,10 @@ ccl_device_forceinline bool integrate_volume_equiangular_sample_light(
     return false;
   }
 
-  *P = ls.P;
+  equiangular_coeffs->P = ls.P;
 
-  return true;
+  return volume_equiangular_valid_ray_segment(
+      kg, ray->P, ray->D, &equiangular_coeffs->t_range, &ls);
 }
 
 /* Path tracing: sample point on light and evaluate light shader, then
@@ -725,41 +754,26 @@ ccl_device_forceinline void integrate_volume_direct_light(
 #  ifdef __PATH_GUIDING__
     ccl_private const Spectrum unlit_throughput,
 #  endif
-    ccl_private const Spectrum throughput)
+    ccl_private const Spectrum throughput,
+    ccl_private LightSample &ccl_restrict ls)
 {
   PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_DIRECT_LIGHT);
 
-  if (!kernel_data.integrator.use_direct_light) {
+  if (!kernel_data.integrator.use_direct_light || ls.emitter_id == EMITTER_NONE) {
     return;
   }
 
-  /* Sample position on the same light again, now from the shading point where we scattered.
-   *
-   * Note that this means we sample the light tree twice when equiangular sampling is used.
-   * We could consider sampling the light tree just once and use the same light position again.
-   *
-   * This would make the PDFs for MIS weights more complicated due to having to account for
-   * both distance/equiangular and direct/indirect light sampling, but could be more accurate.
-   * Additionally we could end up behind the light or outside a spot light cone, which might
-   * waste a sample. Though on the other hand it would be possible to prevent that with
-   * equiangular sampling restricted to a smaller sub-segment where the light has influence. */
-  LightSample ls ccl_optional_struct_init;
+  /* Sample position on the same light again, now from the shading point where we scattered. */
   {
     const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
     const uint bounce = INTEGRATOR_STATE(state, path, bounce);
     const float3 rand_light = path_state_rng_3D(kg, rng_state, PRNG_LIGHT);
+    const float3 N = zero_float3();
+    const int object_receiver = light_link_receiver_nee(kg, sd);
+    const int shader_flags = SD_BSDF_HAS_TRANSMISSION;
 
-    if (!light_sample_from_position(kg,
-                                    rng_state,
-                                    rand_light,
-                                    sd->time,
-                                    P,
-                                    zero_float3(),
-                                    light_link_receiver_nee(kg, sd),
-                                    SD_BSDF_HAS_TRANSMISSION,
-                                    bounce,
-                                    path_flag,
-                                    &ls))
+    if (!light_sample<false>(
+            kg, rand_light, sd->time, P, N, object_receiver, shader_flags, bounce, path_flag, &ls))
     {
       return;
     }
@@ -877,6 +891,7 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
     KernelGlobals kg,
     IntegratorState state,
     ccl_private ShaderData *sd,
+    ccl_private const Ray *ray,
     ccl_private const RNGState *rng_state,
     ccl_private const ShaderVolumePhases *phases)
 {
@@ -929,6 +944,11 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
   INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
   INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(phase_wo);
   INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f;
+#  ifdef __LIGHT_TREE__
+  /* Length of the segment just traversed, read back by the light tree PDF evaluation. The
+   * `previous_dt` state member only exists when the light tree is compiled in. */
+  INTEGRATOR_STATE_WRITE(state, ray, previous_dt) = ray->tmax - ray->tmin;
+#  endif
   INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX;
 #  ifdef __RAY_DIFFERENTIALS__
   INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
@@ -957,7 +973,8 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
 
   /* Update path state */
   INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = phase_pdf;
-  INTEGRATOR_STATE_WRITE(state, path, mis_origin_n) = zero_float3();
+  const float3 previous_P = ray->P + ray->D * ray->tmin;
+  INTEGRATOR_STATE_WRITE(state, path, mis_origin_n) = sd->P - previous_P;
   INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf(
       unguided_phase_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf));
 
@@ -989,11 +1006,15 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
 
   /* Sample light ahead of volume stepping, for equiangular sampling. */
   /* TODO: distant lights are ignored now, but could instead use even distribution. */
+  LightSample ls ccl_optional_struct_init;
+  ls.emitter_id = EMITTER_NONE;
   const bool need_light_sample = !(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_TERMINATE);
-  float3 equiangular_P = zero_float3();
-  const bool have_equiangular_sample = need_light_sample &&
-                                       integrate_volume_equiangular_sample_light(
-                                           kg, state, ray, &sd, &rng_state, &equiangular_P);
+
+  EquiangularCoefficients equiangular_coeffs = {zero_float3(), make_float2(ray->tmin, ray->tmax)};
+
+  const bool have_equiangular_sample =
+      need_light_sample && integrate_volume_equiangular_sample_light(
+                               kg, state, ray, &sd, &rng_state, &equiangular_coeffs, ls);
 
   VolumeSampleMethod direct_sample_method = (have_equiangular_sample) ?
                                                 volume_stack_sample_method(kg, state) :
@@ -1023,7 +1044,7 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
                                  render_buffer,
                                  step_size,
                                  direct_sample_method,
-                                 equiangular_P,
+                                 equiangular_coeffs,
                                  result);
 
   /* Perform path termination. The intersect_closest will have already marked this path
@@ -1091,7 +1112,8 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
 #  ifdef __PATH_GUIDING__
                                   unlit_throughput,
 #  endif
-                                  result.direct_throughput);
+                                  result.direct_throughput,
+                                  ls);
   }
 
   /* Indirect light.
@@ -1130,7 +1152,7 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
 #    endif
 #  endif
 
-    if (integrate_volume_phase_scatter(kg, state, &sd, &rng_state, &result.indirect_phases)) {
+    if (integrate_volume_phase_scatter(kg, state, &sd, ray, &rng_state, &result.indirect_phases)) {
       return VOLUME_PATH_SCATTERED;
     }
     else {
diff --git a/intern/cycles/kernel/integrator/state_template.h b/intern/cycles/kernel/integrator/state_template.h
index e8683ed9179..34154d1c7fa 100644
--- a/intern/cycles/kernel/integrator/state_template.h
+++ b/intern/cycles/kernel/integrator/state_template.h
@@ -75,6 +75,9 @@ KERNEL_STRUCT_MEMBER(ray, float, tmax, KERNEL_FEATURE_PATH_TRACING)
 KERNEL_STRUCT_MEMBER(ray, float, time, KERNEL_FEATURE_PATH_TRACING)
 KERNEL_STRUCT_MEMBER(ray, float, dP, KERNEL_FEATURE_PATH_TRACING)
 KERNEL_STRUCT_MEMBER(ray, float, dD, KERNEL_FEATURE_PATH_TRACING)
+#ifdef __LIGHT_TREE__
+KERNEL_STRUCT_MEMBER(ray, float, previous_dt, KERNEL_FEATURE_PATH_TRACING)
+#endif
 KERNEL_STRUCT_END(ray)
 
 /*************************** Intersection result ******************************/
diff --git a/intern/cycles/kernel/light/area.h b/intern/cycles/kernel/light/area.h
index eb03ca866ef..d9188355984 100644
--- a/intern/cycles/kernel/light/area.h
+++ b/intern/cycles/kernel/light/area.h
@@ -233,6 +233,11 @@ ccl_device bool area_light_spread_clamp_light(const float3 P,
   return true;
 }
 
+ccl_device_forceinline bool area_light_is_ellipse(const ccl_global KernelAreaLight *light)
+{
+  return light->invarea < 0.0f; /* A negative `invarea` marks the light as an ellipse. */
+}
+
 /* Common API. */
 /* Compute `eval_fac` and `pdf`. Also sample a new position on the light if `sample_coord`. */
 template<bool in_volume_segment>
@@ -338,7 +343,7 @@ ccl_device_inline bool area_light_sample(const ccl_global KernelLight *klight,
   const float light_v = dot(inplane, klight->area.axis_v) / klight->area.len_v;
 
   if (!in_volume_segment) {
-    const bool is_ellipse = (klight->area.invarea < 0.0f);
+    const bool is_ellipse = area_light_is_ellipse(&klight->area);
 
     /* Sampled point lies outside of the area light. */
     if (is_ellipse && (sqr(light_u) + sqr(light_v) > 0.25f)) {
@@ -380,7 +385,7 @@ ccl_device_inline bool area_light_intersect(const ccl_global KernelLight *klight
 {
   /* Area light. */
   const float invarea = fabsf(klight->area.invarea);
-  const bool is_ellipse = (klight->area.invarea < 0.0f);
+  const bool is_ellipse = area_light_is_ellipse(&klight->area);
   if (invarea == 0.0f) {
     return false;
   }
@@ -428,6 +433,55 @@ ccl_device_inline bool area_light_sample_from_intersection(
   return area_light_eval<false>(klight, ray_P, &light_P, ls, zero_float2(), false);
 }
 
+/* Largest distance from the center of the light to any point on its boundary. */
+ccl_device_forceinline float area_light_max_extent(const ccl_global KernelAreaLight *light)
+{
+  const float2 size = make_float2(light->len_u, light->len_v);
+  return 0.5f * (area_light_is_ellipse(light) ? fmaxf(size.x, size.y) : len(size));
+}
+
+/* Find the ray segment lit by the area light. `P` is the ray origin relative to the light. */
+ccl_device_inline bool area_light_valid_ray_segment(const ccl_global KernelAreaLight *light,
+                                                    float3 P,
+                                                    float3 D,
+                                                    ccl_private float2 *t_range)
+{
+  bool valid;
+  const float tan_half_spread = light->tan_half_spread;
+  float3 axis = light->dir;
+  /* With (almost) zero spread, test an infinite cylinder or box along the axis, not a cone. */
+  const bool angle_almost_zero = (tan_half_spread < 1e-5f);
+  if (angle_almost_zero) {
+    /* Map to local coordinates of the light. Do not use `itfm` in `KernelLight` as there might be
+     * additional scaling in the light size. `P`, `D` and `axis` are local from here on. */
+    const Transform tfm = make_transform(light->axis_u, light->axis_v, axis);
+    P = transform_point(&tfm, P);
+    D = transform_direction(&tfm, D);
+    axis = make_float3(0.0f, 0.0f, 1.0f);
+
+    const float half_len_u = 0.5f * light->len_u;
+    const float half_len_v = 0.5f * light->len_v;
+    if (area_light_is_ellipse(light)) {
+      valid = ray_infinite_cylinder_intersect(P, D, half_len_u, half_len_v, t_range);
+    }
+    else {
+      const float3 bbox_min = make_float3(-half_len_u, -half_len_v, 0.0f);
+      const float3 bbox_max = make_float3(half_len_u, half_len_v, FLT_MAX);
+      valid = ray_aabb_intersect(bbox_min, bbox_max, P, D, t_range);
+    }
+  }
+  else {
+    /* Conservative estimation with the smallest possible cone covering the whole spread. */
+    const float3 apex_to_point = P + area_light_max_extent(light) / tan_half_spread * axis;
+    const float cos_angle_sq = 1.0f / (1.0f + sqr(tan_half_spread));
+
+    valid = ray_cone_intersect(axis, apex_to_point, D, cos_angle_sq, t_range);
+  }
+
+  /* Limit the range to the positive side of the area light. */
+  return valid && ray_plane_intersect(axis, P, D, t_range);
+}
+
 template<bool in_volume_segment>
 ccl_device_forceinline bool area_light_tree_parameters(const ccl_global KernelLight *klight,
                                                        const float3 centroid,
@@ -464,9 +518,8 @@ ccl_device_forceinline bool area_light_tree_parameters(const ccl_global KernelLi
   const bool shape_above_surface = dot(N, centroid - P) + fabsf(dot(N, extentu)) +
                                        fabsf(dot(N, extentv)) >
                                    0;
-  const bool in_volume = is_zero(N);
 
-  return (front_facing && shape_above_surface) || in_volume;
+  return front_facing && shape_above_surface;
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/light/common.h b/intern/cycles/kernel/light/common.h
index ea724991817..7ea7519f94f 100644
--- a/intern/cycles/kernel/light/common.h
+++ b/intern/cycles/kernel/light/common.h
@@ -12,9 +12,9 @@ CCL_NAMESPACE_BEGIN
 
 typedef struct LightSample {
   float3 P;            /* position on light, or direction for distant light */
-  float3 Ng;           /* normal on light */
-  float3 D;            /* direction from shading point to light */
+  packed_float3 Ng;    /* normal on light */
   float t;             /* distance to light (FLT_MAX for distant light) */
+  float3 D;            /* direction from shading point to light */
   float u, v;          /* parametric coordinate on primitive */
   float pdf;           /* pdf for selecting light and point on light */
   float pdf_selection; /* pdf for selecting light */
@@ -25,6 +25,7 @@ typedef struct LightSample {
   int lamp;            /* lamp id */
   int group;           /* lightgroup */
   LightType type;      /* type of light */
+  int emitter_id;      /* index in the emitter array */
 } LightSample;
 
 /* Utilities */
diff --git a/intern/cycles/kernel/light/distribution.h b/intern/cycles/kernel/light/distribution.h
index 23cdaa6dff5..b6b9f2e035c 100644
--- a/intern/cycles/kernel/light/distribution.h
+++ b/intern/cycles/kernel/light/distribution.h
@@ -41,36 +41,14 @@ ccl_device int light_distribution_sample(KernelGlobals kg, const float rand)
   return index;
 }
 
-template<bool in_volume_segment>
 ccl_device_noinline bool light_distribution_sample(KernelGlobals kg,
-                                                   const float3 rand,
-                                                   const float time,
-                                                   const float3 P,
-                                                   const float3 N,
-                                                   const int object_receiver,
-                                                   const int shader_flags,
-                                                   const int bounce,
-                                                   const uint32_t path_flag,
+                                                   const float rand,
                                                    ccl_private LightSample *ls)
 {
   /* Sample light index from distribution. */
-  /* The first two dimensions of the Sobol sequence have better stratification. */
-  const int index = light_distribution_sample(kg, rand.z);
-  const float pdf_selection = kernel_data.integrator.distribution_pdf_lights;
-  const float2 rand_uv = float3_to_float2(rand);
-  return light_sample<in_volume_segment>(kg,
-                                         rand_uv,
-                                         time,
-                                         P,
-                                         N,
-                                         object_receiver,
-                                         shader_flags,
-                                         bounce,
-                                         path_flag,
-                                         index,
-                                         0,
-                                         pdf_selection,
-                                         ls);
+  ls->emitter_id = light_distribution_sample(kg, rand);
+  ls->pdf_selection = kernel_data.integrator.distribution_pdf_lights;
+  return true;
 }
 
 ccl_device_inline float light_distribution_pdf_lamp(KernelGlobals kg)
diff --git a/intern/cycles/kernel/light/light.h b/intern/cycles/kernel/light/light.h
index 8f18e5ce7a5..39a22f92d9d 100644
--- a/intern/cycles/kernel/light/light.h
+++ b/intern/cycles/kernel/light/light.h
@@ -177,7 +177,7 @@ ccl_device_inline bool light_sample(KernelGlobals kg,
 
 template<bool in_volume_segment>
 ccl_device_noinline bool light_sample(KernelGlobals kg,
-                                      const float2 rand,
+                                      const float3 rand_light,
                                       const float time,
                                       const float3 P,
                                       const float3 N,
@@ -185,33 +185,31 @@ ccl_device_noinline bool light_sample(KernelGlobals kg,
                                       const int shader_flags,
                                       const int bounce,
                                       const uint32_t path_flag,
-                                      const int emitter_index,
-                                      const int object_id,
-                                      const float pdf_selection,
                                       ccl_private LightSample *ls)
 {
+  /* The first two dimensions of the Sobol sequence have better stratification, use them to sample
+   * position on the light. */
+  const float2 rand = float3_to_float2(rand_light);
+
   int prim;
   MeshLight mesh_light;
 #ifdef __LIGHT_TREE__
   if (kernel_data.integrator.use_light_tree) {
     ccl_global const KernelLightTreeEmitter *kemitter = &kernel_data_fetch(light_tree_emitters,
-                                                                           emitter_index);
+                                                                           ls->emitter_id);
     prim = kemitter->light.id;
     mesh_light.shader_flag = kemitter->mesh_light.shader_flag;
-    mesh_light.object_id = object_id;
+    mesh_light.object_id = ls->object;
   }
   else
 #endif
   {
     ccl_global const KernelLightDistribution *kdistribution = &kernel_data_fetch(
-        light_distribution, emitter_index);
+        light_distribution, ls->emitter_id);
     prim = kdistribution->prim;
     mesh_light = kdistribution->mesh_light;
   }
 
-  /* A different value would be assigned in `triangle_light_sample()` if `!use_light_tree`. */
-  ls->pdf_selection = pdf_selection;
-
   if (prim >= 0) {
     /* Mesh light. */
     const int object = mesh_light.object_id;
diff --git a/intern/cycles/kernel/light/sample.h b/intern/cycles/kernel/light/sample.h
index afc4537c671..434383ebc2b 100644
--- a/intern/cycles/kernel/light/sample.h
+++ b/intern/cycles/kernel/light/sample.h
@@ -329,17 +329,25 @@ ccl_device_inline bool light_sample_from_volume_segment(KernelGlobals kg,
                                                         const uint32_t path_flag,
                                                         ccl_private LightSample *ls)
 {
+  const int shader_flags = SD_BSDF_HAS_TRANSMISSION;
+
 #ifdef __LIGHT_TREE__
   if (kernel_data.integrator.use_light_tree) {
-    return light_tree_sample<true>(
-        kg, rand, time, P, D, t, object_receiver, SD_BSDF_HAS_TRANSMISSION, bounce, path_flag, ls);
+    if (!light_tree_sample<true>(kg, rand.z, P, D, t, object_receiver, shader_flags, ls)) {
+      return false;
+    }
   }
   else
 #endif
   {
-    return light_distribution_sample<true>(
-        kg, rand, time, P, D, object_receiver, SD_BSDF_HAS_TRANSMISSION, bounce, path_flag, ls);
+    if (!light_distribution_sample(kg, rand.z, ls)) {
+      return false;
+    }
   }
+
+  /* Sample position on the selected light. */
+  return light_sample<true>(
+      kg, rand, time, P, D, object_receiver, shader_flags, bounce, path_flag, ls);
 }
 
 ccl_device bool light_sample_from_position(KernelGlobals kg,
@@ -354,17 +362,24 @@ ccl_device bool light_sample_from_position(KernelGlobals kg,
                                            const uint32_t path_flag,
                                            ccl_private LightSample *ls)
 {
+  /* Randomly select a light. */
 #ifdef __LIGHT_TREE__
   if (kernel_data.integrator.use_light_tree) {
-    return light_tree_sample<false>(
-        kg, rand, time, P, N, 0.0f, object_receiver, shader_flags, bounce, path_flag, ls);
+    if (!light_tree_sample<false>(kg, rand.z, P, N, 0.0f, object_receiver, shader_flags, ls)) {
+      return false;
+    }
   }
   else
 #endif
   {
-    return light_distribution_sample<false>(
-        kg, rand, time, P, N, object_receiver, shader_flags, bounce, path_flag, ls);
+    if (!light_distribution_sample(kg, rand.z, ls)) {
+      return false;
+    }
   }
+
+  /* Sample position on the selected light. */
+  return light_sample<false>(
+      kg, rand, time, P, N, object_receiver, shader_flags, bounce, path_flag, ls);
 }
 
 /* Update light sample with new shading point position for MNEE. The position on the light is fixed
@@ -415,13 +430,15 @@ ccl_device_inline float light_sample_mis_weight_forward_surface(KernelGlobals kg
 #ifdef __LIGHT_TREE__
   if (kernel_data.integrator.use_light_tree) {
     float3 ray_P = INTEGRATOR_STATE(state, ray, P);
+    const float dt = INTEGRATOR_STATE(state, ray, previous_dt);
     const float3 N = INTEGRATOR_STATE(state, path, mis_origin_n);
+
     uint lookup_offset = kernel_data_fetch(object_lookup_offset, sd->object);
     uint prim_offset = kernel_data_fetch(object_prim_offset, sd->object);
     uint triangle = kernel_data_fetch(triangle_to_tree, sd->prim - prim_offset + lookup_offset);
 
     pdf *= light_tree_pdf(
-        kg, ray_P, N, path_flag, sd->object, triangle, light_link_receiver_forward(kg, state));
+        kg, ray_P, N, dt, path_flag, sd->object, triangle, light_link_receiver_forward(kg, state));
   }
   else
 #endif
@@ -445,9 +462,11 @@ ccl_device_inline float light_sample_mis_weight_forward_lamp(KernelGlobals kg,
 #ifdef __LIGHT_TREE__
   if (kernel_data.integrator.use_light_tree) {
     const float3 N = INTEGRATOR_STATE(state, path, mis_origin_n);
+    const float dt = INTEGRATOR_STATE(state, ray, previous_dt);
     pdf *= light_tree_pdf(kg,
                           P,
                           N,
+                          dt,
                           path_flag,
                           0,
                           kernel_data_fetch(light_to_tree, ls->lamp),
@@ -485,9 +504,10 @@ ccl_device_inline float light_sample_mis_weight_forward_background(KernelGlobals
 #ifdef __LIGHT_TREE__
   if (kernel_data.integrator.use_light_tree) {
     const float3 N = INTEGRATOR_STATE(state, path, mis_origin_n);
+    const float dt = INTEGRATOR_STATE(state, ray, previous_dt);
     uint light = kernel_data_fetch(light_to_tree, kernel_data.background.light_index);
     pdf *= light_tree_pdf(
-        kg, ray_P, N, path_flag, 0, light, light_link_receiver_forward(kg, state));
+        kg, ray_P, N, dt, path_flag, 0, light, light_link_receiver_forward(kg, state));
   }
   else
 #endif
diff --git a/intern/cycles/kernel/light/spot.h b/intern/cycles/kernel/light/spot.h
index c5090573d4d..56989933ded 100644
--- a/intern/cycles/kernel/light/spot.h
+++ b/intern/cycles/kernel/light/spot.h
@@ -265,6 +265,24 @@ ccl_device_inline bool spot_light_sample_from_intersection(
   return true;
 }
 
+/* Find the part of the ray that is lit by the spot light. */
+ccl_device_inline bool spot_light_valid_ray_segment(const ccl_global KernelLight *klight,
+                                                    const float3 P,
+                                                    const float3 D,
+                                                    ccl_private float2 *t_range)
+{
+  /* Offset the origin along the spot direction, then express the ray in light space. */
+  const Transform itfm = klight->itfm;
+  const float3 shifted_P = P + klight->spot.dir * klight->spot.ray_segment_dp;
+  const float3 origin = transform_point(&itfm, shifted_P);
+  const float3 direction = transform_direction(&itfm, D);
+
+  /* Intersect with the smallest cone enclosing the light spread, whose axis is local -Z. */
+  const float3 cone_axis = make_float3(0.0f, 0.0f, -1.0f);
+  const float cos_half_angle_sq = sqr(klight->spot.cos_half_spot_angle);
+  return ray_cone_intersect(cone_axis, origin, direction, cos_half_angle_sq, t_range);
+}
+
 template<bool in_volume_segment>
 ccl_device_forceinline bool spot_light_tree_parameters(const ccl_global KernelLight *klight,
                                                        const float3 centroid,
diff --git a/intern/cycles/kernel/light/tree.h b/intern/cycles/kernel/light/tree.h
index 3fd49e30cbe..bbca17e5f75 100644
--- a/intern/cycles/kernel/light/tree.h
+++ b/intern/cycles/kernel/light/tree.h
@@ -148,10 +148,7 @@ ccl_device void light_tree_importance(const float3 N_or_D,
   float cos_min_incidence_angle = 1.0f;
   float cos_max_incidence_angle = 1.0f;
 
-  /* When sampling the light tree for the second time in `shade_volume.h` and when query the pdf in
-   * `sample.h`. */
-  const bool in_volume = is_zero(N_or_D);
-  if (!in_volume_segment && !in_volume) {
+  if (!in_volume_segment) {
     const float3 N = N_or_D;
     const float cos_theta_i = has_transmission ? fabsf(dot(point_to_centroid, N)) :
                                                  dot(point_to_centroid, N);
@@ -221,9 +218,9 @@ ccl_device void light_tree_importance(const float3 N_or_D,
   max_importance = fabsf(f_a * cos_min_incidence_angle * energy * cos_min_outgoing_angle /
                          (in_volume_segment ? min_distance : sqr(min_distance)));
 
-  /* TODO: also min importance for volume? */
+  /* TODO: compute proper min importance for volume. */
   if (in_volume_segment) {
-    min_importance = max_importance;
+    min_importance = 0.0f;
     return;
   }
 
@@ -270,10 +267,10 @@ ccl_device bool compute_emitter_centroid_and_dir(KernelGlobals kg,
         /* Arbitrary centroid and direction. */
         centroid = make_float3(0.0f, 0.0f, 1.0f);
         dir = make_float3(0.0f, 0.0f, -1.0f);
-        return !in_volume_segment;
+        break;
       case LIGHT_DISTANT:
         dir = centroid;
-        return !in_volume_segment;
+        break;
       default:
         return false;
     }
@@ -323,12 +320,13 @@ ccl_device void light_tree_node_importance(KernelGlobals kg,
   float cos_theta_u;
   float distance;
   if (knode->type == LIGHT_TREE_DISTANT) {
-    if (in_volume_segment) {
-      return;
-    }
     point_to_centroid = -bcone.axis;
     cos_theta_u = fast_cosf(bcone.theta_o + bcone.theta_e);
     distance = 1.0f;
+    if (t == FLT_MAX) {
+      /* In world volume, distant light has no contribution. */
+      return;
+    }
   }
   else {
     const float3 centroid = 0.5f * (bbox.min + bbox.max);
@@ -339,6 +337,9 @@ ccl_device void light_tree_node_importance(KernelGlobals kg,
       /* Minimal distance of the ray to the cluster. */
       distance = len(centroid - closest_point);
       point_to_centroid = -compute_v(centroid, P, D, bcone.axis, t);
+      /* FIXME(weizhen): it is not clear from which point the `cos_theta_u` should be computed in
+       * volume segment. We could use `closest_point` as a conservative measure, but then
+       * `point_to_centroid` should also use `closest_point`. */
       cos_theta_u = light_tree_cos_bounding_box_angle(bbox, closest_point, point_to_centroid);
     }
     else {
@@ -697,17 +698,16 @@ ccl_device int light_tree_root_node_index(KernelGlobals kg, const int object_rec
   return 0;
 }
 
+/* Pick a random light from the light tree for a given shading point P, and store the index of the
+ * picked emitter and the probability of picking it in `ls`. */
 template<bool in_volume_segment>
 ccl_device_noinline bool light_tree_sample(KernelGlobals kg,
-                                           const float3 rand,
-                                           const float time,
+                                           const float rand,
                                            const float3 P,
                                            float3 N_or_D,
                                            float t,
                                            const int object_receiver,
                                            const int shader_flags,
-                                           const int bounce,
-                                           const uint32_t path_flag,
                                            ccl_private LightSample *ls)
 {
   if (!kernel_data.integrator.use_direct_light) {
@@ -718,10 +718,8 @@ ccl_device_noinline bool light_tree_sample(KernelGlobals kg,
   float pdf_leaf = 1.0f;
   float pdf_selection = 1.0f;
   int selected_emitter = -1;
-  int object_emitter = 0;
   int node_index = light_tree_root_node_index(kg, object_receiver);
-  /* The first two dimensions of the Sobol sequence have better stratification. */
-  float rand_selection = rand.z;
+  float rand_selection = rand;
 
   float3 local_P = P;
 
@@ -743,7 +741,7 @@ ccl_device_noinline bool light_tree_sample(KernelGlobals kg,
       }
 
       /* Continue with the picked mesh light. */
-      object_emitter = kernel_data_fetch(light_tree_emitters, selected_emitter).mesh.object_id;
+      ls->object = kernel_data_fetch(light_tree_emitters, selected_emitter).mesh.object_id;
       continue;
     }
 
@@ -766,27 +764,18 @@ ccl_device_noinline bool light_tree_sample(KernelGlobals kg,
     pdf_leaf *= (node_index == left_index) ? left_prob : (1.0f - left_prob);
   }
 
-  pdf_selection *= pdf_leaf;
+  ls->emitter_id = selected_emitter;
+  ls->pdf_selection = pdf_selection * pdf_leaf;
 
-  return light_sample<in_volume_segment>(kg,
-                                         float3_to_float2(rand),
-                                         time,
-                                         P,
-                                         N_or_D,
-                                         object_receiver,
-                                         shader_flags,
-                                         bounce,
-                                         path_flag,
-                                         selected_emitter,
-                                         object_emitter,
-                                         pdf_selection,
-                                         ls);
+  return true;
 }
 
 /* We need to be able to find the probability of selecting a given light for MIS. */
+template<bool in_volume_segment>
 ccl_device float light_tree_pdf(KernelGlobals kg,
                                 float3 P,
                                 float3 N,
+                                const float dt,
                                 const int path_flag,
                                 const int object_emitter,
                                 const uint index_emitter,
@@ -796,7 +785,7 @@ ccl_device float light_tree_pdf(KernelGlobals kg,
 
   ccl_global const KernelLightTreeEmitter *kemitter = &kernel_data_fetch(light_tree_emitters,
                                                                          index_emitter);
-  int root_index;
+  int subtree_root_index;
   uint bit_trail, target_emitter;
 
   if (is_triangle(kemitter)) {
@@ -805,16 +794,17 @@ ccl_device float light_tree_pdf(KernelGlobals kg,
     target_emitter = kernel_data_fetch(object_to_tree, object_emitter);
     ccl_global const KernelLightTreeEmitter *kmesh = &kernel_data_fetch(light_tree_emitters,
                                                                         target_emitter);
-    root_index = kmesh->mesh.node_id;
-    ccl_global const KernelLightTreeNode *kroot = &kernel_data_fetch(light_tree_nodes, root_index);
+    subtree_root_index = kmesh->mesh.node_id;
+    ccl_global const KernelLightTreeNode *kroot = &kernel_data_fetch(light_tree_nodes,
+                                                                     subtree_root_index);
     bit_trail = kroot->bit_trail;
 
     if (kroot->type == LIGHT_TREE_INSTANCE) {
-      root_index = kroot->instance.reference;
+      subtree_root_index = kroot->instance.reference;
     }
   }
   else {
-    root_index = 0;
+    subtree_root_index = -1;
     bit_trail = kemitter->bit_trail;
     target_emitter = index_emitter;
   }
@@ -836,8 +826,8 @@ ccl_device float light_tree_pdf(KernelGlobals kg,
       for (int i = 0; i < knode->num_emitters; i++) {
         const int emitter = knode->leaf.first_emitter + i;
         float max_importance, min_importance;
-        light_tree_emitter_importance<false>(
-            kg, P, N, 0, has_transmission, emitter, max_importance, min_importance);
+        light_tree_emitter_importance<in_volume_segment>(
+            kg, P, N, dt, has_transmission, emitter, max_importance, min_importance);
         num_has_importance += (max_importance > 0);
         if (emitter == target_emitter) {
           target_max_importance = max_importance;
@@ -856,13 +846,13 @@ ccl_device float light_tree_pdf(KernelGlobals kg,
         return 0.0f;
       }
 
-      if (root_index) {
+      if (subtree_root_index != -1) {
         /* Arrived at the mesh light. Continue with the subtree. */
         float unused;
-        light_tree_to_local_space<false>(kg, object_emitter, P, N, unused);
+        light_tree_to_local_space<in_volume_segment>(kg, object_emitter, P, N, unused);
 
-        node_index = root_index;
-        root_index = 0;
+        node_index = subtree_root_index;
+        subtree_root_index = -1;
         target_emitter = index_emitter;
         bit_trail = kemitter->bit_trail;
         continue;
@@ -877,8 +867,8 @@ ccl_device float light_tree_pdf(KernelGlobals kg,
     const int right_index = knode->inner.right_child;
 
     float left_prob;
-    if (!get_left_probability<false>(
-            kg, P, N, 0, has_transmission, left_index, right_index, left_prob))
+    if (!get_left_probability<in_volume_segment>(
+            kg, P, N, dt, has_transmission, left_index, right_index, left_prob))
     {
       return 0.0f;
     }
@@ -896,4 +886,27 @@ ccl_device float light_tree_pdf(KernelGlobals kg,
   }
 }
 
+/* If the function is called in volume, retrieve the previous point in volume segment, and compute
+ * pdf from there. Otherwise compute from the current shading point. */
+ccl_device_inline float light_tree_pdf(KernelGlobals kg,
+                                       float3 P,
+                                       float3 N,
+                                       const float dt,
+                                       const int path_flag,
+                                       const int emitter_object,
+                                       const uint emitter_id,
+                                       const int object_receiver)
+{
+  if (path_flag & PATH_RAY_VOLUME_SCATTER) {
+    const float3 D_times_t = N;
+    const float3 D = normalize(D_times_t);
+    P = P - D_times_t;
+    return light_tree_pdf<true>(
+        kg, P, D, dt, path_flag, emitter_object, emitter_id, object_receiver);
+  }
+
+  return light_tree_pdf<false>(
+      kg, P, N, 0.0f, path_flag, emitter_object, emitter_id, object_receiver);
+}
+
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/light/triangle.h b/intern/cycles/kernel/light/triangle.h
index 58fc8ea1d92..8007c8d6123 100644
--- a/intern/cycles/kernel/light/triangle.h
+++ b/intern/cycles/kernel/light/triangle.h
@@ -269,6 +269,26 @@ ccl_device_forceinline bool triangle_light_sample(KernelGlobals kg,
   return (ls->pdf > 0.0f);
 }
 
+/* Find the ray segment lit by the triangle light. */
+ccl_device_inline bool triangle_light_valid_ray_segment(KernelGlobals kg,
+                                                        const float3 P,
+                                                        const float3 D,
+                                                        ccl_private float2 *t_range,
+                                                        const ccl_private LightSample *ls)
+{
+  const int shader_flag = kernel_data_fetch(shaders, ls->shader & SHADER_MASK).flags;
+  const int SD_MIS_BOTH = SD_MIS_BACK | SD_MIS_FRONT;
+  if ((shader_flag & SD_MIS_BOTH) == SD_MIS_BOTH) {
+    /* Both sides are sampled, the complete ray segment is visible. */
+    return true;
+  }
+
+  /* Only one side is sampled, intersect the ray and the triangle light plane to find the visible
+   * ray segment. Flip normal if Emission Sampling is set to back. */
+  const float3 N = ls->Ng;
+  return ray_plane_intersect((shader_flag & SD_MIS_BACK) ? -N : N, P, D, t_range);
+}
+
 template<bool in_volume_segment>
 ccl_device_forceinline bool triangle_light_tree_parameters(
     KernelGlobals kg,
@@ -307,9 +327,8 @@ ccl_device_forceinline bool triangle_light_tree_parameters(
   }
 
   const bool front_facing = bcone.theta_o != 0.0f || dot(bcone.axis, point_to_centroid) < 0;
-  const bool in_volume = is_zero(N);
 
-  return (front_facing && shape_above_surface) || in_volume;
+  return front_facing && shape_above_surface;
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h
index 2ed5a790199..ce1c1da5168 100644
--- a/intern/cycles/kernel/types.h
+++ b/intern/cycles/kernel/types.h
@@ -45,6 +45,7 @@ CCL_NAMESPACE_BEGIN
 #define OBJECT_NONE (~0)
 #define PRIM_NONE (~0)
 #define LAMP_NONE (~0)
+#define EMITTER_NONE (~0)
 #define ID_NONE (0.0f)
 #define PASS_UNUSED (~0)
 #define LIGHTGROUP_NONE (~0)
@@ -1376,6 +1377,8 @@ typedef struct KernelSpotLight {
   int is_sphere;
   /* For non-uniform object scaling, the actual spread might be different. */
   float cos_half_larger_spread;
+  /* Distance from the apex of the smallest enclosing cone of the light spread to light center. */
+  float ray_segment_dp;
 } KernelSpotLight;
 
 /* PointLight is SpotLight with only radius and invarea being used. */
diff --git a/intern/cycles/scene/light.cpp b/intern/cycles/scene/light.cpp
index 78d237bcd8c..fb424a8fadf 100644
--- a/intern/cycles/scene/light.cpp
+++ b/intern/cycles/scene/light.cpp
@@ -1362,6 +1362,9 @@ void LightManager::device_update_lights(Device *device, DeviceScene *dscene, Sce
       /* Choose the angle which spans a larger cone. */
       klights[light_index].spot.cos_half_larger_spread = inversesqrtf(
           1.0f + tan_sq * fmaxf(len_u_sq, len_v_sq) / len_w_sq);
+      /* radius / sin(half_angle_small) */
+      klights[light_index].spot.ray_segment_dp =
+          light->size * sqrtf(1.0f + len_w_sq / (tan_sq * fminf(len_u_sq, len_v_sq)));
     }
 
     klights[light_index].shader_id = shader_id;
diff --git a/intern/cycles/util/math.h b/intern/cycles/util/math.h
index 7d5cab7e30c..cdea258c916 100644
--- a/intern/cycles/util/math.h
+++ b/intern/cycles/util/math.h
@@ -1030,6 +1030,46 @@ ccl_device_inline uint32_t reverse_integer_bits(uint32_t x)
 #endif
 }
 
+/* Check if intervals (first->x, first->y) and (second.x, second.y) intersect, and replace the
+ * first interval with their intersection. */
+ccl_device_inline bool intervals_intersect(ccl_private float2 *first, const float2 second)
+{
+  first->x = fmaxf(first->x, second.x);
+  first->y = fminf(first->y, second.y);
+
+  return first->x < first->y;
+}
+
+/* Solve quadratic equation a*x^2 + b*x + c = 0, adapted from Mitsuba 3.
+ * The solution is ordered so that x1 <= x2.
+ * Returns true if at least one solution is found. */
+ccl_device_inline bool solve_quadratic(
+    const float a, const float b, const float c, ccl_private float &x1, ccl_private float &x2)
+{
+  /* If the equation is linear, the solution is -c/b, but b has to be non-zero. */
+  const bool valid_linear = (a == 0.0f) && (b != 0.0f);
+  x1 = x2 = -c / b;
+
+  const float discriminant = sqr(b) - 4.0f * a * c;
+  /* Allow slightly negative discriminant in case of numerical precision issues. */
+  const bool valid_quadratic = (a != 0.0f) && (discriminant > -1e-5f);
+
+  if (valid_quadratic) {
+    /* Numerically stable version of (-b ± sqrt(discriminant)) / (2 * a), avoiding catastrophic
+     * cancellation when `b` is very close to `sqrt(discriminant)`, by finding the solution of
+     * greater magnitude which does not suffer from loss of precision, then using the identity
+     * x1 * x2 = c / a. */
+    const float temp = -0.5f * (b + copysignf(safe_sqrtf(discriminant), b));
+    const float r1 = temp / a;
+    const float r2 = c / temp;
+
+    x1 = fminf(r1, r2);
+    x2 = fmaxf(r1, r2);
+  }
+
+  return (valid_linear || valid_quadratic);
+}
+
 CCL_NAMESPACE_END
 
 #endif /* __UTIL_MATH_H__ */
diff --git a/intern/cycles/util/math_intersect.h b/intern/cycles/util/math_intersect.h
index b09cf2a4b1b..2e4b9c979f7 100644
--- a/intern/cycles/util/math_intersect.h
+++ b/intern/cycles/util/math_intersect.h
@@ -302,6 +302,140 @@ ccl_device bool ray_quad_intersect(float3 ray_P,
   return true;
 }
 
+/* Find the ray segment that lies on the same side as the normal `N` of the plane.
+ * `P` is the vector pointing from any point on the plane to the ray origin. */
+ccl_device bool ray_plane_intersect(const float3 N,
+                                    const float3 P,
+                                    const float3 ray_D,
+                                    ccl_private float2 *t_range)
+{
+  const float DN = dot(ray_D, N);
+
+  /* Ray parameter at which the ray crosses the plane. */
+  const float t = -dot(P, N) / DN;
+
+  /* Limit the range to the positive side. */
+  if (DN > 0.0f) {
+    t_range->x = fmaxf(t_range->x, t);
+  }
+  else {
+    t_range->y = fminf(t_range->y, t);
+  }
+
+  return t_range->x < t_range->y;
+}
+
+/* Find the ray segment inside an axis-aligned bounding box. */
+ccl_device bool ray_aabb_intersect(const float3 bbox_min,
+                                   const float3 bbox_max,
+                                   const float3 ray_P,
+                                   const float3 ray_D,
+                                   ccl_private float2 *t_range)
+{
+  const float3 inv_ray_D = rcp(ray_D);
+
+  /* Absolute distances to lower and upper box coordinates. */
+  const float3 t_lower = (bbox_min - ray_P) * inv_ray_D;
+  const float3 t_upper = (bbox_max - ray_P) * inv_ray_D;
+
+  /* The four t-intervals (for x-/y-/z-slabs, and ray p(t)). */
+  const float4 tmins = float3_to_float4(min(t_lower, t_upper), t_range->x);
+  const float4 tmaxes = float3_to_float4(max(t_lower, t_upper), t_range->y);
+
+  /* Max of mins and min of maxes. */
+  const float tmin = reduce_max(tmins);
+  const float tmax = reduce_min(tmaxes);
+
+  *t_range = make_float2(tmin, tmax);
+
+  return tmin < tmax;
+}
+
+/* Find the segment of a ray defined by P + D * t that lies inside a cylinder defined by
+ * (x / len_u)^2 + (y / len_v)^2 = 1. */
+ccl_device_inline bool ray_infinite_cylinder_intersect(const float3 P,
+                                                       const float3 D,
+                                                       const float len_u,
+                                                       const float len_v,
+                                                       ccl_private float2 *t_range)
+{
+  /* Convert to a 2D problem. */
+  const float2 inv_len = 1.0f / make_float2(len_u, len_v);
+  float2 P_proj = float3_to_float2(P) * inv_len;
+  const float2 D_proj = float3_to_float2(D) * inv_len;
+
+  /* Solve quadratic equation a*t^2 + 2b*t + c = 0. */
+  const float a = dot(D_proj, D_proj);
+  float b = dot(P_proj, D_proj);
+
+  /* Move ray origin closer to the cylinder to avoid precision issues when the ray is far away. */
+  const float t_mid = -b / a;
+  P_proj += D_proj * t_mid;
+
+  /* Recompute b from the shifted origin. */
+  b = dot(P_proj, D_proj);
+  const float c = dot(P_proj, P_proj) - 1.0f;
+
+  float tmin, tmax;
+  const bool valid = solve_quadratic(a, 2.0f * b, c, tmin, tmax);
+
+  return valid && intervals_intersect(t_range, make_float2(tmin, tmax) + t_mid);
+}
+
+/* *
+ * Find the ray segment inside a single-sided cone.
+ *
+ * \param axis: a unit-length direction around which the cone has a circular symmetry
+ * \param P: the vector pointing from the cone apex to the ray origin
+ * \param D: the direction of the ray, does not need to have unit-length
+ * \param cos_angle_sq: `sqr(cos(half_aperture_of_the_cone))`
+ * \param t_range: the lower and upper bounds between which the ray lies inside the cone
+ * \return whether the intersection exists and is in the provided range
+ *
+ * See https://www.geometrictools.com/Documentation/IntersectionLineCone.pdf for illustration
+ */
+ccl_device_inline bool ray_cone_intersect(const float3 axis,
+                                          const float3 P,
+                                          float3 D,
+                                          const float cos_angle_sq,
+                                          ccl_private float2 *t_range)
+{
+  if (cos_angle_sq < 1e-4f) {
+    /* The cone is nearly a plane. */
+    return ray_plane_intersect(axis, P, D, t_range);
+  }
+
+  const float inv_len = inversesqrtf(len_squared(D));
+  D *= inv_len;
+
+  const float AD = dot(axis, D);
+  const float AP = dot(axis, P);
+
+  const float a = sqr(AD) - cos_angle_sq;
+  const float b = 2.0f * (AD * AP - cos_angle_sq * dot(D, P));
+  const float c = sqr(AP) - cos_angle_sq * dot(P, P);
+
+  float tmin = 0.0f, tmax = FLT_MAX;
+  bool valid = solve_quadratic(a, b, c, tmin, tmax);
+
+  /* Check if the intersections are in the same hemisphere as the cone. */
+  const bool tmin_valid = AP + tmin * AD > 0.0f;
+  const bool tmax_valid = AP + tmax * AD > 0.0f;
+
+  valid &= (tmin_valid || tmax_valid);
+
+  if (!tmax_valid) {
+    tmax = tmin;
+    tmin = 0.0f;
+  }
+  else if (!tmin_valid) {
+    tmin = tmax;
+    tmax = FLT_MAX;
+  }
+
+  return valid && intervals_intersect(t_range, make_float2(tmin, tmax) * inv_len);
+}
+
 CCL_NAMESPACE_END
 
 #endif /* __UTIL_MATH_INTERSECT_H__ */
diff --git a/intern/cycles/util/transform.h b/intern/cycles/util/transform.h
index 208c68dc5a1..0263be7c841 100644
--- a/intern/cycles/util/transform.h
+++ b/intern/cycles/util/transform.h
@@ -161,6 +161,17 @@ ccl_device_inline Transform make_transform(float a,
   return t;
 }
 
+ccl_device_inline Transform make_transform(const float3 x, const float3 y, const float3 z)
+{
+  Transform t;
+
+  t.x = float3_to_float4(x, 0.0f);
+  t.y = float3_to_float4(y, 0.0f);
+  t.z = float3_to_float4(z, 0.0f);
+
+  return t;
+}
+
 ccl_device_inline Transform euler_to_transform(const float3 euler)
 {
   float cx = cosf(euler.x);
diff --git a/intern/ghost/intern/GHOST_WindowWayland.cc b/intern/ghost/intern/GHOST_WindowWayland.cc
index 93b40b941e3..0b397732744 100644
--- a/intern/ghost/intern/GHOST_WindowWayland.cc
+++ b/intern/ghost/intern/GHOST_WindowWayland.cc
@@ -1886,6 +1886,30 @@ GHOST_WindowWayland::GHOST_WindowWayland(GHOST_SystemWayland *system,
     gwl_window_state_set(window_, state);
   }
 
+  /* NOTE(@ideasman42): Round trips are important before committing.
+   * This is needed because setting the state is likely to resize the window
+   * (in the case of maximized & full-screen), "normal" windows may still be resized when
+   * they are too large or with tiling window-managers.
+   *
+   * The additional updates allow for the actual size to be configured by the window manager
+   * which is read back before committing the surface. This avoids displaying the buffer
+   * before it's resized (avoiding flickering).
+   *
+   * Without the round-trip here:
+   * - The window will be created and this function will return using the requested buffer size,
+   *   instead of the window size which ends up being used (causing a visible flicker).
+   *   This has the downside that Blender's internal window state has the outdated size
+   *   which then gets immediately resized, causing a noticeable glitch.
+   * - The window decorations will be displayed at the wrong size before refreshing
+   *   at the new size.
+   * - GNOME-Shell 46 shows the previous buffer-size under some conditions, see #119871.
+   * - 2x updates are needed for RIVER & HYPRLAND.
+   */
+  for (int i = 0; i < 2; i++) {
+    wl_display_flush(system->wl_display_get());
+    wl_display_dispatch(system->wl_display_get());
+  }
+
   /* Commit after setting the buffer.
    * While postponing until after the buffer drawing is context is set
    * isn't essential, it reduces flickering. */
diff --git a/scripts/startup/bl_ui/properties_data_armature.py b/scripts/startup/bl_ui/properties_data_armature.py
index 7f14f4bd75b..a8582708dd4 100644
--- a/scripts/startup/bl_ui/properties_data_armature.py
+++ b/scripts/startup/bl_ui/properties_data_armature.py
@@ -158,7 +158,7 @@ class ARMATURE_MT_collection_tree_context_menu(Menu):
         # editable or not. That means this menu has to do the disabling for it.
         sub = layout.column()
         sub.enabled = not active_bcoll_is_locked
-        sub.operator("armature.collection_add", text="Add Child Collection")
+        sub.operator("armature.collection_add", text="Add Bone Collection")
         sub.operator("armature.collection_remove")
         sub.operator("armature.collection_remove_unused", text="Remove Unused Collections")
 
diff --git a/scripts/startup/bl_ui/space_view3d.py b/scripts/startup/bl_ui/space_view3d.py
index ef75e104829..1fbd1c136a4 100644
--- a/scripts/startup/bl_ui/space_view3d.py
+++ b/scripts/startup/bl_ui/space_view3d.py
@@ -2187,6 +2187,7 @@ class VIEW3D_MT_paint_grease_pencil(Menu):
         layout.separator()
 
         layout.menu("VIEW3D_MT_edit_greasepencil_showhide")
+        layout.menu("VIEW3D_MT_edit_greasepencil_cleanup")
 
         layout.separator()
 
@@ -5803,6 +5804,15 @@ class VIEW3D_MT_edit_greasepencil_showhide(Menu):
         layout.operator("grease_pencil.layer_hide", text="Hide Inactive Layers").unselected = True
 
 
+class VIEW3D_MT_edit_greasepencil_cleanup(Menu):
+    bl_label = "Cleanup"
+
+    def draw(self, _context):
+        layout = self.layout
+
+        layout.operator("grease_pencil.clean_loose")
+
+
 class VIEW3D_MT_edit_greasepencil(Menu):
     bl_label = "Grease Pencil"
 
@@ -5828,7 +5838,7 @@ class VIEW3D_MT_edit_greasepencil(Menu):
 
         layout.menu("VIEW3D_MT_edit_greasepencil_showhide")
         layout.operator_menu_enum("grease_pencil.separate", "mode", text="Separate")
-        layout.operator("grease_pencil.clean_loose")
+        layout.menu("VIEW3D_MT_edit_greasepencil_cleanup")
 
         layout.separator()
 
@@ -7526,14 +7536,14 @@ class VIEW3D_PT_snapping(Panel):
         layout = self.layout
         col = layout.column()
 
-        col.label(text="Snap With")
+        col.label(text="Snap Base")
         row = col.row(align=True)
         row.prop(tool_settings, "snap_target", expand=True)
 
-        col.label(text="Snap To")
+        col.label(text="Snap Target")
         col.prop(tool_settings, "snap_elements_base", expand=True)
 
-        col.label(text="Snap Individual Elements To")
+        col.label(text="Snap Target for Individual Elements")
         col.prop(tool_settings, "snap_elements_individual", expand=True)
 
         col.separator()
@@ -8988,6 +8998,7 @@ classes = (
     VIEW3D_MT_edit_gpencil_delete,
     VIEW3D_MT_edit_gpencil_showhide,
     VIEW3D_MT_edit_greasepencil_showhide,
+    VIEW3D_MT_edit_greasepencil_cleanup,
     VIEW3D_MT_weight_gpencil,
     VIEW3D_MT_gpencil_animation,
     VIEW3D_MT_gpencil_simplify,
diff --git a/source/blender/blenkernel/intern/attribute_access.cc b/source/blender/blenkernel/intern/attribute_access.cc
index 6b947417e91..f69184175a3 100644
--- a/source/blender/blenkernel/intern/attribute_access.cc
+++ b/source/blender/blenkernel/intern/attribute_access.cc
@@ -253,54 +253,6 @@ static AttributeIDRef attribute_id_from_custom_data_layer(const CustomDataLayer
   return layer.name;
 }
 
-static bool add_builtin_type_custom_data_layer_from_init(CustomData &custom_data,
-                                                         const eCustomDataType data_type,
-                                                         const int domain_num,
-                                                         const AttributeInit &initializer)
-{
-  switch (initializer.type) {
-    case AttributeInit::Type::Construct: {
-      void *data = CustomData_add_layer(&custom_data, data_type, CD_CONSTRUCT, domain_num);
-      return data != nullptr;
-    }
-    case AttributeInit::Type::DefaultValue: {
-      void *data = CustomData_add_layer(&custom_data, data_type, CD_SET_DEFAULT, domain_num);
-      return data != nullptr;
-    }
-    case AttributeInit::Type::VArray: {
-      void *data = CustomData_add_layer(&custom_data, data_type, CD_CONSTRUCT, domain_num);
-      if (data == nullptr) {
-        return false;
-      }
-      const GVArray &varray = static_cast<const AttributeInitVArray &>(initializer).varray;
-      varray.materialize_to_uninitialized(varray.index_range(), data);
-      return true;
-    }
-    case AttributeInit::Type::MoveArray: {
-      void *src_data = static_cast<const AttributeInitMoveArray &>(initializer).data;
-      const void *stored_data = CustomData_add_layer_with_data(
-          &custom_data, data_type, src_data, domain_num, nullptr);
-      if (stored_data == nullptr) {
-        return false;
-      }
-      if (stored_data != src_data) {
-        MEM_freeN(src_data);
-        return true;
-      }
-      return true;
-    }
-    case AttributeInit::Type::Shared: {
-      const AttributeInitShared &init = static_cast<const AttributeInitShared &>(initializer);
-      const void *stored_data = CustomData_add_layer_with_data(
-          &custom_data, data_type, const_cast<void *>(init.data), domain_num, init.sharing_info);
-      return stored_data != nullptr;
-    }
-  }
-
-  BLI_assert_unreachable();
-  return false;
-}
-
 static void *add_generic_custom_data_layer(CustomData &custom_data,
                                            const eCustomDataType data_type,
                                            const eCDAllocType alloctype,
@@ -393,10 +345,7 @@ static bool custom_data_layer_matches_attribute_id(const CustomDataLayer &layer,
 
 bool BuiltinCustomDataLayerProvider::layer_exists(const CustomData &custom_data) const
 {
-  if (stored_as_named_attribute_) {
-    return CustomData_get_named_layer_index(&custom_data, stored_type_, name_) != -1;
-  }
-  return CustomData_has_layer(&custom_data, stored_type_);
+  return CustomData_get_named_layer_index(&custom_data, data_type_, name_) != -1;
 }
 
 GAttributeReader BuiltinCustomDataLayerProvider::try_get_for_read(const void *owner) const
@@ -416,13 +365,7 @@ GAttributeReader BuiltinCustomDataLayerProvider::try_get_for_read(const void *ow
     return {};
   }
 
-  int index;
-  if (stored_as_named_attribute_) {
-    index = CustomData_get_named_layer_index(custom_data, stored_type_, name_);
-  }
-  else {
-    index = CustomData_get_layer_index(custom_data, stored_type_);
-  }
+  const int index = CustomData_get_named_layer_index(custom_data, data_type_, name_);
   if (index == -1) {
     return {};
   }
@@ -452,13 +395,7 @@ GAttributeWriter BuiltinCustomDataLayerProvider::try_get_for_write(void *owner)
     return {};
   }
 
-  void *data = nullptr;
-  if (stored_as_named_attribute_) {
-    data = CustomData_get_layer_named_for_write(custom_data, stored_type_, name_, element_num);
-  }
-  else {
-    data = CustomData_get_layer_for_write(custom_data, stored_type_, element_num);
-  }
+  void *data = CustomData_get_layer_named_for_write(custom_data, data_type_, name_, element_num);
   if (data == nullptr) {
     return {};
   }
@@ -475,57 +412,42 @@ bool BuiltinCustomDataLayerProvider::try_delete(void *owner) const
     return {};
   }
 
-  auto update = [&]() {
+  const int element_num = custom_data_access_.get_element_num(owner);
+  if (CustomData_free_layer_named(custom_data, name_, element_num)) {
     if (update_on_change_ != nullptr) {
       update_on_change_(owner);
     }
-  };
-
-  const int element_num = custom_data_access_.get_element_num(owner);
-  if (stored_as_named_attribute_) {
-    if (CustomData_free_layer_named(custom_data, name_, element_num)) {
-      update();
-      return true;
-    }
-    return false;
-  }
-
-  const int layer_index = CustomData_get_layer_index(custom_data, stored_type_);
-  if (CustomData_free_layer(custom_data, stored_type_, element_num, layer_index)) {
-    update();
     return true;
   }
-
   return false;
 }
 
 bool BuiltinCustomDataLayerProvider::try_create(void *owner,
                                                 const AttributeInit &initializer) const
 {
-  if (createable_ != Creatable) {
-    return false;
-  }
   CustomData *custom_data = custom_data_access_.get_custom_data(owner);
   if (custom_data == nullptr) {
     return false;
   }
 
   const int element_num = custom_data_access_.get_element_num(owner);
-  if (stored_as_named_attribute_) {
-    if (CustomData_has_layer_named(custom_data, data_type_, name_)) {
-      /* Exists already. */
-      return false;
-    }
-    return add_custom_data_layer_from_attribute_init(
-        name_, *custom_data, stored_type_, element_num, initializer);
-  }
-
-  if (CustomData_get_layer(custom_data, stored_type_) != nullptr) {
+  if (CustomData_has_layer_named(custom_data, data_type_, name_)) {
     /* Exists already. */
     return false;
   }
-  return add_builtin_type_custom_data_layer_from_init(
-      *custom_data, stored_type_, element_num, initializer);
+  if (add_custom_data_layer_from_attribute_init(
+          name_, *custom_data, data_type_, element_num, initializer))
+  {
+    if (initializer.type != AttributeInit::Type::Construct) {
+      /* Avoid calling update function when values are not initialized. In that case
+       * values must be set elsewhere anyway, which will cause a separate update tag. */
+      if (update_on_change_ != nullptr) {
+        update_on_change_(owner);
+      }
+    }
+    return true;
+  }
+  return false;
 }
 
 bool BuiltinCustomDataLayerProvider::exists(const void *owner) const
@@ -534,10 +456,7 @@ bool BuiltinCustomDataLayerProvider::exists(const void *owner) const
   if (custom_data == nullptr) {
     return false;
   }
-  if (stored_as_named_attribute_) {
-    return CustomData_has_layer_named(custom_data, stored_type_, name_);
-  }
-  return CustomData_get_layer(custom_data, stored_type_) != nullptr;
+  return CustomData_has_layer_named(custom_data, data_type_, name_);
 }
 
 GAttributeReader CustomDataAttributeProvider::try_get_for_read(
diff --git a/source/blender/blenkernel/intern/attribute_access_intern.hh b/source/blender/blenkernel/intern/attribute_access_intern.hh
index 663aab7f69c..ae372a00089 100644
--- a/source/blender/blenkernel/intern/attribute_access_intern.hh
+++ b/source/blender/blenkernel/intern/attribute_access_intern.hh
@@ -31,14 +31,10 @@ struct CustomDataAccessInfo {
  * A #BuiltinAttributeProvider is responsible for exactly one attribute on a geometry component.
  * The attribute is identified by its name and has a fixed domain and type. Builtin attributes do
  * not follow the same loose rules as other attributes, because they are mapped to internal
- * "legacy" data structures. For example, some builtin attributes cannot be deleted. */
+ * "legacy" data structures. For example, some builtin attributes cannot be deleted.
+ */
 class BuiltinAttributeProvider {
  public:
-  /* Some utility enums to avoid hard to read booleans in function calls. */
-  enum CreatableEnum {
-    Creatable,
-    NonCreatable,
-  };
   enum DeletableEnum {
     Deletable,
     NonDeletable,
@@ -48,7 +44,6 @@ class BuiltinAttributeProvider {
   const std::string name_;
   const AttrDomain domain_;
   const eCustomDataType data_type_;
-  const CreatableEnum createable_;
   const DeletableEnum deletable_;
   const AttributeValidator validator_;
 
@@ -56,13 +51,11 @@ class BuiltinAttributeProvider {
   BuiltinAttributeProvider(std::string name,
                            const AttrDomain domain,
                            const eCustomDataType data_type,
-                           const CreatableEnum createable,
                            const DeletableEnum deletable,
                            AttributeValidator validator = {})
       : name_(std::move(name)),
         domain_(domain),
         data_type_(data_type),
-        createable_(createable),
         deletable_(deletable),
         validator_(validator)
   {
@@ -174,27 +167,21 @@ class CustomDataAttributeProvider final : public DynamicAttributesProvider {
  */
 class BuiltinCustomDataLayerProvider final : public BuiltinAttributeProvider {
   using UpdateOnChange = void (*)(void *owner);
-  const eCustomDataType stored_type_;
   const CustomDataAccessInfo custom_data_access_;
   const UpdateOnChange update_on_change_;
-  bool stored_as_named_attribute_;
 
  public:
   BuiltinCustomDataLayerProvider(std::string attribute_name,
                                  const AttrDomain domain,
-                                 const eCustomDataType attribute_type,
-                                 const eCustomDataType stored_type,
-                                 const CreatableEnum creatable,
+                                 const eCustomDataType data_type,
                                  const DeletableEnum deletable,
                                  const CustomDataAccessInfo custom_data_access,
-                                 const UpdateOnChange update_on_write,
+                                 const UpdateOnChange update_on_change,
                                  const AttributeValidator validator = {})
       : BuiltinAttributeProvider(
-            std::move(attribute_name), domain, attribute_type, creatable, deletable, validator),
-        stored_type_(stored_type),
+            std::move(attribute_name), domain, data_type, deletable, validator),
         custom_data_access_(custom_data_access),
-        update_on_change_(update_on_write),
-        stored_as_named_attribute_(data_type_ == stored_type_)
+        update_on_change_(update_on_change)
   {
   }
 
diff --git a/source/blender/blenkernel/intern/bake_items_serialize.cc b/source/blender/blenkernel/intern/bake_items_serialize.cc
index 57b06df5343..de1620d1e64 100644
--- a/source/blender/blenkernel/intern/bake_items_serialize.cc
+++ b/source/blender/blenkernel/intern/bake_items_serialize.cc
@@ -972,9 +972,14 @@ static std::shared_ptr<DictionaryValue> serialize_geometry_set(const GeometrySet
 
     auto io_references = io_instances->append_array("references");
     for (const InstanceReference &reference : instances.references()) {
-      BLI_assert(reference.type() == InstanceReference::Type::GeometrySet);
-      io_references->append(
-          serialize_geometry_set(reference.geometry_set(), blob_writer, blob_sharing));
+      if (reference.type() == InstanceReference::Type::GeometrySet) {
+        const GeometrySet &geometry = reference.geometry_set();
+        io_references->append(serialize_geometry_set(geometry, blob_writer, blob_sharing));
+      }
+      else {
+        /* TODO: Support serializing object and collection references. */
+        io_references->append(serialize_geometry_set({}, blob_writer, blob_sharing));
+      }
     }
 
     auto io_attributes = serialize_attributes(
diff --git a/source/blender/blenkernel/intern/curves_geometry.cc b/source/blender/blenkernel/intern/curves_geometry.cc
index ccb67ec91d0..a4faa76d98e 100644
--- a/source/blender/blenkernel/intern/curves_geometry.cc
+++ b/source/blender/blenkernel/intern/curves_geometry.cc
@@ -58,6 +58,8 @@ CurvesGeometry::CurvesGeometry() : CurvesGeometry(0, 0) {}
 
 CurvesGeometry::CurvesGeometry(const int point_num, const int curve_num)
 {
+  this->runtime = MEM_new<CurvesGeometryRuntime>(__func__);
+
   this->point_num = point_num;
   this->curve_num = curve_num;
   CustomData_reset(&this->point_data);
@@ -67,8 +69,6 @@ CurvesGeometry::CurvesGeometry(const int point_num, const int curve_num)
   this->attributes_for_write().add<float3>(
       "position", AttrDomain::Point, AttributeInitConstruct());
 
-  this->runtime = MEM_new<CurvesGeometryRuntime>(__func__);
-
   if (curve_num > 0) {
     this->curve_offsets = static_cast<int *>(
         MEM_malloc_arrayN(this->curve_num + 1, sizeof(int), __func__));
diff --git a/source/blender/blenkernel/intern/editmesh.cc b/source/blender/blenkernel/intern/editmesh.cc
index 750be28c146..cc1c8cf16ea 100644
--- a/source/blender/blenkernel/intern/editmesh.cc
+++ b/source/blender/blenkernel/intern/editmesh.cc
@@ -91,7 +91,7 @@ void BKE_editmesh_looptris_calc_with_partial_ex(BMEditMesh *em,
                                                 const BMeshCalcTessellation_Params *params)
 {
   BLI_assert(em->looptris.size() == poly_to_tri_count(em->bm->totface, em->bm->totloop));
-  BLI_assert(!em->looptris.is_empty());
+  BLI_assert(!(em->bm->totface && em->looptris.is_empty()));
 
   BM_mesh_calc_tessellation_with_partial_ex(em->bm, em->looptris, bmpinfo, params);
 }
diff --git a/source/blender/blenkernel/intern/geometry_component_curves.cc b/source/blender/blenkernel/intern/geometry_component_curves.cc
index 896480d0ad7..a59e14fe566 100644
--- a/source/blender/blenkernel/intern/geometry_component_curves.cc
+++ b/source/blender/blenkernel/intern/geometry_component_curves.cc
@@ -476,8 +476,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider position("position",
                                                  AttrDomain::Point,
                                                  CD_PROP_FLOAT3,
-                                                 CD_PROP_FLOAT3,
-                                                 BuiltinAttributeProvider::Creatable,
                                                  BuiltinAttributeProvider::NonDeletable,
                                                  point_access,
                                                  tag_component_positions_changed);
@@ -485,8 +483,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider radius("radius",
                                                AttrDomain::Point,
                                                CD_PROP_FLOAT,
-                                               CD_PROP_FLOAT,
-                                               BuiltinAttributeProvider::Creatable,
                                                BuiltinAttributeProvider::Deletable,
                                                point_access,
                                                tag_component_radii_changed);
@@ -494,8 +490,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider id("id",
                                            AttrDomain::Point,
                                            CD_PROP_INT32,
-                                           CD_PROP_INT32,
-                                           BuiltinAttributeProvider::Creatable,
                                            BuiltinAttributeProvider::Deletable,
                                            point_access,
                                            nullptr);
@@ -503,8 +497,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider tilt("tilt",
                                              AttrDomain::Point,
                                              CD_PROP_FLOAT,
-                                             CD_PROP_FLOAT,
-                                             BuiltinAttributeProvider::Creatable,
                                              BuiltinAttributeProvider::Deletable,
                                              point_access,
                                              tag_component_normals_changed);
@@ -512,8 +504,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider handle_right("handle_right",
                                                      AttrDomain::Point,
                                                      CD_PROP_FLOAT3,
-                                                     CD_PROP_FLOAT3,
-                                                     BuiltinAttributeProvider::Creatable,
                                                      BuiltinAttributeProvider::Deletable,
                                                      point_access,
                                                      tag_component_positions_changed);
@@ -521,8 +511,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider handle_left("handle_left",
                                                     AttrDomain::Point,
                                                     CD_PROP_FLOAT3,
-                                                    CD_PROP_FLOAT3,
-                                                    BuiltinAttributeProvider::Creatable,
                                                     BuiltinAttributeProvider::Deletable,
                                                     point_access,
                                                     tag_component_positions_changed);
@@ -536,8 +524,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider handle_type_right("handle_type_right",
                                                           AttrDomain::Point,
                                                           CD_PROP_INT8,
-                                                          CD_PROP_INT8,
-                                                          BuiltinAttributeProvider::Creatable,
                                                           BuiltinAttributeProvider::Deletable,
                                                           point_access,
                                                           tag_component_topology_changed,
@@ -546,8 +532,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider handle_type_left("handle_type_left",
                                                          AttrDomain::Point,
                                                          CD_PROP_INT8,
-                                                         CD_PROP_INT8,
-                                                         BuiltinAttributeProvider::Creatable,
                                                          BuiltinAttributeProvider::Deletable,
                                                          point_access,
                                                          tag_component_topology_changed,
@@ -556,21 +540,17 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider nurbs_weight("nurbs_weight",
                                                      AttrDomain::Point,
                                                      CD_PROP_FLOAT,
-                                                     CD_PROP_FLOAT,
-                                                     BuiltinAttributeProvider::Creatable,
                                                      BuiltinAttributeProvider::Deletable,
                                                      point_access,
                                                      tag_component_positions_changed);
 
   static const auto nurbs_order_clamp = mf::build::SI1_SO<int8_t, int8_t>(
       "NURBS Order Validate",
-      [](int8_t value) { return std::max<int8_t>(value, 0); },
+      [](int8_t value) { return std::max<int8_t>(value, 1); },
       mf::build::exec_presets::AllSpanOrSingle());
   static BuiltinCustomDataLayerProvider nurbs_order("nurbs_order",
                                                     AttrDomain::Curve,
                                                     CD_PROP_INT8,
-                                                    CD_PROP_INT8,
-                                                    BuiltinAttributeProvider::Creatable,
                                                     BuiltinAttributeProvider::Deletable,
                                                     curve_access,
                                                     tag_component_topology_changed,
@@ -585,8 +565,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider normal_mode("normal_mode",
                                                     AttrDomain::Curve,
                                                     CD_PROP_INT8,
-                                                    CD_PROP_INT8,
-                                                    BuiltinAttributeProvider::Creatable,
                                                     BuiltinAttributeProvider::Deletable,
                                                     curve_access,
                                                     tag_component_normals_changed,
@@ -595,8 +573,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider custom_normal("custom_normal",
                                                       AttrDomain::Point,
                                                       CD_PROP_FLOAT3,
-                                                      CD_PROP_FLOAT3,
-                                                      BuiltinAttributeProvider::Creatable,
                                                       BuiltinAttributeProvider::Deletable,
                                                       point_access,
                                                       tag_component_normals_changed);
@@ -610,8 +586,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider nurbs_knots_mode("knots_mode",
                                                          AttrDomain::Curve,
                                                          CD_PROP_INT8,
-                                                         CD_PROP_INT8,
-                                                         BuiltinAttributeProvider::Creatable,
                                                          BuiltinAttributeProvider::Deletable,
                                                          curve_access,
                                                          tag_component_topology_changed,
@@ -626,8 +600,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider curve_type("curve_type",
                                                    AttrDomain::Curve,
                                                    CD_PROP_INT8,
-                                                   CD_PROP_INT8,
-                                                   BuiltinAttributeProvider::Creatable,
                                                    BuiltinAttributeProvider::Deletable,
                                                    curve_access,
                                                    tag_component_curve_types_changed,
@@ -640,8 +612,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider resolution("resolution",
                                                    AttrDomain::Curve,
                                                    CD_PROP_INT32,
-                                                   CD_PROP_INT32,
-                                                   BuiltinAttributeProvider::Creatable,
                                                    BuiltinAttributeProvider::Deletable,
                                                    curve_access,
                                                    tag_component_topology_changed,
@@ -650,8 +620,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve()
   static BuiltinCustomDataLayerProvider cyclic("cyclic",
                                                AttrDomain::Curve,
                                                CD_PROP_BOOL,
-                                               CD_PROP_BOOL,
-                                               BuiltinAttributeProvider::Creatable,
                                                BuiltinAttributeProvider::Deletable,
                                                curve_access,
                                                tag_component_topology_changed);
diff --git a/source/blender/blenkernel/intern/geometry_component_instances.cc b/source/blender/blenkernel/intern/geometry_component_instances.cc
index 8137526d7f8..c3a4892ad57 100644
--- a/source/blender/blenkernel/intern/geometry_component_instances.cc
+++ b/source/blender/blenkernel/intern/geometry_component_instances.cc
@@ -133,8 +133,6 @@ static ComponentAttributeProviders create_attribute_providers_for_instances()
   static BuiltinCustomDataLayerProvider id("id",
                                            AttrDomain::Instance,
                                            CD_PROP_INT32,
-                                           CD_PROP_INT32,
-                                           BuiltinAttributeProvider::Creatable,
                                            BuiltinAttributeProvider::Deletable,
                                            instance_custom_data_access,
                                            nullptr);
@@ -142,8 +140,6 @@ static ComponentAttributeProviders create_attribute_providers_for_instances()
   static BuiltinCustomDataLayerProvider instance_transform("instance_transform",
                                                            AttrDomain::Instance,
                                                            CD_PROP_FLOAT4X4,
-                                                           CD_PROP_FLOAT4X4,
-                                                           BuiltinAttributeProvider::Creatable,
                                                            BuiltinAttributeProvider::NonDeletable,
                                                            instance_custom_data_access,
                                                            nullptr);
@@ -152,8 +148,6 @@ static ComponentAttributeProviders create_attribute_providers_for_instances()
   static BuiltinCustomDataLayerProvider reference_index(".reference_index",
                                                         AttrDomain::Instance,
                                                         CD_PROP_INT32,
-                                                        CD_PROP_INT32,
-                                                        BuiltinAttributeProvider::Creatable,
                                                         BuiltinAttributeProvider::NonDeletable,
                                                         instance_custom_data_access,
                                                         tag_component_reference_index_changed);
diff --git a/source/blender/blenkernel/intern/geometry_component_mesh.cc b/source/blender/blenkernel/intern/geometry_component_mesh.cc
index 9486615ed4f..e649853123f 100644
--- a/source/blender/blenkernel/intern/geometry_component_mesh.cc
+++ b/source/blender/blenkernel/intern/geometry_component_mesh.cc
@@ -1010,8 +1010,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
   static BuiltinCustomDataLayerProvider position("position",
                                                  AttrDomain::Point,
                                                  CD_PROP_FLOAT3,
-                                                 CD_PROP_FLOAT3,
-                                                 BuiltinAttributeProvider::Creatable,
                                                  BuiltinAttributeProvider::NonDeletable,
                                                  point_access,
                                                  tag_component_positions_changed);
@@ -1019,8 +1017,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
   static BuiltinCustomDataLayerProvider id("id",
                                            AttrDomain::Point,
                                            CD_PROP_INT32,
-                                           CD_PROP_INT32,
-                                           BuiltinAttributeProvider::Creatable,
                                            BuiltinAttributeProvider::Deletable,
                                            point_access,
                                            nullptr);
@@ -1035,8 +1031,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
   static BuiltinCustomDataLayerProvider material_index("material_index",
                                                        AttrDomain::Face,
                                                        CD_PROP_INT32,
-                                                       CD_PROP_INT32,
-                                                       BuiltinAttributeProvider::Creatable,
                                                        BuiltinAttributeProvider::Deletable,
                                                        face_access,
                                                        nullptr,
@@ -1049,8 +1043,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
   static BuiltinCustomDataLayerProvider edge_verts(".edge_verts",
                                                    AttrDomain::Edge,
                                                    CD_PROP_INT32_2D,
-                                                   CD_PROP_INT32_2D,
-                                                   BuiltinAttributeProvider::Creatable,
                                                    BuiltinAttributeProvider::NonDeletable,
                                                    edge_access,
                                                    nullptr,
@@ -1065,8 +1057,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
   static BuiltinCustomDataLayerProvider corner_vert(".corner_vert",
                                                     AttrDomain::Corner,
                                                     CD_PROP_INT32,
-                                                    CD_PROP_INT32,
-                                                    BuiltinAttributeProvider::Creatable,
                                                     BuiltinAttributeProvider::NonDeletable,
                                                     corner_access,
                                                     nullptr,
@@ -1074,8 +1064,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
   static BuiltinCustomDataLayerProvider corner_edge(".corner_edge",
                                                     AttrDomain::Corner,
                                                     CD_PROP_INT32,
-                                                    CD_PROP_INT32,
-                                                    BuiltinAttributeProvider::Creatable,
                                                     BuiltinAttributeProvider::NonDeletable,
                                                     corner_access,
                                                     nullptr,
@@ -1084,8 +1072,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
   static BuiltinCustomDataLayerProvider sharp_face("sharp_face",
                                                    AttrDomain::Face,
                                                    CD_PROP_BOOL,
-                                                   CD_PROP_BOOL,
-                                                   BuiltinAttributeProvider::Creatable,
                                                    BuiltinAttributeProvider::Deletable,
                                                    face_access,
                                                    tag_component_sharpness_changed);
@@ -1093,8 +1079,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh()
   static BuiltinCustomDataLayerProvider sharp_edge("sharp_edge",
                                                    AttrDomain::Edge,
                                                    CD_PROP_BOOL,
-                                                   CD_PROP_BOOL,
-                                                   BuiltinAttributeProvider::Creatable,
                                                    BuiltinAttributeProvider::Deletable,
                                                    edge_access,
                                                    tag_component_sharpness_changed);
diff --git a/source/blender/blenkernel/intern/geometry_component_pointcloud.cc b/source/blender/blenkernel/intern/geometry_component_pointcloud.cc
index 03a8ee2521d..4895f2d4323 100644
--- a/source/blender/blenkernel/intern/geometry_component_pointcloud.cc
+++ b/source/blender/blenkernel/intern/geometry_component_pointcloud.cc
@@ -147,24 +147,18 @@ static ComponentAttributeProviders create_attribute_providers_for_point_cloud()
   static BuiltinCustomDataLayerProvider position("position",
                                                  AttrDomain::Point,
                                                  CD_PROP_FLOAT3,
-                                                 CD_PROP_FLOAT3,
-                                                 BuiltinAttributeProvider::Creatable,
                                                  BuiltinAttributeProvider::NonDeletable,
                                                  point_access,
                                                  tag_component_positions_changed);
   static BuiltinCustomDataLayerProvider radius("radius",
                                                AttrDomain::Point,
                                                CD_PROP_FLOAT,
-                                               CD_PROP_FLOAT,
-                                               BuiltinAttributeProvider::Creatable,
                                                BuiltinAttributeProvider::Deletable,
                                                point_access,
                                                tag_component_radius_changed);
   static BuiltinCustomDataLayerProvider id("id",
                                            AttrDomain::Point,
                                            CD_PROP_INT32,
-                                           CD_PROP_INT32,
-                                           BuiltinAttributeProvider::Creatable,
                                            BuiltinAttributeProvider::Deletable,
                                            point_access,
                                            nullptr);
diff --git a/source/blender/blenkernel/intern/pointcloud.cc b/source/blender/blenkernel/intern/pointcloud.cc
index aa4a3c1d58e..141ac807479 100644
--- a/source/blender/blenkernel/intern/pointcloud.cc
+++ b/source/blender/blenkernel/intern/pointcloud.cc
@@ -62,11 +62,11 @@ static void pointcloud_init_data(ID *id)
 
   MEMCPY_STRUCT_AFTER(pointcloud, DNA_struct_default_get(PointCloud), id);
 
+  pointcloud->runtime = new blender::bke::PointCloudRuntime();
+
   CustomData_reset(&pointcloud->pdata);
   pointcloud->attributes_for_write().add<float3>(
       "position", blender::bke::AttrDomain::Point, blender::bke::AttributeInitConstruct());
-
-  pointcloud->runtime = new blender::bke::PointCloudRuntime();
 }
 
 static void pointcloud_copy_data(Main * /*bmain*/,
diff --git a/source/blender/compositor/CMakeLists.txt b/source/blender/compositor/CMakeLists.txt
index 6b7c153ddf9..38315eafe5f 100644
--- a/source/blender/compositor/CMakeLists.txt
+++ b/source/blender/compositor/CMakeLists.txt
@@ -584,20 +584,6 @@ if(WITH_COMPOSITOR_CPU)
     ${CMAKE_CURRENT_BINARY_DIR}/operations
   )
 
-  set(GENSRC_DIR ${CMAKE_CURRENT_BINARY_DIR}/operations)
-  set(GENSRC ${GENSRC_DIR}/COM_SMAAAreaTexture.h)
-  add_custom_command(
-    OUTPUT ${GENSRC}
-    COMMAND ${CMAKE_COMMAND} -E make_directory ${GENSRC_DIR}
-    COMMAND "$<TARGET_FILE:smaa_areatex>" ${GENSRC}
-    DEPENDS smaa_areatex
-  )
-  list(APPEND SRC
-    ${GENSRC}
-  )
-  unset(GENSRC)
-  unset(GENSRC_DIR)
-
   if(WITH_OPENIMAGEDENOISE)
     add_definitions(-DWITH_OPENIMAGEDENOISE)
     add_definitions(-DOIDN_STATIC_LIB)
diff --git a/source/blender/compositor/nodes/COM_AntiAliasingNode.cc b/source/blender/compositor/nodes/COM_AntiAliasingNode.cc
index d2c68b7d041..3b73bdc4f1b 100644
--- a/source/blender/compositor/nodes/COM_AntiAliasingNode.cc
+++ b/source/blender/compositor/nodes/COM_AntiAliasingNode.cc
@@ -7,37 +7,41 @@
 
 namespace blender::compositor {
 
+/* Blender encodes the threshold in the [0, 1] range, while the SMAA algorithm expects it in
+ * the [0, 0.5] range. */
+static float get_threshold(const NodeAntiAliasingData *data)
+{
+  return data->threshold / 2.0f;
+}
+
+/* Blender encodes the local contrast adaptation factor in the [0, 1] range, while the SMAA
+ * algorithm expects it in the [0, 10] range. */
+static float get_local_contrast_adaptation_factor(const NodeAntiAliasingData *data)
+{
+  return data->contrast_limit * 10.0f;
+}
+
+/* Blender encodes the corner rounding factor in the float [0, 1] range, while the SMAA algorithm
+ * expects it in the integer [0, 100] range. */
+static int get_corner_rounding(const NodeAntiAliasingData *data)
+{
+  return int(data->corner_rounding * 100.0f);
+}
+
 void AntiAliasingNode::convert_to_operations(NodeConverter &converter,
                                              const CompositorContext & /*context*/) const
 {
   const bNode *node = this->get_bnode();
   const NodeAntiAliasingData *data = (const NodeAntiAliasingData *)node->storage;
 
-  /* Edge Detection (First Pass) */
-  SMAAEdgeDetectionOperation *operation1 = nullptr;
+  SMAAOperation *operation = new SMAAOperation();
+  operation->set_threshold(get_threshold(data));
+  operation->set_local_contrast_adaptation_factor(get_local_contrast_adaptation_factor(data));
+  operation->set_corner_rounding(get_corner_rounding(data));
+  converter.add_operation(operation);
 
-  operation1 = new SMAAEdgeDetectionOperation();
-  operation1->set_threshold(data->threshold);
-  operation1->set_local_contrast_adaptation_factor(data->contrast_limit);
-  converter.add_operation(operation1);
-
-  converter.map_input_socket(get_input_socket(0), operation1->get_input_socket(0));
-
-  /* Blending Weight Calculation Pixel Shader (Second Pass) */
-  SMAABlendingWeightCalculationOperation *operation2 =
-      new SMAABlendingWeightCalculationOperation();
-  operation2->set_corner_rounding(data->corner_rounding);
-  converter.add_operation(operation2);
-
-  converter.add_link(operation1->get_output_socket(), operation2->get_input_socket(0));
-
-  /* Neighborhood Blending Pixel Shader (Third Pass) */
-  SMAANeighborhoodBlendingOperation *operation3 = new SMAANeighborhoodBlendingOperation();
-  converter.add_operation(operation3);
-
-  converter.map_input_socket(get_input_socket(0), operation3->get_input_socket(0));
-  converter.add_link(operation2->get_output_socket(), operation3->get_input_socket(1));
-  converter.map_output_socket(get_output_socket(0), operation3->get_output_socket());
+  converter.map_input_socket(get_input_socket(0), operation->get_input_socket(0));
+  converter.map_output_socket(get_output_socket(0), operation->get_output_socket());
 }
 
 }  // namespace blender::compositor
diff --git a/source/blender/compositor/nodes/COM_CornerPinNode.cc b/source/blender/compositor/nodes/COM_CornerPinNode.cc
index 716f1e1bae6..66d9be04f3b 100644
--- a/source/blender/compositor/nodes/COM_CornerPinNode.cc
+++ b/source/blender/compositor/nodes/COM_CornerPinNode.cc
@@ -18,28 +18,13 @@ void CornerPinNode::convert_to_operations(NodeConverter &converter,
   PlaneCornerPinMaskOperation *plane_mask_operation = new PlaneCornerPinMaskOperation();
   converter.add_operation(plane_mask_operation);
 
-  SMAAEdgeDetectionOperation *smaa_edge_detection = new SMAAEdgeDetectionOperation();
-  converter.add_operation(smaa_edge_detection);
+  SMAAOperation *smaa_operation = new SMAAOperation();
+  converter.add_operation(smaa_operation);
 
   converter.add_link(plane_mask_operation->get_output_socket(),
-                     smaa_edge_detection->get_input_socket(0));
+                     smaa_operation->get_input_socket(0));
 
-  SMAABlendingWeightCalculationOperation *smaa_blending_weights =
-      new SMAABlendingWeightCalculationOperation();
-  converter.add_operation(smaa_blending_weights);
-
-  converter.add_link(smaa_edge_detection->get_output_socket(),
-                     smaa_blending_weights->get_input_socket(0));
-
-  SMAANeighborhoodBlendingOperation *smaa_neighborhood = new SMAANeighborhoodBlendingOperation();
-  converter.add_operation(smaa_neighborhood);
-
-  converter.add_link(plane_mask_operation->get_output_socket(),
-                     smaa_neighborhood->get_input_socket(0));
-  converter.add_link(smaa_blending_weights->get_output_socket(),
-                     smaa_neighborhood->get_input_socket(1));
-
-  converter.map_output_socket(this->get_output_socket(1), smaa_neighborhood->get_output_socket());
+  converter.map_output_socket(this->get_output_socket(1), smaa_operation->get_output_socket());
 
   PlaneCornerPinWarpImageOperation *warp_image_operation = new PlaneCornerPinWarpImageOperation();
   converter.add_operation(warp_image_operation);
@@ -62,7 +47,7 @@ void CornerPinNode::convert_to_operations(NodeConverter &converter,
   converter.add_operation(set_alpha_operation);
   converter.add_link(warp_image_operation->get_output_socket(),
                      set_alpha_operation->get_input_socket(0));
-  converter.add_link(smaa_neighborhood->get_output_socket(),
+  converter.add_link(smaa_operation->get_output_socket(),
                      set_alpha_operation->get_input_socket(1));
   converter.map_output_socket(this->get_output_socket(0),
                               set_alpha_operation->get_output_socket());
diff --git a/source/blender/compositor/nodes/COM_DilateErodeNode.cc b/source/blender/compositor/nodes/COM_DilateErodeNode.cc
index f087b42e507..731fdf89aaa 100644
--- a/source/blender/compositor/nodes/COM_DilateErodeNode.cc
+++ b/source/blender/compositor/nodes/COM_DilateErodeNode.cc
@@ -37,26 +37,10 @@ void DilateErodeNode::convert_to_operations(NodeConverter &converter,
     converter.map_input_socket(get_input_socket(0), operation->get_input_socket(0));
 
     if (editor_node->custom3 < 2.0f) {
-      SMAAEdgeDetectionOperation *smaa_edge_detection = new SMAAEdgeDetectionOperation();
-      converter.add_operation(smaa_edge_detection);
-
-      converter.add_link(operation->get_output_socket(), smaa_edge_detection->get_input_socket(0));
-
-      SMAABlendingWeightCalculationOperation *smaa_blending_weights =
-          new SMAABlendingWeightCalculationOperation();
-      converter.add_operation(smaa_blending_weights);
-
-      converter.add_link(smaa_edge_detection->get_output_socket(),
-                         smaa_blending_weights->get_input_socket(0));
-
-      SMAANeighborhoodBlendingOperation *smaa_neighborhood =
-          new SMAANeighborhoodBlendingOperation();
-      converter.add_operation(smaa_neighborhood);
-
-      converter.add_link(operation->get_output_socket(), smaa_neighborhood->get_input_socket(0));
-      converter.add_link(smaa_blending_weights->get_output_socket(),
-                         smaa_neighborhood->get_input_socket(1));
-      converter.map_output_socket(get_output_socket(0), smaa_neighborhood->get_output_socket());
+      SMAAOperation *smaa_operation = new SMAAOperation();
+      converter.add_operation(smaa_operation);
+      converter.add_link(operation->get_output_socket(), smaa_operation->get_input_socket(0));
+      converter.map_output_socket(get_output_socket(0), smaa_operation->get_output_socket());
     }
     else {
       converter.map_output_socket(get_output_socket(0), operation->get_output_socket(0));
diff --git a/source/blender/compositor/nodes/COM_IDMaskNode.cc b/source/blender/compositor/nodes/COM_IDMaskNode.cc
index a0b712889fe..98a0dc638e9 100644
--- a/source/blender/compositor/nodes/COM_IDMaskNode.cc
+++ b/source/blender/compositor/nodes/COM_IDMaskNode.cc
@@ -27,27 +27,10 @@ void IDMaskNode::convert_to_operations(NodeConverter &converter,
     converter.map_output_socket(get_output_socket(0), operation->get_output_socket(0));
   }
   else {
-    SMAAEdgeDetectionOperation *operation1 = nullptr;
-
-    operation1 = new SMAAEdgeDetectionOperation();
-    converter.add_operation(operation1);
-
-    converter.add_link(operation->get_output_socket(0), operation1->get_input_socket(0));
-
-    /* Blending Weight Calculation Pixel Shader (Second Pass). */
-    SMAABlendingWeightCalculationOperation *operation2 =
-        new SMAABlendingWeightCalculationOperation();
-    converter.add_operation(operation2);
-
-    converter.add_link(operation1->get_output_socket(), operation2->get_input_socket(0));
-
-    /* Neighborhood Blending Pixel Shader (Third Pass). */
-    SMAANeighborhoodBlendingOperation *operation3 = new SMAANeighborhoodBlendingOperation();
-    converter.add_operation(operation3);
-
-    converter.add_link(operation->get_output_socket(0), operation3->get_input_socket(0));
-    converter.add_link(operation2->get_output_socket(), operation3->get_input_socket(1));
-    converter.map_output_socket(get_output_socket(0), operation3->get_output_socket());
+    SMAAOperation *smaa_operation = new SMAAOperation();
+    converter.add_operation(smaa_operation);
+    converter.add_link(operation->get_output_socket(0), smaa_operation->get_input_socket(0));
+    converter.map_output_socket(get_output_socket(0), smaa_operation->get_output_socket());
   }
 }
 
diff --git a/source/blender/compositor/nodes/COM_PlaneTrackDeformNode.cc b/source/blender/compositor/nodes/COM_PlaneTrackDeformNode.cc
index 21f3d26202b..8a02e95a855 100644
--- a/source/blender/compositor/nodes/COM_PlaneTrackDeformNode.cc
+++ b/source/blender/compositor/nodes/COM_PlaneTrackDeformNode.cc
@@ -35,28 +35,13 @@ void PlaneTrackDeformNode::convert_to_operations(NodeConverter &converter,
   }
   converter.add_operation(plane_mask_operation);
 
-  SMAAEdgeDetectionOperation *smaa_edge_detection = new SMAAEdgeDetectionOperation();
-  converter.add_operation(smaa_edge_detection);
+  SMAAOperation *smaa_operation = new SMAAOperation();
+  converter.add_operation(smaa_operation);
 
   converter.add_link(plane_mask_operation->get_output_socket(),
-                     smaa_edge_detection->get_input_socket(0));
+                     smaa_operation->get_input_socket(0));
 
-  SMAABlendingWeightCalculationOperation *smaa_blending_weights =
-      new SMAABlendingWeightCalculationOperation();
-  converter.add_operation(smaa_blending_weights);
-
-  converter.add_link(smaa_edge_detection->get_output_socket(),
-                     smaa_blending_weights->get_input_socket(0));
-
-  SMAANeighborhoodBlendingOperation *smaa_neighborhood = new SMAANeighborhoodBlendingOperation();
-  converter.add_operation(smaa_neighborhood);
-
-  converter.add_link(plane_mask_operation->get_output_socket(),
-                     smaa_neighborhood->get_input_socket(0));
-  converter.add_link(smaa_blending_weights->get_output_socket(),
-                     smaa_neighborhood->get_input_socket(1));
-
-  converter.map_output_socket(this->get_output_socket(1), smaa_neighborhood->get_output_socket());
+  converter.map_output_socket(this->get_output_socket(1), smaa_operation->get_output_socket());
 
   PlaneTrackWarpImageOperation *warp_image_operation = new PlaneTrackWarpImageOperation();
   warp_image_operation->set_movie_clip(clip);
@@ -75,7 +60,7 @@ void PlaneTrackDeformNode::convert_to_operations(NodeConverter &converter,
   converter.add_operation(set_alpha_operation);
   converter.add_link(warp_image_operation->get_output_socket(),
                      set_alpha_operation->get_input_socket(0));
-  converter.add_link(smaa_neighborhood->get_output_socket(),
+  converter.add_link(smaa_operation->get_output_socket(),
                      set_alpha_operation->get_input_socket(1));
   converter.map_output_socket(this->get_output_socket(0),
                               set_alpha_operation->get_output_socket());
diff --git a/source/blender/compositor/nodes/COM_ZCombineNode.cc b/source/blender/compositor/nodes/COM_ZCombineNode.cc
index f8dd36d1db3..ed79727b7cd 100644
--- a/source/blender/compositor/nodes/COM_ZCombineNode.cc
+++ b/source/blender/compositor/nodes/COM_ZCombineNode.cc
@@ -54,25 +54,10 @@ void ZCombineNode::convert_to_operations(NodeConverter &converter,
     converter.map_input_socket(get_input_socket(3), maskoperation->get_input_socket(1));
 
     /* Step 2 anti alias mask bit of an expensive operation, but does the trick. */
-    SMAAEdgeDetectionOperation *smaa_edge_detection = new SMAAEdgeDetectionOperation();
-    converter.add_operation(smaa_edge_detection);
+    SMAAOperation *smaa_operation = new SMAAOperation();
+    converter.add_operation(smaa_operation);
 
-    converter.add_link(maskoperation->get_output_socket(),
-                       smaa_edge_detection->get_input_socket(0));
-
-    SMAABlendingWeightCalculationOperation *smaa_blending_weights =
-        new SMAABlendingWeightCalculationOperation();
-    converter.add_operation(smaa_blending_weights);
-
-    converter.add_link(smaa_edge_detection->get_output_socket(),
-                       smaa_blending_weights->get_input_socket(0));
-
-    SMAANeighborhoodBlendingOperation *smaa_neighborhood = new SMAANeighborhoodBlendingOperation();
-    converter.add_operation(smaa_neighborhood);
-
-    converter.add_link(maskoperation->get_output_socket(), smaa_neighborhood->get_input_socket(0));
-    converter.add_link(smaa_blending_weights->get_output_socket(),
-                       smaa_neighborhood->get_input_socket(1));
+    converter.add_link(maskoperation->get_output_socket(), smaa_operation->get_input_socket(0));
 
     /* use mask to blend between the input colors. */
     ZCombineMaskOperation *zcombineoperation = this->get_bnode()->custom1 ?
@@ -80,7 +65,7 @@ void ZCombineNode::convert_to_operations(NodeConverter &converter,
                                                    new ZCombineMaskOperation();
     converter.add_operation(zcombineoperation);
 
-    converter.add_link(smaa_neighborhood->get_output_socket(),
+    converter.add_link(smaa_operation->get_output_socket(),
                        zcombineoperation->get_input_socket(0));
     converter.map_input_socket(get_input_socket(0), zcombineoperation->get_input_socket(1));
     converter.map_input_socket(get_input_socket(2), zcombineoperation->get_input_socket(2));
diff --git a/source/blender/compositor/operations/COM_SMAAOperation.cc b/source/blender/compositor/operations/COM_SMAAOperation.cc
index f49a069a81e..bb96c20c94d 100644
--- a/source/blender/compositor/operations/COM_SMAAOperation.cc
+++ b/source/blender/compositor/operations/COM_SMAAOperation.cc
@@ -1,805 +1,1514 @@
-/* SPDX-FileCopyrightText: 2024 Blender Authors
+/* SPDX-FileCopyrightText: 2013 Jorge Jimenez <jorge@iryoku.com>
+ * SPDX-FileCopyrightText: 2013 Jose I. Echevarria <joseignacioechevarria@gmail.com>
+ * SPDX-FileCopyrightText: 2013 Belen Masia <bmasia@unizar.es>
+ * SPDX-FileCopyrightText: 2013 Fernando Navarro <fernandn@microsoft.com>
+ * SPDX-FileCopyrightText: 2013 Diego Gutierrez <diegog@unizar.es>
+ * SPDX-FileCopyrightText: 2019-2023 Blender Authors
  *
- * SPDX-License-Identifier: GPL-2.0-or-later */
+ * SPDX-License-Identifier: MIT AND GPL-2.0-or-later */
 
-#include "COM_SMAAOperation.h"
-#include "BKE_node.hh"
-#include "COM_SMAAAreaTexture.h"
+#include "BLI_math_vector.h"
+#include "BLI_math_vector.hh"
+#include "BLI_smaa_textures.h"
+#include "BLI_span.hh"
+#include "BLI_task.hh"
 
 #include "IMB_colormanagement.hh"
 
+#include "COM_MemoryBuffer.h"
+#include "COM_SMAAOperation.h"
+
+/**
+ *                  _______  ___  ___       ___           ___
+ *                 /       ||   \/   |     /   \         /   \
+ *                |   (---- |  \  /  |    /  ^  \       /  ^  \
+ *                 \   \    |  |\/|  |   /  /_\  \     /  /_\  \
+ *              ----)   |   |  |  |  |  /  _____  \   /  _____  \
+ *             |_______/    |__|  |__| /__/     \__\ /__/     \__\
+ *
+ *                               E N H A N C E D
+ *       S U B P I X E L   M O R P H O L O G I C A L   A N T I A L I A S I N G
+ *
+ *                         http://www.iryoku.com/smaa/
+ *
+ * Hi, welcome aboard!
+ *
+ * Here you'll find instructions to get the shader up and running as fast as
+ * possible.
+ *
+ * IMPORTANT NOTICE: when updating, remember to update both this file and the
+ * precomputed textures! They may change from version to version.
+ *
+ * The shader has three passes, chained together as follows:
+ *
+ *                           |input|------------------+
+ *                              v                     |
+ *                    [ SMAA*EdgeDetection ]          |
+ *                              v                     |
+ *                          |edgesTex|                |
+ *                              v                     |
+ *              [ SMAABlendingWeightCalculation ]     |
+ *                              v                     |
+ *                          |blendTex|                |
+ *                              v                     |
+ *                [ SMAANeighborhoodBlending ] <------+
+ *                              v
+ *                           |output|
+ *
+ * Note that each [pass] has its own vertex and pixel shader. Remember to use
+ * oversized triangles instead of quads to avoid overshading along the
+ * diagonal.
+ *
+ * You've three edge detection methods to choose from: luma, color or depth.
+ * They represent different quality/performance and anti-aliasing/sharpness
+ * tradeoffs, so our recommendation is for you to choose the one that best
+ * suits your particular scenario:
+ *
+ * - Depth edge detection is usually the fastest but it may miss some edges.
+ *
+ * - Luma edge detection is usually more expensive than depth edge detection,
+ *   but catches visible edges that depth edge detection can miss.
+ *
+ * - Color edge detection is usually the most expensive one but catches
+ *   chroma-only edges.
+ *
+ * For quickstarters: just use luma edge detection.
+ *
+ * The general advice is to not rush the integration process and ensure each
+ * step is done correctly (don't try to integrate SMAA T2x with predicated edge
+ * detection from the start!). Ok then, let's go!
+ *
+ *  1. The first step is to create two RGBA temporal render targets for holding
+ *     |edgesTex| and |blendTex|.
+ *
+ *     In DX10 or DX11, you can use a RG render target for the edges texture.
+ *     In the case of NVIDIA GPUs, using RG render targets seems to actually be
+ *     slower.
+ *
+ *     On the Xbox 360, you can use the same render target for resolving both
+ *     |edgesTex| and |blendTex|, as they aren't needed simultaneously.
+ *
+ *  2. Both temporal render targets |edgesTex| and |blendTex| must be cleared
+ *     each frame. Do not forget to clear the alpha channel!
+ *
+ *  3. The next step is loading the two supporting precalculated textures,
+ *     'areaTex' and 'searchTex'. You'll find them in the 'Textures' folder as
+ *     C++ headers, and also as regular DDS files. They'll be needed for the
+ *     'SMAABlendingWeightCalculation' pass.
+ *
+ *     If you use the C++ headers, be sure to load them in the format specified
+ *     inside of them.
+ *
+ *     You can also compress 'areaTex' and 'searchTex' using BC5 and BC4
+ *     respectively, if you have that option in your content processor pipeline.
+ *     When compressing them, you get a non-perceptible quality decrease, and a
+ *     marginal performance increase.
+ *
+ *  4. All samplers must be set to linear filtering and clamp.
+ *
+ *     After you get the technique working, remember that 64-bit inputs have
+ *     half-rate linear filtering on GCN.
+ *
+ *     If SMAA is applied to 64-bit color buffers, switching to point filtering
+ *     when accessing them will increase the performance. Search for
+ *     'SMAASamplePoint' to see which textures may benefit from point
+ *     filtering, and where (which is basically the color input in the edge
+ *     detection and resolve passes).
+ *
+ *  5. All texture reads and buffer writes must be non-sRGB, with the exception
+ *     of the input read and the output write in
+ *     'SMAANeighborhoodBlending' (and only in this pass!). If sRGB reads in
+ *     this last pass are not possible, the technique will work anyway, but
+ *     will perform antialiasing in gamma space.
+ *
+ *     IMPORTANT: for best results the input read for the color/luma edge
+ *     detection should *NOT* be sRGB.
+ *
+ *  6. Before including SMAA.h you'll have to setup the render target metrics,
+ *     the target and any optional configuration defines. Optionally you can
+ *     use a preset.
+ *
+ *     You have the following targets available:
+ *         SMAA_HLSL_3
+ *         SMAA_HLSL_4
+ *         SMAA_HLSL_4_1
+ *         SMAA_GLSL_3 *
+ *         SMAA_GLSL_4 *
+ *
+ *         * (See SMAA_INCLUDE_VS and SMAA_INCLUDE_PS below).
+ *
+ *     And four presets:
+ *         SMAA_PRESET_LOW          (%60 of the quality)
+ *         SMAA_PRESET_MEDIUM       (%80 of the quality)
+ *         SMAA_PRESET_HIGH         (%95 of the quality)
+ *         SMAA_PRESET_ULTRA        (%99 of the quality)
+ *
+ *     For example:
+ *         #define SMAA_RT_METRICS float4(1.0 / 1280.0, 1.0 / 720.0, 1280.0, 720.0)
+ *         #define SMAA_HLSL_4
+ *         #define SMAA_PRESET_HIGH
+ *         #include "SMAA.h"
+ *
+ *     Note that SMAA_RT_METRICS doesn't need to be a macro, it can be a
+ *     uniform variable. The code is designed to minimize the impact of not
+ *     using a constant value, but it is still better to hardcode it.
+ *
+ *     Depending on how you encoded 'areaTex' and 'searchTex', you may have to
+ *     add (and customize) the following defines before including SMAA.h:
+ *          #define SMAA_AREATEX_SELECT(sample) sample.rg
+ *          #define SMAA_SEARCHTEX_SELECT(sample) sample.r
+ *
+ *     If your engine is already using porting macros, you can define
+ *     SMAA_CUSTOM_SL, and define the porting functions by yourself.
+ *
+ *  7. Then, you'll have to setup the passes as indicated in the scheme above.
+ *     You can take a look into SMAA.fx, to see how we did it for our demo.
+ *     Check out the function wrappers, you may want to copy-paste them!
+ *
+ *  8. It's recommended to validate the produced |edgesTex| and |blendTex|.
+ *     You can use a screenshot from your engine to compare the |edgesTex|
+ *     and |blendTex| produced inside of the engine with the results obtained
+ *     with the reference demo.
+ *
+ *  9. After you get the last pass to work, it's time to optimize. You'll have
+ *     to initialize a stencil buffer in the first pass (discard is already in
+ *     the code), then mask execution by using it the second pass. The last
+ *     pass should be executed in all pixels.
+ *
+ *
+ * After this point you can choose to enable predicated thresholding,
+ * temporal supersampling and motion blur integration:
+ *
+ * a) If you want to use predicated thresholding, take a look into
+ *    SMAA_PREDICATION; you'll need to pass an extra texture in the edge
+ *    detection pass.
+ *
+ * b) If you want to enable temporal supersampling (SMAA T2x):
+ *
+ * 1. The first step is to render using subpixel jitters. I won't go into
+ *    detail, but it's as simple as moving each vertex position in the
+ *    vertex shader, you can check how we do it in our DX10 demo.
+ *
+ * 2. Then, you must setup the temporal resolve. You may want to take a look
+ *    into SMAAResolve for resolving 2x modes. After you get it working, you'll
+ *    probably see ghosting everywhere. But fear not, you can enable the
+ *    CryENGINE temporal reprojection by setting the SMAA_REPROJECTION macro.
+ *    Check out SMAA_DECODE_VELOCITY if your velocity buffer is encoded.
+ *
+ * 3. The next step is to apply SMAA to each subpixel jittered frame, just as
+ *    done for 1x.
+ *
+ * 4. At this point you should already have something usable, but for best
+ *    results the proper area textures must be set depending on current jitter.
+ *    For this, the parameter 'subsampleIndices' of
+ *    'SMAABlendingWeightCalculationPS' must be set as follows, for our T2x
+ *    mode:
+ *
+ *    @SUBSAMPLE_INDICES
+ *
+ *    | S# |  Camera Jitter   |  subsampleIndices    |
+ *    +----+------------------+----------------------+
+ *    |  0 |  ( 0.25, -0.25)  |  float4(1, 1, 1, 0)  |
+ *    |  1 |  (-0.25,  0.25)  |  float4(2, 2, 2, 0)  |
+ *
+ *    These jitter positions assume a bottom-to-top y axis. S# stands for the
+ *    sample number.
+ *
+ * More information about temporal supersampling here:
+ *    http://iryoku.com/aacourse/downloads/13-Anti-Aliasing-Methods-in-CryENGINE-3.pdf
+ *
+ * c) If you want to enable spatial multisampling (SMAA S2x):
+ *
+ * 1. The scene must be rendered using MSAA 2x. The MSAA 2x buffer must be
+ *    created with:
+ *      - DX10:     see below (*)
+ *      - DX10.1:   D3D10_STANDARD_MULTISAMPLE_PATTERN or
+ *      - DX11:     D3D11_STANDARD_MULTISAMPLE_PATTERN
+ *
+ *    This allows to ensure that the subsample order matches the table in
+ *    @SUBSAMPLE_INDICES.
+ *
+ *    (*) In the case of DX10, we refer the reader to:
+ *      - SMAA::detectMSAAOrder and
+ *      - SMAA::msaaReorder
+ *
+ *    These functions allow matching the standard multisample patterns by
+ *    detecting the subsample order for a specific GPU, and reordering
+ *    them appropriately.
+ *
+ * 2. A shader must be run to output each subsample into a separate buffer
+ *    (DX10 is required). You can use SMAASeparate for this purpose, or just do
+ *    it in an existing pass (for example, in the tone mapping pass, which has
+ *    the advantage of feeding tone mapped subsamples to SMAA, which will yield
+ *    better results).
+ *
+ * 3. The full SMAA 1x pipeline must be run for each separated buffer, storing
+ *    the results in the final buffer. The second run should alpha blend with
+ *    the existing final buffer using a blending factor of 0.5.
+ *    'subsampleIndices' must be adjusted as in the SMAA T2x case (see point
+ *    b).
+ *
+ * d) If you want to enable temporal supersampling on top of SMAA S2x
+ *    (which actually is SMAA 4x):
+ *
+ * 1. SMAA 4x consists of temporally jittering SMAA S2x, so the first step is
+ *    to calculate SMAA S2x for current frame. In this case, 'subsampleIndices'
+ *    must be set as follows:
+ *
+ *    | F# | S# |   Camera Jitter    |    Net Jitter     |   subsampleIndices   |
+ *    +----+----+--------------------+-------------------+----------------------+
+ *    |  0 |  0 |  ( 0.125,  0.125)  |  ( 0.375, -0.125) |  float4(5, 3, 1, 3)  |
+ *    |  0 |  1 |  ( 0.125,  0.125)  |  (-0.125,  0.375) |  float4(4, 6, 2, 3)  |
+ *    +----+----+--------------------+-------------------+----------------------+
+ *    |  1 |  2 |  (-0.125, -0.125)  |  ( 0.125, -0.375) |  float4(3, 5, 1, 4)  |
+ *    |  1 |  3 |  (-0.125, -0.125)  |  (-0.375,  0.125) |  float4(6, 4, 2, 4)  |
+ *
+ *    These jitter positions assume a bottom-to-top y axis. F# stands for the
+ *    frame number. S# stands for the sample number.
+ *
+ * 2. After calculating SMAA S2x for current frame (with the new subsample
+ *    indices), previous frame must be reprojected as in SMAA T2x mode (see
+ *    point b).
+ *
+ * e) If motion blur is used, you may want to do the edge detection pass
+ *    together with motion blur. This has two advantages:
+ *
+ * 1. Pixels under heavy motion can be omitted from the edge detection process.
+ *    For these pixels we can just store "no edge", as motion blur will take
+ *    care of them.
+ * 2. The center pixel tap is reused.
+ *
+ * Note that in this case depth testing should be used instead of stenciling,
+ * as we have to write all the pixels in the motion blur pass.
+ *
+ * That's it!
+ */
+
+/* ----------------------------------------------------------------------------
+ * Blender's Defines */
+
+#define SMAA_CUSTOM_SL
+#define SMAA_AREATEX_SELECT(sample) sample.xy()
+#define SMAA_SEARCHTEX_SELECT(sample) sample.x
+#define SMAATexture2D(tex) const MemoryBuffer *tex
+#define SMAATexturePass2D(tex) tex
+#define SMAASampleLevelZero(tex, coord) tex->texture_bilinear_extend(coord)
+#define SMAASampleLevelZeroPoint(tex, coord) tex->texture_bilinear_extend(coord)
+#define SMAASampleLevelZeroOffset(tex, coord, offset, size) \
+  tex->texture_bilinear_extend(coord + float2(offset) / float2(size))
+#define SMAASample(tex, coord) tex->texture_bilinear_extend(coord)
+#define SMAASamplePoint(tex, coord) tex->texture_nearest_extend(coord)
+#define SMAASamplePointOffset(tex, coord, offset, size) \
+  tex->texture_nearest_extend(coord + float2(offset) / float2(size))
+#define SMAASampleOffset(tex, coord, offset, size) \
+  tex->texture_bilinear_extend(coord + float2(offset) / float2(size))
+#define SMAA_FLATTEN
+#define SMAA_BRANCH
+#define lerp(a, b, t) math::interpolate(a, b, t)
+#define saturate(a) math::clamp(a, 0.0f, 1.0f)
+#define mad(a, b, c) (a * b + c)
+
+/* ----------------------------------------------------------------------------
+ * SMAA Presets */
+
+/**
+ * Note that if you use one of these presets, the following configuration
+ * macros will be ignored if set in the "Configurable Defines" section.
+ */
+
+#if defined(SMAA_PRESET_LOW)
+#  define SMAA_THRESHOLD 0.15f
+#  define SMAA_MAX_SEARCH_STEPS 4
+#  define SMAA_DISABLE_DIAG_DETECTION
+#  define SMAA_DISABLE_CORNER_DETECTION
+#elif defined(SMAA_PRESET_MEDIUM)
+#  define SMAA_THRESHOLD 0.1f
+#  define SMAA_MAX_SEARCH_STEPS 8
+#  define SMAA_DISABLE_DIAG_DETECTION
+#  define SMAA_DISABLE_CORNER_DETECTION
+#elif defined(SMAA_PRESET_HIGH)
+#  define SMAA_THRESHOLD 0.1f
+#  define SMAA_MAX_SEARCH_STEPS 16
+#  define SMAA_MAX_SEARCH_STEPS_DIAG 8
+#  define SMAA_CORNER_ROUNDING 25
+#elif defined(SMAA_PRESET_ULTRA)
+#  define SMAA_THRESHOLD 0.05f
+#  define SMAA_MAX_SEARCH_STEPS 32
+#  define SMAA_MAX_SEARCH_STEPS_DIAG 16
+#  define SMAA_CORNER_ROUNDING 25
+#endif
+
+/* ----------------------------------------------------------------------------
+ * Configurable Defines */
+
+/**
+ * SMAA_THRESHOLD specifies the threshold or sensitivity to edges.
+ * Lowering this value you will be able to detect more edges at the expense of
+ * performance.
+ *
+ * Range: [0, 0.5]
+ *   0.1 is a reasonable value, and allows to catch most visible edges.
+ *   0.05 is a rather overkill value, that allows to catch 'em all.
+ *
+ *   If temporal supersampling is used, 0.2 could be a reasonable value, as low
+ *   contrast edges are properly filtered by just 2x.
+ */
+#ifndef SMAA_THRESHOLD
+#  define SMAA_THRESHOLD 0.1f
+#endif
+
+/**
+ * SMAA_DEPTH_THRESHOLD specifies the threshold for depth edge detection.
+ *
+ * Range: depends on the depth range of the scene.
+ */
+#ifndef SMAA_DEPTH_THRESHOLD
+#  define SMAA_DEPTH_THRESHOLD (0.1f * SMAA_THRESHOLD)
+#endif
+
+/**
+ * SMAA_MAX_SEARCH_STEPS specifies the maximum steps performed in the
+ * horizontal/vertical pattern searches, at each side of the pixel.
+ *
+ * In number of pixels, it's actually the double. So the maximum line length
+ * perfectly handled by, for example 16, is 64 (by perfectly, we meant that
+ * longer lines won't look as good, but still antialiased).
+ *
+ * Range: [0, 112]
+ */
+#ifndef SMAA_MAX_SEARCH_STEPS
+#  define SMAA_MAX_SEARCH_STEPS 16
+#endif
+
+/**
+ * SMAA_MAX_SEARCH_STEPS_DIAG specifies the maximum steps performed in the
+ * diagonal pattern searches, at each side of the pixel. In this case we jump
+ * one pixel at time, instead of two.
+ *
+ * Range: [0, 20]
+ *
+ * On high-end machines it is cheap (between a 0.8x and 0.9x slower for 16
+ * steps), but it can have a significant impact on older machines.
+ *
+ * Define SMAA_DISABLE_DIAG_DETECTION to disable diagonal processing.
+ */
+#ifndef SMAA_MAX_SEARCH_STEPS_DIAG
+#  define SMAA_MAX_SEARCH_STEPS_DIAG 8
+#endif
+
+/**
+ * SMAA_CORNER_ROUNDING specifies how much sharp corners will be rounded.
+ *
+ * Range: [0, 100]
+ *
+ * Define SMAA_DISABLE_CORNER_DETECTION to disable corner processing.
+ */
+#ifndef SMAA_CORNER_ROUNDING
+#  define SMAA_CORNER_ROUNDING 25
+#endif
+
+/**
+ * If there is a neighbor edge that has SMAA_LOCAL_CONTRAST_ADAPTATION_FACTOR
+ * times bigger contrast than current edge, current edge will be discarded.
+ *
+ * This allows to eliminate spurious crossing edges, and is based on the fact
+ * that, if there is too much contrast in a direction, that will hide
+ * perceptually contrast in the other neighbors.
+ */
+#ifndef SMAA_LOCAL_CONTRAST_ADAPTATION_FACTOR
+#  define SMAA_LOCAL_CONTRAST_ADAPTATION_FACTOR 2.0f
+#endif
+
+/**
+ * Predicated thresholding allows to better preserve texture details and to
+ * improve performance, by decreasing the number of detected edges using an
+ * additional buffer like the light accumulation buffer, object ids or even the
+ * depth buffer (the depth buffer usage may be limited to indoor or short range
+ * scenes).
+ *
+ * It locally decreases the luma or color threshold if an edge is found in an
+ * additional buffer (so the global threshold can be higher).
+ *
+ * This method was developed by Playstation EDGE MLAA team, and used in
+ * Killzone 3, by using the light accumulation buffer. More information here:
+ *     http://iryoku.com/aacourse/downloads/06-MLAA-on-PS3.pptx
+ */
+#ifndef SMAA_PREDICATION
+#  define SMAA_PREDICATION 0
+#endif
+
+/**
+ * Threshold to be used in the additional predication buffer.
+ *
+ * Range: depends on the input, so you'll have to find the magic number that
+ * works for you.
+ */
+#ifndef SMAA_PREDICATION_THRESHOLD
+#  define SMAA_PREDICATION_THRESHOLD 0.01f
+#endif
+
+/**
+ * How much to scale the global threshold used for luma or color edge
+ * detection when using predication.
+ *
+ * Range: [1, 5]
+ */
+#ifndef SMAA_PREDICATION_SCALE
+#  define SMAA_PREDICATION_SCALE 2.0f
+#endif
+
+/**
+ * How much to locally decrease the threshold.
+ *
+ * Range: [0, 1]
+ */
+#ifndef SMAA_PREDICATION_STRENGTH
+#  define SMAA_PREDICATION_STRENGTH 0.4f
+#endif
+
+/**
+ * Temporal reprojection allows to remove ghosting artifacts when using
+ * temporal supersampling. We use the CryEngine 3 method which also introduces
+ * velocity weighting. This feature is of extreme importance for totally
+ * removing ghosting. More information here:
+ *    http://iryoku.com/aacourse/downloads/13-Anti-Aliasing-Methods-in-CryENGINE-3.pdf
+ *
+ * Note that you'll need to setup a velocity buffer for enabling reprojection.
+ * For static geometry, saving the previous depth buffer is a viable
+ * alternative.
+ */
+#ifndef SMAA_REPROJECTION
+#  define SMAA_REPROJECTION 0
+#endif
+
+/**
+ * SMAA_REPROJECTION_WEIGHT_SCALE controls the velocity weighting. It allows to
+ * remove ghosting trails behind the moving object, which are not removed by
+ * just using reprojection. Using low values will exhibit ghosting, while using
+ * high values will disable temporal supersampling under motion.
+ *
+ * Behind the scenes, velocity weighting removes temporal supersampling when
+ * the velocity of the subsamples differs (meaning they are different objects).
+ *
+ * Range: [0, 80]
+ */
+#ifndef SMAA_REPROJECTION_WEIGHT_SCALE
+#  define SMAA_REPROJECTION_WEIGHT_SCALE 30.0f
+#endif
+
+/**
+ * On some compilers, discard cannot be used in vertex shaders. Thus, they need
+ * to be compiled separately.
+ */
+#ifndef SMAA_INCLUDE_VS
+#  define SMAA_INCLUDE_VS 1
+#endif
+#ifndef SMAA_INCLUDE_PS
+#  define SMAA_INCLUDE_PS 1
+#endif
+
+/* ----------------------------------------------------------------------------
+ * Texture Access Defines */
+
+#ifndef SMAA_AREATEX_SELECT
+#  if defined(SMAA_HLSL_3)
+#    define SMAA_AREATEX_SELECT(sample) sample.ra
+#  else
+#    define SMAA_AREATEX_SELECT(sample) sample.rg
+#  endif
+#endif
+
+#ifndef SMAA_SEARCHTEX_SELECT
+#  define SMAA_SEARCHTEX_SELECT(sample) sample.r
+#endif
+
+#ifndef SMAA_DECODE_VELOCITY
+#  define SMAA_DECODE_VELOCITY(sample) sample.rg
+#endif
+
+/* ----------------------------------------------------------------------------
+ * Non-Configurable Defines */
+
+#define SMAA_AREATEX_MAX_DISTANCE 16
+#define SMAA_AREATEX_MAX_DISTANCE_DIAG 20
+#define SMAA_AREATEX_PIXEL_SIZE (1.0f / float2(160.0f, 560.0f))
+#define SMAA_AREATEX_SUBTEX_SIZE (1.0f / 7.0f)
+#define SMAA_SEARCHTEX_SIZE float2(66.0f, 33.0f)
+#define SMAA_SEARCHTEX_PACKED_SIZE float2(64.0f, 16.0f)
+#define SMAA_CORNER_ROUNDING_NORM (float(SMAA_CORNER_ROUNDING) / 100.0f)
+
+/* ----------------------------------------------------------------------------
+ * Porting Functions */
+
+#if defined(SMAA_HLSL_3)
+#  define SMAATexture2D(tex) sampler2D tex
+#  define SMAATexturePass2D(tex) tex
+#  define SMAASampleLevelZero(tex, coord) tex2Dlod(tex, float4(coord, 0.0, 0.0))
+#  define SMAASampleLevelZeroPoint(tex, coord) tex2Dlod(tex, float4(coord, 0.0, 0.0))
+/* clang-format off */
+#  define SMAASampleLevelZeroOffset(tex, coord, offset) tex2Dlod(tex, float4(coord + offset * SMAA_RT_METRICS.xy, 0.0, 0.0))
+/* clang-format on */
+#  define SMAASample(tex, coord) tex2D(tex, coord)
+#  define SMAASamplePoint(tex, coord) tex2D(tex, coord)
+#  define SMAASampleOffset(tex, coord, offset) tex2D(tex, coord + offset * SMAA_RT_METRICS.xy)
+#  define SMAA_FLATTEN [flatten]
+#  define SMAA_BRANCH [branch]
+#endif
+#if defined(SMAA_HLSL_4) || defined(SMAA_HLSL_4_1)
+SamplerState LinearSampler
+{
+  Filter = MIN_MAG_LINEAR_MIP_POINT;
+  AddressU = Clamp;
+  AddressV = Clamp;
+};
+SamplerState PointSampler
+{
+  Filter = MIN_MAG_MIP_POINT;
+  AddressU = Clamp;
+  AddressV = Clamp;
+};
+#  define SMAATexture2D(tex) Texture2D tex
+#  define SMAATexturePass2D(tex) tex
+#  define SMAASampleLevelZero(tex, coord) tex.SampleLevel(LinearSampler, coord, 0)
+#  define SMAASampleLevelZeroPoint(tex, coord) tex.SampleLevel(PointSampler, coord, 0)
+/* clang-format off */
+#  define SMAASampleLevelZeroOffset(tex, coord, offset) tex.SampleLevel(LinearSampler, coord, 0, offset)
+/* clang-format on */
+#  define SMAASample(tex, coord) tex.Sample(LinearSampler, coord)
+#  define SMAASamplePoint(tex, coord) tex.Sample(PointSampler, coord)
+#  define SMAASampleOffset(tex, coord, offset) tex.Sample(LinearSampler, coord, offset)
+#  define SMAA_FLATTEN [flatten]
+#  define SMAA_BRANCH [branch]
+#  define SMAATexture2DMS2(tex) Texture2DMS<float4, 2> tex
+#  define SMAALoad(tex, pos, sample) tex.Load(pos, sample)
+#  if defined(SMAA_HLSL_4_1)
+#    define SMAAGather(tex, coord) tex.Gather(LinearSampler, coord, 0)
+#  endif
+#endif
+#if defined(SMAA_GLSL_3) || defined(SMAA_GLSL_4) || defined(GPU_METAL) || defined(GPU_VULKAN)
+#  define SMAATexture2D(tex) sampler2D tex
+#  define SMAATexturePass2D(tex) tex
+#  define SMAASampleLevelZero(tex, coord) textureLod(tex, coord, 0.0)
+#  define SMAASampleLevelZeroPoint(tex, coord) textureLod(tex, coord, 0.0)
+#  define SMAASampleLevelZeroOffset(tex, coord, offset) textureLodOffset(tex, coord, 0.0, offset)
+#  define SMAASample(tex, coord) texture(tex, coord)
+#  define SMAASamplePoint(tex, coord) texture(tex, coord)
+#  define SMAASampleOffset(tex, coord, offset) texture(tex, coord, offset)
+#  define SMAA_FLATTEN
+#  define SMAA_BRANCH
+#  define lerp(a, b, t) mix(a, b, t)
+#  define saturate(a) clamp(a, 0.0, 1.0)
+#  if defined(SMAA_GLSL_4)
+#    define SMAAGather(tex, coord) textureGather(tex, coord)
+#  endif
+#  if defined(SMAA_GLSL_4)
+#    define mad(a, b, c) fma(a, b, c)
+#  elif defined(GPU_VULKAN)
+/* NOTE(Vulkan): mad macro doesn't work, define each overload as work-around. */
+vec4 mad(vec4 a, vec4 b, vec4 c)
+{
+  return fma(a, b, c);
+}
+vec3 mad(vec3 a, vec3 b, vec3 c)
+{
+  return fma(a, b, c);
+}
+vec2 mad(vec2 a, vec2 b, vec2 c)
+{
+  return fma(a, b, c);
+}
+float mad(float a, float b, float c)
+{
+  return fma(a, b, c);
+}
+#  else
+#    define mad(a, b, c) (a * b + c)
+#  endif
+/* NOTE(Metal): Types already natively declared in MSL. */
+#  ifndef GPU_METAL
+#    define float2 vec2
+#    define float3 vec3
+#    define float4 vec4
+#    define int2 ivec2
+#    define int3 ivec3
+#    define int4 ivec4
+#    define bool2 bvec2
+#    define bool3 bvec3
+#    define bool4 bvec4
+#  endif
+#endif
+
+/* clang-format off */
+#if !defined(SMAA_HLSL_3) && !defined(SMAA_HLSL_4) && !defined(SMAA_HLSL_4_1) && !defined(SMAA_GLSL_3) && !defined(SMAA_GLSL_4) && !defined(SMAA_CUSTOM_SL)
+#  error you must define the shading language: SMAA_HLSL_*, SMAA_GLSL_* or SMAA_CUSTOM_SL
+#endif
+/* clang-format on */
+
 namespace blender::compositor {
 
-/*
- * An implementation of Enhanced Sub-pixel Morphological Anti-aliasing (SMAA)
- *
- * The algorithm was proposed by:
- *   Jorge Jimenez, Jose I. Echevarria, Tiago Sousa, Diego Gutierrez
- *
- *   http://www.iryoku.com/smaa/
- *
- * This file is based on SMAA-CPP:
- *
- *   https://github.com/i_ri-E/smaa-cpp
- *
- * Currently only SMAA 1x mode is provided, so the operation will be done
- * with no spatial multi-sampling nor temporal super-sampling.
- *
- * NOTE: This program assumes the screen coordinates are DirectX style, so
- * the vertical direction is upside-down. "top" and "bottom" actually mean
- * bottom and top, respectively.
+/* ----------------------------------------------------------------------------
+ * Misc functions */
+
+/**
+ * Conditional move:
  */
-
-/*-----------------------------------------------------------------------------*/
-/* Non-Configurable Defines */
-
-#define SMAA_AREATEX_SIZE 80
-#define SMAA_AREATEX_MAX_DISTANCE 20
-#define SMAA_AREATEX_MAX_DISTANCE_DIAG 20
-#define SMAA_MAX_SEARCH_STEPS 362 /* 362 - 1 = 19^2 */
-#define SMAA_MAX_SEARCH_STEPS_DIAG 19
-
-/*-----------------------------------------------------------------------------*/
-/* Internal Functions to Sample Pixel Color from Image */
-
-static inline void sample(MemoryBuffer *reader, int x, int y, float color[4])
+static void SMAAMovc(float2 cond, float2 &variable, float2 value)
 {
-  reader->read_elem_checked(x, y, color);
+  /* Per-component select via interpolate (assumes `cond` is 0 or 1 per component). */
+  variable = math::interpolate(variable, value, cond);
 }
 
-template<typename T>
-static void sample_bilinear_vertical(T *reader, int x, int y, float yoffset, float color[4])
+static void SMAAMovc(float4 cond, float4 &variable, float4 value)
 {
-  float iy = floorf(yoffset);
-  float fy = yoffset - iy;
-  y += int(iy);
-
-  float color00[4], color01[4];
-
-  sample(reader, x + 0, y + 0, color00);
-  sample(reader, x + 0, y + 1, color01);
-
-  color[0] = interpf(color01[0], color00[0], fy);
-  color[1] = interpf(color01[1], color00[1], fy);
-  color[2] = interpf(color01[2], color00[2], fy);
-  color[3] = interpf(color01[3], color00[3], fy);
+  /* Per-component select via interpolate (assumes `cond` is 0 or 1 per component). */
+  variable = math::interpolate(variable, value, cond);
 }
 
-template<typename T>
-static void sample_bilinear_horizontal(T *reader, int x, int y, float xoffset, float color[4])
+#if SMAA_INCLUDE_VS
+/* ----------------------------------------------------------------------------
+ * Vertex Shaders */
+
+/**
+ * Edge Detection Vertex Shader
+ */
+static void SMAAEdgeDetectionVS(float2 texcoord, int2 size, float4 offset[3])
 {
-  float ix = floorf(xoffset);
-  float fx = xoffset - ix;
-  x += int(ix);
-
-  float color00[4], color10[4];
-
-  sample(reader, x + 0, y + 0, color00);
-  sample(reader, x + 1, y + 0, color10);
-
-  color[0] = interpf(color10[0], color00[0], fx);
-  color[1] = interpf(color10[1], color00[1], fx);
-  color[2] = interpf(color10[2], color00[2], fx);
-  color[3] = interpf(color10[3], color00[3], fx);
-}
-
-/*-----------------------------------------------------------------------------*/
-/* Internal Functions to Sample Blending Weights from AreaTex */
-
-static inline const float *areatex_sample_internal(const float *areatex, int x, int y)
-{
-  return &areatex[(std::clamp(x, 0, SMAA_AREATEX_SIZE - 1) +
-                   std::clamp(y, 0, SMAA_AREATEX_SIZE - 1) * SMAA_AREATEX_SIZE) *
-                  2];
+  offset[0] = float4(texcoord.xy(), texcoord.xy()) +
+              float4(-1.0f, 0.0f, 0.0f, -1.0f) / float4(size, size);
+  offset[1] = float4(texcoord.xy(), texcoord.xy()) +
+              float4(1.0f, 0.0f, 0.0f, 1.0f) / float4(size, size);
+  offset[2] = float4(texcoord.xy(), texcoord.xy()) +
+              float4(-2.0f, 0.0f, 0.0f, -2.0f) / float4(size, size);
 }
 
 /**
- * We have the distance and both crossing edges. So, what are the areas
- * at each side of current edge?
+ * Blend Weight Calculation Vertex Shader
  */
-static void area(int d1, int d2, int e1, int e2, float weights[2])
+static void SMAABlendingWeightCalculationVS(float2 texcoord,
+                                            int2 size,
+                                            float2 &pixcoord,
+                                            float4 offset[3])
 {
-  /* The areas texture is compressed  quadratically: */
-  float x = float(SMAA_AREATEX_MAX_DISTANCE * e1) + sqrtf(float(d1));
-  float y = float(SMAA_AREATEX_MAX_DISTANCE * e2) + sqrtf(float(d2));
+  pixcoord = texcoord * float2(size);
 
-  float ix = floorf(x), iy = floorf(y);
-  float fx = x - ix, fy = y - iy;
-  int X = int(ix), Y = int(iy);
+  // We will use these offsets for the searches later on (see @PSEUDO_GATHER4):
+  offset[0] = float4(texcoord.xy(), texcoord.xy()) +
+              float4(-0.25f, -0.125f, 1.25f, -0.125f) / float4(size, size);
+  offset[1] = float4(texcoord.xy(), texcoord.xy()) +
+              float4(-0.125f, -0.25f, -0.125f, 1.25f) / float4(size, size);
 
-  const float *weights00 = areatex_sample_internal(areatex, X + 0, Y + 0);
-  const float *weights10 = areatex_sample_internal(areatex, X + 1, Y + 0);
-  const float *weights01 = areatex_sample_internal(areatex, X + 0, Y + 1);
-  const float *weights11 = areatex_sample_internal(areatex, X + 1, Y + 1);
-
-  weights[0] = interpf(
-      interpf(weights11[0], weights01[0], fx), interpf(weights10[0], weights00[0], fx), fy);
-  weights[1] = interpf(
-      interpf(weights11[1], weights01[1], fx), interpf(weights10[1], weights00[1], fx), fy);
+  // And these for the searches, they indicate the ends of the loops:
+  offset[2] = float4(offset[0].x, offset[0].z, offset[1].y, offset[1].w) +
+              (float4(-2.0f, 2.0f, -2.0f, 2.0f) * float(SMAA_MAX_SEARCH_STEPS)) /
+                  float4(float2(size.x), float2(size.y));
 }
 
 /**
- * Similar to area(), this calculates the area corresponding to a certain
+ * Neighborhood Blending Vertex Shader
+ */
+static void SMAANeighborhoodBlendingVS(float2 texcoord, int2 size, float4 &offset)
+{
+  offset = float4(texcoord, texcoord) + float4(1.0f, 0.0f, 0.0f, 1.0f) / float4(size, size);
+}
+#endif  // SMAA_INCLUDE_VS
+
+/**
+ * Luma Edge Detection
+ *
+ * IMPORTANT NOTICE: luma edge detection requires gamma-corrected colors, and
+ * thus 'colorTex' should be a non-sRGB texture.
+ */
+static float2 SMAALumaEdgeDetectionPS(float2 texcoord,
+                                      float4 offset[3],
+                                      SMAATexture2D(colorTex),
+#if SMAA_PREDICATION
+                                      SMAATexture2D(predicationTex),
+#endif
+                                      float edge_threshold,
+                                      float3 luminance_coefficients,
+                                      float local_contrast_adaptation_factor)
+{
+#if SMAA_PREDICATION
+  float2 threshold = SMAACalculatePredicatedThreshold(
+      texcoord, offset, SMAATexturePass2D(predicationTex));
+#else
+  // Calculate the threshold:
+  float2 threshold = float2(edge_threshold, edge_threshold);
+#endif
+
+  // Calculate lumas:
+  // float4 weights = float4(0.2126, 0.7152, 0.0722, 0.0);
+  float4 weights = float4(luminance_coefficients, 0.0f);
+  float L = math::dot(SMAASamplePoint(colorTex, texcoord), weights);
+
+  float Lleft = math::dot(SMAASamplePoint(colorTex, offset[0].xy()), weights);
+  float Ltop = math::dot(SMAASamplePoint(colorTex, offset[0].zw()), weights);
+
+  // We do the usual threshold:
+  float4 delta;
+  float2 delta_left_top = math::abs(L - float2(Lleft, Ltop));
+  delta.x = delta_left_top.x;
+  delta.y = delta_left_top.y;
+  float2 edges = math::step(threshold, delta.xy());
+
+  // Then return early if there is no edge:
+  if (math::dot(edges, float2(1.0f, 1.0f)) == 0.0f) {
+    return float2(0.0f);
+  }
+
+  // Calculate right and bottom deltas:
+  float Lright = math::dot(SMAASamplePoint(colorTex, offset[1].xy()), weights);
+  float Lbottom = math::dot(SMAASamplePoint(colorTex, offset[1].zw()), weights);
+  float2 delta_right_bottom = math::abs(L - float2(Lright, Lbottom));
+  delta.z = delta_right_bottom.x;
+  delta.w = delta_right_bottom.y;
+
+  // Calculate the maximum delta in the direct neighborhood:
+  float2 maxDelta = math::max(delta.xy(), delta.zw());
+
+  // Calculate left-left and top-top deltas:
+  float Lleftleft = math::dot(SMAASamplePoint(colorTex, offset[2].xy()), weights);
+  float Ltoptop = math::dot(SMAASamplePoint(colorTex, offset[2].zw()), weights);
+  float2 delta_left_left_top_top = math::abs(float2(Lleft, Ltop) - float2(Lleftleft, Ltoptop));
+  delta.z = delta_left_left_top_top.x;
+  delta.w = delta_left_left_top_top.y;
+
+  // Calculate the final maximum delta:
+  maxDelta = math::max(maxDelta.xy(), delta.zw());
+  float finalDelta = math::max(maxDelta.x, maxDelta.y);
+
+  // Local contrast adaptation:
+  edges *= math::step(finalDelta, local_contrast_adaptation_factor * delta.xy());
+
+  return edges;
+}
+
+/* ----------------------------------------------------------------------------
+ * Diagonal Search Functions */
+
+#if !defined(SMAA_DISABLE_DIAG_DETECTION)
+
+/**
+ * Allows to decode two binary values from a bilinear-filtered access.
+ */
+static float2 SMAADecodeDiagBilinearAccess(float2 e)
+{
+  // Bilinear access for fetching 'e' has a 0.25 offset, and we are
+  // interested in the R and G edges:
+  //
+  // +---G---+-------+
+  // |   x o R   x   |
+  // +-------+-------+
+  //
+  // Then, if one of these edges is enabled:
+  //   Red:   (0.75 * X + 0.25 * 1) => 0.25 or 1.0
+  //   Green: (0.75 * 1 + 0.25 * X) => 0.75 or 1.0
+  //
+  // This function will unpack the values (mad + mul + round):
+  // wolframalpha.com: round(x * abs(5 * x - 5 * 0.75)) plot 0 to 1
+  e.x = e.x * math::abs(5.0f * e.x - 5.0f * 0.75f);
+  return math::round(e);
+}
+
+static float4 SMAADecodeDiagBilinearAccess(float4 e)
+{
+  e.x = e.x * math::abs(5.0f * e.x - 5.0f * 0.75f);
+  e.z = e.z * math::abs(5.0f * e.z - 5.0f * 0.75f);
+  return math::round(e);
+}
+
+/**
+ * These functions allow performing diagonal pattern searches.
+ */
+static float2 SMAASearchDiag1(
+    SMAATexture2D(edgesTex), float2 texcoord, float2 dir, int2 size, float2 &e)
+{
+  float4 coord = float4(texcoord, -1.0f, 1.0f);
+  float3 t = float3(1.0f / float2(size), 1.0f);
+  while (coord.z < float(SMAA_MAX_SEARCH_STEPS_DIAG - 1) && coord.w > 0.9f) {
+    float3 increment = mad(t, float3(dir, 1.0f), coord.xyz());
+    coord.x = increment.x;
+    coord.y = increment.y;
+    coord.z = increment.z;
+    e = SMAASamplePoint(edgesTex, coord.xy()).xy();
+    coord.w = math::dot(e, float2(0.5f, 0.5f));
+  }
+  return coord.zw();
+}
+
+static float2 SMAASearchDiag2(
+    SMAATexture2D(edgesTex), float2 texcoord, float2 dir, int2 size, float2 &e)
+{
+  float4 coord = float4(texcoord, -1.0f, 1.0f);
+  coord.x += 0.25f / size.x;  // See @SearchDiag2Optimization
+  float3 t = float3(1.0f / float2(size), 1.0f);
+  while (coord.z < float(SMAA_MAX_SEARCH_STEPS_DIAG - 1) && coord.w > 0.9f) {
+    float3 increment = mad(t, float3(dir, 1.0f), coord.xyz());
+    coord.x = increment.x;
+    coord.y = increment.y;
+    coord.z = increment.z;
+
+    // @SearchDiag2Optimization
+    // Fetch both edges at once using bilinear filtering:
+    e = SMAASampleLevelZero(edgesTex, coord.xy()).xy();
+    e = SMAADecodeDiagBilinearAccess(e);
+
+    // Non-optimized version:
+    // e.g = SMAASampleLevelZero(edgesTex, coord.xy).g;
+    // e.r = SMAASampleLevelZeroOffset(edgesTex, coord.xy, int2(1, 0), size).r;
+
+    coord.w = math::dot(e, float2(0.5f, 0.5f));
+  }
+  return coord.zw();
+}
+
+/**
+ * Similar to SMAAArea, this calculates the area corresponding to a certain
  * diagonal distance and crossing edges 'e'.
  */
-static void area_diag(int d1, int d2, int e1, int e2, float weights[2])
+static float2 SMAAAreaDiag(SMAATexture2D(areaTex), float2 dist, float2 e, float offset)
 {
-  int x = SMAA_AREATEX_MAX_DISTANCE_DIAG * e1 + d1;
-  int y = SMAA_AREATEX_MAX_DISTANCE_DIAG * e2 + d2;
+  float2 texcoord = mad(
+      float2(SMAA_AREATEX_MAX_DISTANCE_DIAG, SMAA_AREATEX_MAX_DISTANCE_DIAG), e, dist);
 
-  const float *w = areatex_sample_internal(areatex_diag, x, y);
-  copy_v2_v2(weights, w);
+  // We do a scale and bias for mapping to texel space:
+  texcoord = mad(SMAA_AREATEX_PIXEL_SIZE, texcoord, 0.5f * SMAA_AREATEX_PIXEL_SIZE);
+
+  // Diagonal areas are on the second half of the texture:
+  texcoord.x += 0.5f;
+
+  // Move to proper place, according to the subpixel offset:
+  texcoord.y += SMAA_AREATEX_SUBTEX_SIZE * offset;
+
+  // Do it!
+  return SMAA_AREATEX_SELECT(SMAASampleLevelZero(areaTex, texcoord));
 }
 
-/*-----------------------------------------------------------------------------*/
-/* Edge Detection (First Pass) */
-/*-----------------------------------------------------------------------------*/
-
-SMAAEdgeDetectionOperation::SMAAEdgeDetectionOperation()
+/**
+ * This searches for diagonal patterns and returns the corresponding weights.
+ */
+static float2 SMAACalculateDiagWeights(SMAATexture2D(edgesTex),
+                                       SMAATexture2D(areaTex),
+                                       float2 texcoord,
+                                       float2 e,
+                                       float4 subsampleIndices,
+                                       int2 size)
 {
-  this->add_input_socket(DataType::Color); /* image */
-  this->add_input_socket(DataType::Value); /* Depth, material ID, etc. TODO: currently unused. */
-  this->add_output_socket(DataType::Color);
-  flags_.can_be_constant = true;
-  this->set_threshold(CMP_DEFAULT_SMAA_THRESHOLD);
-  this->set_local_contrast_adaptation_factor(CMP_DEFAULT_SMAA_CONTRAST_LIMIT);
+  float2 weights = float2(0.0f, 0.0f);
+
+  // Search for the line ends:
+  float4 d;
+  float2 end;
+  if (e.x > 0.0f) {
+    float2 negative_diagonal = SMAASearchDiag1(
+        SMAATexturePass2D(edgesTex), texcoord, float2(-1.0f, 1.0f), size, end);
+    d.x = negative_diagonal.x;
+    d.z = negative_diagonal.y;
+    d.x += float(end.y > 0.9f);
+  }
+  else {
+    d.x = 0.0f;
+    d.z = 0.0f;
+  }
+  float2 positive_diagonal = SMAASearchDiag1(
+      SMAATexturePass2D(edgesTex), texcoord, float2(1.0, -1.0), size, end);
+  d.y = positive_diagonal.x;
+  d.w = positive_diagonal.y;
+
+  SMAA_BRANCH
+  if (d.x + d.y > 2.0f) {  // d.x + d.y + 1 > 3
+    // Fetch the crossing edges:
+    float4 coords = float4(texcoord, texcoord) +
+                    float4(-d.x + 0.25f, d.x, d.y, -d.y - 0.25f) / float4(size, size);
+    float4 c;
+    float2 left_edge = SMAASampleLevelZeroOffset(edgesTex, coords.xy(), int2(-1, 0), size).xy();
+    float2 right_edge = SMAASampleLevelZeroOffset(edgesTex, coords.zw(), int2(1, 0), size).xy();
+    c.x = left_edge.x;
+    c.y = left_edge.y;
+    c.z = right_edge.x;
+    c.w = right_edge.y;
+    float4 decoded_access = SMAADecodeDiagBilinearAccess(c);
+    c.y = decoded_access.x;
+    c.x = decoded_access.y;
+    c.w = decoded_access.z;
+    c.z = decoded_access.w;
+
+    // Non-optimized version:
+    // float4 coords = mad(float4(-d.x, d.x, d.y, -d.y), SMAA_RT_METRICS.xyxy, texcoord.xyxy);
+    // float4 c;
+    // c.x = SMAASampleLevelZeroOffset(edgesTex, coords.xy, int2(-1,  0), size).g;
+    // c.y = SMAASampleLevelZeroOffset(edgesTex, coords.xy, int2( 0,  0), size).r;
+    // c.z = SMAASampleLevelZeroOffset(edgesTex, coords.zw, int2( 1,  0), size).g;
+    // c.w = SMAASampleLevelZeroOffset(edgesTex, coords.zw, int2( 1, -1), size).r;
+
+    // Merge crossing edges at each side into a single value:
+    float2 cc = mad(float2(2.0f, 2.0f), float2(c.x, c.z), float2(c.y, c.w));
+
+    // Remove the crossing edge if we didn't find the end of the line:
+    SMAAMovc(math::step(0.9f, d.zw()), cc, float2(0.0f, 0.0f));
+
+    // Fetch the areas for this line:
+    weights += SMAAAreaDiag(SMAATexturePass2D(areaTex), d.xy(), cc, subsampleIndices.z);
+  }
+
+  // Search for the line ends:
+  float2 negative_diagonal = SMAASearchDiag2(
+      SMAATexturePass2D(edgesTex), texcoord, float2(-1.0f, -1.0f), size, end);
+  d.x = negative_diagonal.x;
+  d.z = negative_diagonal.y;
+  if (SMAASamplePointOffset(edgesTex, texcoord, int2(1, 0), size).x > 0.0f) {
+    float2 positive_diagonal = SMAASearchDiag2(
+        SMAATexturePass2D(edgesTex), texcoord, float2(1.0f, 1.0f), size, end);
+    d.y = positive_diagonal.x;
+    d.w = positive_diagonal.y;
+    d.y += float(end.y > 0.9f);
+  }
+  else {
+    d.y = 0.0f;
+    d.w = 0.0f;
+  }
+
+  SMAA_BRANCH
+  if (d.x + d.y > 2.0f) {  // d.x + d.y + 1 > 3
+    // Fetch the crossing edges:
+    float4 coords = float4(texcoord, texcoord) + float4(-d.x, -d.x, d.y, d.y) / float4(size, size);
+    float4 c;
+    c.x = SMAASampleLevelZeroOffset(edgesTex, coords.xy(), int2(-1, 0), size).y;
+    c.y = SMAASampleLevelZeroOffset(edgesTex, coords.xy(), int2(0, -1), size).x;
+    float2 left_edge = SMAASampleLevelZeroOffset(edgesTex, coords.zw(), int2(1, 0), size).xy();
+    c.z = left_edge.y;
+    c.w = left_edge.x;
+    float2 cc = mad(float2(2.0f, 2.0f), float2(c.x, c.z), float2(c.y, c.w));
+
+    // Remove the crossing edge if we didn't find the end of the line:
+    SMAAMovc(math::step(0.9f, d.zw()), cc, float2(0.0f, 0.0f));
+
+    // Fetch the areas for this line:
+    float2 area = SMAAAreaDiag(SMAATexturePass2D(areaTex), d.xy(), cc, subsampleIndices.w).xy();
+    weights.x += area.y;
+    weights.y += area.x;
+  }
+
+  return weights;
+}
+#endif
+
+/* ----------------------------------------------------------------------------
+ * Horizontal/Vertical Search Functions */
+
+/**
+ * This allows to determine how much length we should add in the last step
+ * of the searches. It takes the bilinearly interpolated edge (see
+ * @PSEUDO_GATHER4), and adds 0, 1 or 2, depending on which edges and
+ * crossing edges are active.
+ */
+static float SMAASearchLength(SMAATexture2D(searchTex), float2 e, float offset)
+{
+  // The texture is flipped vertically, with left and right cases taking half
+  // of the space horizontally:
+  float2 scale = SMAA_SEARCHTEX_SIZE * float2(0.5f, -1.0f);
+  float2 bias = SMAA_SEARCHTEX_SIZE * float2(offset, 1.0f);
+
+  // Scale and bias to access texel centers:
+  scale += float2(-1.0f, 1.0f);
+  bias += float2(0.5f, -0.5f);
+
+  // Convert from pixel coordinates to texcoords:
+  // (We use SMAA_SEARCHTEX_PACKED_SIZE because the texture is cropped)
+  scale *= 1.0f / SMAA_SEARCHTEX_PACKED_SIZE;
+  bias *= 1.0f / SMAA_SEARCHTEX_PACKED_SIZE;
+
+  // Lookup the search texture:
+  return SMAA_SEARCHTEX_SELECT(SMAASampleLevelZero(searchTex, mad(scale, e, bias)));
 }
 
-void SMAAEdgeDetectionOperation::set_threshold(float threshold)
+/**
+ * Horizontal/vertical search functions for the 2nd pass.
+ */
+static float SMAASearchXLeft(
+    SMAATexture2D(edgesTex), SMAATexture2D(searchTex), float2 texcoord, float end, int2 size)
 {
-  /* UI values are between 0 and 1 for simplicity but algorithm expects values between 0 and 0.5 */
-  threshold_ = scalenorm(0, 0.5, threshold);
+  /**
+   * @PSEUDO_GATHER4
+   * This texcoord has been offset by (-0.25, -0.125) in the vertex shader to
+   * sample between edge, thus fetching four edges in a row.
+   * Sampling with different offsets in each direction allows to disambiguate
+   * which edges are active from the four fetched ones.
+   */
+  float2 e = float2(0.0f, 1.0f);
+  while (texcoord.x > end && e.y > 0.8281f &&  // Is there some edge not activated?
+         e.x == 0.0f)                          // Or is there a crossing edge that breaks the line?
+  {
+    e = SMAASampleLevelZero(edgesTex, texcoord).xy();
+    texcoord = texcoord - float2(2.0f, 0.0f) / float2(size);
+  }
+
+  float offset = mad(
+      -(255.0f / 127.0f), SMAASearchLength(SMAATexturePass2D(searchTex), e, 0.0f), 3.25f);
+  return texcoord.x + offset / size.x;
+
+  // Non-optimized version:
+  // We correct the previous (-0.25, -0.125) offset we applied:
+  // texcoord.x += 0.25 * SMAA_RT_METRICS.x;
+
+  // The searches are biased by 1, so adjust the coords accordingly:
+  // texcoord.x += SMAA_RT_METRICS.x;
+
+  // Disambiguate the length added by the last step:
+  // texcoord.x += 2.0 * SMAA_RT_METRICS.x; // Undo last step
+  // texcoord.x -= SMAA_RT_METRICS.x * (255.0 / 127.0) *
+  //               SMAASearchLength(SMAATexturePass2D(searchTex), e, 0.0);
+  // return mad(SMAA_RT_METRICS.x, offset, texcoord.x);
 }
 
-void SMAAEdgeDetectionOperation::set_local_contrast_adaptation_factor(float factor)
+static float SMAASearchXRight(
+    SMAATexture2D(edgesTex), SMAATexture2D(searchTex), float2 texcoord, float end, int2 size)
 {
-  /* UI values are between 0 and 1 for simplicity but algorithm expects values between 1 and 10 */
-  contrast_limit_ = scalenorm(1, 10, factor);
+  float2 e = float2(0.0f, 1.0f);
+  while (texcoord.x < end && e.y > 0.8281f &&  // Is there some edge not activated?
+         e.x == 0.0f)                          // Or is there a crossing edge that breaks the line?
+  {
+    e = SMAASampleLevelZero(edgesTex, texcoord).xy();
+    texcoord = texcoord + float2(2.0f, 0.0f) / float2(size);
+  }
+  float offset = mad(
+      -(255.0f / 127.0f), SMAASearchLength(SMAATexturePass2D(searchTex), e, 0.5f), 3.25f);
+  return texcoord.x - offset / size.x;
 }
 
-void SMAAEdgeDetectionOperation::get_area_of_interest(const int /*input_idx*/,
-                                                      const rcti &output_area,
-                                                      rcti &r_input_area)
+static float SMAASearchYUp(
+    SMAATexture2D(edgesTex), SMAATexture2D(searchTex), float2 texcoord, float end, int2 size)
 {
-  r_input_area.xmax = output_area.xmax + 1;
-  r_input_area.xmin = output_area.xmin - 2;
-  r_input_area.ymax = output_area.ymax + 1;
-  r_input_area.ymin = output_area.ymin - 2;
+  float2 e = float2(1.0f, 0.0f);
+  while (texcoord.y > end && e.x > 0.8281f &&  // Is there some edge not activated?
+         e.y == 0.0f)                          // Or is there a crossing edge that breaks the line?
+  {
+    e = SMAASampleLevelZero(edgesTex, texcoord).xy();
+    texcoord = texcoord - float2(0.0f, 2.0f) / float2(size);
+  }
+  float2 flipped_edge = float2(e.y, e.x);
+  float offset = mad(-(255.0f / 127.0f),
+                     SMAASearchLength(SMAATexturePass2D(searchTex), flipped_edge, 0.0f),
+                     3.25f);
+  return texcoord.y + offset / size.y;
 }
 
-void SMAAEdgeDetectionOperation::update_memory_buffer_partial(MemoryBuffer *output,
-                                                              const rcti &area,
-                                                              Span<MemoryBuffer *> inputs)
+static float SMAASearchYDown(
+    SMAATexture2D(edgesTex), SMAATexture2D(searchTex), float2 texcoord, float end, int2 size)
 {
-  const MemoryBuffer *image = inputs[0];
-  for (BuffersIterator<float> it = output->iterate_with({}, area); !it.is_end(); ++it) {
-    float color[4];
-    const int x = it.x;
-    const int y = it.y;
+  float2 e = float2(1.0f, 0.0f);
+  while (texcoord.y < end && e.x > 0.8281f &&  // Is there some edge not activated?
+         e.y == 0.0f)                          // Or is there a crossing edge that breaks the line?
+  {
+    e = SMAASampleLevelZero(edgesTex, texcoord).xy();
+    texcoord = texcoord + float2(0.0f, 2.0f) / float2(size);
+  }
+  float2 flipped_edge = float2(e.y, e.x);
+  float offset = mad(-(255.0f / 127.0f),
+                     SMAASearchLength(SMAATexturePass2D(searchTex), flipped_edge, 0.5f),
+                     3.25f);
+  return texcoord.y - offset / size.y;
+}
 
-    /* Calculate luma deltas: */
-    image->read_elem_checked(x, y, color);
-    const float L = IMB_colormanagement_get_luminance(color);
-    image->read_elem_checked(x - 1, y, color);
-    const float Lleft = IMB_colormanagement_get_luminance(color);
-    image->read_elem_checked(x, y - 1, color);
-    const float Ltop = IMB_colormanagement_get_luminance(color);
-    const float Dleft = fabsf(L - Lleft);
-    const float Dtop = fabsf(L - Ltop);
+/**
+ * Ok, we have the distance and both crossing edges. So, what are the areas
+ * at each side of the current edge?
+ */
+static float2 SMAAArea(SMAATexture2D(areaTex), float2 dist, float e1, float e2, float offset)
+{
+  // Rounding prevents precision errors of bilinear filtering:
+  float2 texcoord = mad(float2(SMAA_AREATEX_MAX_DISTANCE, SMAA_AREATEX_MAX_DISTANCE),
+                        math::round(4.0f * float2(e1, e2)),
+                        dist);
 
-    /* We do the usual threshold: */
-    it.out[0] = (x > 0 && Dleft >= threshold_) ? 1.0f : 0.0f;
-    it.out[1] = (y > 0 && Dtop >= threshold_) ? 1.0f : 0.0f;
-    it.out[2] = 0.0f;
-    it.out[3] = 1.0f;
+  // We do a scale and bias for mapping to texel space:
+  texcoord = mad(SMAA_AREATEX_PIXEL_SIZE, texcoord, 0.5f * SMAA_AREATEX_PIXEL_SIZE);
 
-    /* Then discard if there is no edge: */
-    if (is_zero_v2(it.out)) {
-      continue;
+  // Move to proper place, according to the subpixel offset:
+  texcoord.y = mad(SMAA_AREATEX_SUBTEX_SIZE, offset, texcoord.y);
+
+  // Sample the area texture at the computed coordinates:
+  return SMAA_AREATEX_SELECT(SMAASampleLevelZero(areaTex, texcoord));
+}
+
+/* ----------------------------------------------------------------------------
+ * Corner Detection Functions */
+
+static void SMAADetectHorizontalCornerPattern(SMAATexture2D(edgesTex),
+                                              float2 &weights,
+                                              float4 texcoord,
+                                              float2 d,
+                                              int2 size,
+                                              int corner_rounding)
+{
+#if !defined(SMAA_DISABLE_CORNER_DETECTION)
+  float2 leftRight = math::step(d, float2(d.y, d.x));
+  float2 rounding = (1.0f - corner_rounding / 100.0f) * leftRight;
+
+  rounding /= leftRight.x + leftRight.y;  // Reduce blending for pixels in the center of a line.
+
+  float2 factor = float2(1.0f, 1.0f);
+  factor.x -= rounding.x * SMAASampleLevelZeroOffset(edgesTex, texcoord.xy(), int2(0, 1), size).x;
+  factor.x -= rounding.y * SMAASampleLevelZeroOffset(edgesTex, texcoord.zw(), int2(1, 1), size).x;
+  factor.y -= rounding.x * SMAASampleLevelZeroOffset(edgesTex, texcoord.xy(), int2(0, -2), size).x;
+  factor.y -= rounding.y * SMAASampleLevelZeroOffset(edgesTex, texcoord.zw(), int2(1, -2), size).x;
+
+  weights *= saturate(factor);
+#endif
+}
+
+static void SMAADetectVerticalCornerPattern(SMAATexture2D(edgesTex),
+                                            float2 &weights,
+                                            float4 texcoord,
+                                            float2 d,
+                                            int2 size,
+                                            int corner_rounding)
+{
+#if !defined(SMAA_DISABLE_CORNER_DETECTION)
+  float2 leftRight = math::step(d, float2(d.y, d.x));
+  float2 rounding = (1.0f - corner_rounding / 100.0f) * leftRight;
+
+  rounding /= leftRight.x + leftRight.y;
+
+  float2 factor = float2(1.0f, 1.0f);
+  factor.x -= rounding.x * SMAASampleLevelZeroOffset(edgesTex, texcoord.xy(), int2(1, 0), size).y;
+  factor.x -= rounding.y * SMAASampleLevelZeroOffset(edgesTex, texcoord.zw(), int2(1, 1), size).y;
+  factor.y -= rounding.x * SMAASampleLevelZeroOffset(edgesTex, texcoord.xy(), int2(-2, 0), size).y;
+  factor.y -= rounding.y * SMAASampleLevelZeroOffset(edgesTex, texcoord.zw(), int2(-2, 1), size).y;
+
+  weights *= saturate(factor);
+#endif
+}
+
+/* ----------------------------------------------------------------------------
+ * Blending Weight Calculation Pixel Shader (Second Pass) */
+
+static float4 SMAABlendingWeightCalculationPS(float2 texcoord,
+                                              float2 pixcoord,
+                                              float4 offset[3],
+                                              MemoryBuffer *edgesTex,
+                                              MemoryBuffer *areaTex,
+                                              MemoryBuffer *searchTex,
+                                              float4 subsampleIndices,
+                                              int2 size,
+                                              int corner_rounding)
+{  // For `subsampleIndices`, just pass zero for SMAA 1x, see @SUBSAMPLE_INDICES.
+  float4 weights = float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+  float2 e = SMAASamplePoint(edgesTex, texcoord).xy();
+
+  SMAA_BRANCH
+  if (e.y > 0.0f) {  // Edge at north
+#if !defined(SMAA_DISABLE_DIAG_DETECTION)
+    // Diagonals have both north and west edges, so searching for them in
+    // one of the boundaries is enough.
+    float2 diagonal_weights = SMAACalculateDiagWeights(SMAATexturePass2D(edgesTex),
+                                                       SMAATexturePass2D(areaTex),
+                                                       texcoord,
+                                                       e,
+                                                       subsampleIndices,
+                                                       size);
+
+    weights.x = diagonal_weights.x;
+    weights.y = diagonal_weights.y;
+
+    // We give priority to diagonals, so if we find a diagonal we skip
+    // horizontal/vertical processing.
+    SMAA_BRANCH
+    if (weights.x == -weights.y) {  // weights.x + weights.y == 0.0
+#endif
+
+      float2 d;
+
+      // Find the distance to the left:
+      float3 coords;
+      coords.x = SMAASearchXLeft(SMAATexturePass2D(edgesTex),
+                                 SMAATexturePass2D(searchTex),
+                                 offset[0].xy(),
+                                 offset[2].x,
+                                 size);
+      coords.y =
+          offset[1].y;  // offset[1].y = texcoord.y - 0.25 * SMAA_RT_METRICS.y (@CROSSING_OFFSET)
+      d.x = coords.x;
+
+      // Now fetch the left crossing edges, two at a time using bilinear
+      // filtering. Sampling at -0.25 (see @CROSSING_OFFSET) makes it
+      // possible to discern what value each edge has:
+      float e1 = SMAASampleLevelZero(edgesTex, coords.xy()).x;
+
+      // Find the distance to the right:
+      coords.z = SMAASearchXRight(SMAATexturePass2D(edgesTex),
+                                  SMAATexturePass2D(searchTex),
+                                  offset[0].zw(),
+                                  offset[2].y,
+                                  size);
+      d.y = coords.z;
+
+      // We want the distances to be in pixel units (doing this here allows
+      // better interleaving of arithmetic and memory accesses):
+      d = math::abs(math::round(mad(float2(size.x), d, -float2(pixcoord.x))));
+
+      // SMAAArea below needs a sqrt, as the areas texture is compressed
+      // quadratically:
+      float2 sqrt_d = math::sqrt(d);
+
+      // Fetch the right crossing edges:
+      float e2 =
+          SMAASampleLevelZeroOffset(edgesTex, float2(coords.z, coords.y), int2(1, 0), size).x;
+
+      // Ok, we know what this pattern looks like, now it is time for getting
+      // the actual area:
+      float2 area = SMAAArea(SMAATexturePass2D(areaTex), sqrt_d, e1, e2, subsampleIndices.y);
+      weights.x = area.x;
+      weights.y = area.y;
+
+      // Fix corners:
+      coords.y = texcoord.y;
+
+      float2 corner_weight = weights.xy();
+      SMAADetectHorizontalCornerPattern(SMAATexturePass2D(edgesTex),
+                                        corner_weight,
+                                        float4(coords.xy(), coords.z, coords.y),
+                                        d,
+                                        size,
+                                        corner_rounding);
+      weights.x = corner_weight.x;
+      weights.y = corner_weight.y;
+
+#if !defined(SMAA_DISABLE_DIAG_DETECTION)
     }
+    else
+      e.x = 0.0f;  // Skip vertical processing.
+#endif
+  }
 
-    /* Calculate right and bottom deltas: */
-    image->read_elem_checked(x + 1, y, color);
-    const float Lright = IMB_colormanagement_get_luminance(color);
-    image->read_elem_checked(x, y + 1, color);
-    const float Lbottom = IMB_colormanagement_get_luminance(color);
-    const float Dright = fabsf(L - Lright);
-    const float Dbottom = fabsf(L - Lbottom);
+  SMAA_BRANCH
+  if (e.x > 0.0f) {  // Edge at west
+    float2 d;
 
-    /* Calculate the maximum delta in the direct neighborhood: */
-    float max_delta = fmaxf(fmaxf(Dleft, Dright), fmaxf(Dtop, Dbottom));
+    // Find the distance to the top:
+    float3 coords;
+    coords.y = SMAASearchYUp(SMAATexturePass2D(edgesTex),
+                             SMAATexturePass2D(searchTex),
+                             offset[1].xy(),
+                             offset[2].z,
+                             size);
+    coords.x = offset[0].x;  // offset[0].x = texcoord.x - 0.25 * SMAA_RT_METRICS.x;
+    d.x = coords.y;
 
-    /* Calculate luma used for both left and top edges: */
-    image->read_elem_checked(x - 1, y - 1, color);
-    const float Llefttop = IMB_colormanagement_get_luminance(color);
+    // Fetch the top crossing edges:
+    float e1 = SMAASampleLevelZero(edgesTex, coords.xy()).y;
 
-    /* Left edge */
-    if (it.out[0] != 0.0f) {
-      /* Calculate deltas around the left pixel: */
-      image->read_elem_checked(x - 2, y, color);
-      const float Lleftleft = IMB_colormanagement_get_luminance(color);
-      image->read_elem_checked(x - 1, y + 1, color);
-      const float Lleftbottom = IMB_colormanagement_get_luminance(color);
-      const float Dleftleft = fabsf(Lleft - Lleftleft);
-      const float Dlefttop = fabsf(Lleft - Llefttop);
-      const float Dleftbottom = fabsf(Lleft - Lleftbottom);
+    // Find the distance to the bottom:
+    coords.z = SMAASearchYDown(SMAATexturePass2D(edgesTex),
+                               SMAATexturePass2D(searchTex),
+                               offset[1].zw(),
+                               offset[2].w,
+                               size);
+    d.y = coords.z;
 
-      /* Calculate the final maximum delta: */
-      max_delta = fmaxf(max_delta, fmaxf(Dleftleft, fmaxf(Dlefttop, Dleftbottom)));
+    // We want the distances to be in pixel units:
+    d = math::abs(math::round(mad(float2(size.y), d, -float2(pixcoord.y))));
 
-      /* Local contrast adaptation: */
-      if (max_delta > contrast_limit_ * Dleft) {
-        it.out[0] = 0.0f;
-      }
-    }
+    // SMAAArea below needs a sqrt, as the areas texture is compressed
+    // quadratically:
+    float2 sqrt_d = math::sqrt(d);
 
-    /* Top edge */
-    if (it.out[1] != 0.0f) {
-      /* Calculate top-top delta: */
-      image->read_elem_checked(x, y - 2, color);
-      const float Ltoptop = IMB_colormanagement_get_luminance(color);
-      image->read_elem_checked(x + 1, y - 1, color);
-      const float Ltopright = IMB_colormanagement_get_luminance(color);
-      const float Dtoptop = fabsf(Ltop - Ltoptop);
-      const float Dtopleft = fabsf(Ltop - Llefttop);
-      const float Dtopright = fabsf(Ltop - Ltopright);
+    // Fetch the bottom crossing edges:
+    float e2 = SMAASampleLevelZeroOffset(edgesTex, float2(coords.x, coords.z), int2(0, 1), size).y;
 
-      /* Calculate the final maximum delta: */
-      max_delta = fmaxf(max_delta, fmaxf(Dtoptop, fmaxf(Dtopleft, Dtopright)));
+    // Get the area for this direction:
+    float2 area = SMAAArea(SMAATexturePass2D(areaTex), sqrt_d, e1, e2, subsampleIndices.x);
+    weights.z = area.x;
+    weights.w = area.y;
 
-      /* Local contrast adaptation: */
-      if (max_delta > contrast_limit_ * Dtop) {
-        it.out[1] = 0.0f;
-      }
-    }
+    // Fix corners:
+    coords.x = texcoord.x;
+
+    float2 corner_weight = weights.zw();
+    SMAADetectVerticalCornerPattern(SMAATexturePass2D(edgesTex),
+                                    corner_weight,
+                                    float4(coords.xy(), coords.x, coords.z),
+                                    d,
+                                    size,
+                                    corner_rounding);
+    weights.z = corner_weight.x;
+    weights.w = corner_weight.y;
+  }
+
+  return weights;
+}
+
+/* ----------------------------------------------------------------------------
+ * Neighborhood Blending Pixel Shader (Third Pass) */
+
+static float4 SMAANeighborhoodBlendingPS(float2 texcoord,
+                                         float4 offset,
+                                         SMAATexture2D(colorTex),
+                                         SMAATexture2D(blendTex),
+#if SMAA_REPROJECTION
+                                         SMAATexture2D(velocityTex),
+#endif
+                                         int2 size)
+{
+  // Fetch the blending weights for current pixel:
+  float4 a;
+  a.x = SMAASample(blendTex, offset.xy()).w;  // Right
+  a.y = SMAASample(blendTex, offset.zw()).y;  // Top
+  a.z = SMAASample(blendTex, texcoord).z;     // Left
+  a.w = SMAASample(blendTex, texcoord).x;     // Bottom
+
+  // Is there any blending weight with a value greater than 0.0? If not, skip the blending:
+  SMAA_BRANCH
+  if (math::dot(a, float4(1.0f, 1.0f, 1.0f, 1.0f)) < 1e-5f) {
+    float4 color = SMAASampleLevelZero(colorTex, texcoord);
+
+#if SMAA_REPROJECTION
+    float2 velocity = SMAA_DECODE_VELOCITY(SMAASampleLevelZero(velocityTex, texcoord));
+
+    // Pack velocity into the alpha channel:
+    color.a = math::sqrt(5.0f * math::length(velocity));
+#endif
+
+    return color;
+  }
+  else {
+    bool h = math::max(a.x, a.z) > math::max(a.y, a.w);  // max(horizontal) > max(vertical)
+
+    // Calculate the blending offsets:
+    float4 blendingOffset = float4(0.0f, a.y, 0.0f, a.w);
+    float2 blendingWeight = float2(a.y, a.w);
+    SMAAMovc(float4(h), blendingOffset, float4(a.x, 0.0f, a.z, 0.0f));
+    SMAAMovc(float2(h), blendingWeight, float2(a.x, a.z));
+    blendingWeight /= math::dot(blendingWeight, float2(1.0f, 1.0f));
+
+    // Calculate the texture coordinates:
+    float4 blendingCoord = float4(texcoord, texcoord) + blendingOffset / float4(size, -size);
+
+    // We exploit bilinear filtering to mix current pixel with the chosen
+    // neighbor:
+    float4 color = blendingWeight.x * SMAASampleLevelZero(colorTex, blendingCoord.xy());
+    color += blendingWeight.y * SMAASampleLevelZero(colorTex, blendingCoord.zw());
+
+#if SMAA_REPROJECTION
+    // Antialias velocity for proper reprojection in a later stage:
+    float2 velocity = blendingWeight.x *
+                      SMAA_DECODE_VELOCITY(SMAASampleLevelZero(velocityTex, blendingCoord.xy()));
+    velocity += blendingWeight.y *
+                SMAA_DECODE_VELOCITY(SMAASampleLevelZero(velocityTex, blendingCoord.zw()));
+
+    // Pack velocity into the alpha channel:
+    color.a = math::sqrt(5.0f * math::length(velocity));
+#endif
+
+    return color;
   }
 }
 
-/*-----------------------------------------------------------------------------*/
-/* Blending Weight Calculation (Second Pass) */
-/*-----------------------------------------------------------------------------*/
-
-SMAABlendingWeightCalculationOperation::SMAABlendingWeightCalculationOperation()
+SMAAOperation::SMAAOperation()
 {
-  this->add_input_socket(DataType::Color); /* edges */
+  this->add_input_socket(DataType::Color);
   this->add_output_socket(DataType::Color);
   flags_.can_be_constant = true;
-  this->set_corner_rounding(CMP_DEFAULT_SMAA_CORNER_ROUNDING);
 }
 
-void SMAABlendingWeightCalculationOperation::set_corner_rounding(float rounding)
-{
-  /* UI values are between 0 and 1 for simplicity but algorithm expects values between 0 and 100 */
-  corner_rounding_ = int(scalenorm(0, 100, rounding));
-}
-
-void SMAABlendingWeightCalculationOperation::update_memory_buffer_started(
-    MemoryBuffer * /*output*/, const rcti & /*out_area*/, Span<MemoryBuffer *> inputs)
-{
-  const MemoryBuffer *image = inputs[0];
-  sample_image_fn_ = [=](int x, int y, float *out) { image->read_elem_checked(x, y, out); };
-}
-
-void SMAABlendingWeightCalculationOperation::update_memory_buffer_partial(
-    MemoryBuffer *output, const rcti &out_area, Span<MemoryBuffer *> /*inputs*/)
-{
-  for (BuffersIterator<float> it = output->iterate_with({}, out_area); !it.is_end(); ++it) {
-    const int x = it.x;
-    const int y = it.y;
-    zero_v4(it.out);
-
-    float edges[4];
-    sample_image_fn_(x, y, edges);
-
-    /* Edge at north */
-    float c[4];
-    if (edges[1] > 0.0f) {
-      /* Diagonals have both north and west edges, so calculating weights for them */
-      /* in one of the boundaries is enough. */
-      calculate_diag_weights(x, y, edges, it.out);
-
-      /* We give priority to diagonals, so if we find a diagonal we skip. */
-      /* horizontal/vertical processing. */
-      if (!is_zero_v2(it.out)) {
-        continue;
-      }
-
-      /* Find the distance to the left and the right: */
-      int left = search_xleft(x, y);
-      int right = search_xright(x, y);
-      int d1 = x - left, d2 = right - x;
-
-      /* Fetch the left and right crossing edges: */
-      int e1 = 0, e2 = 0;
-      sample_image_fn_(left, y - 1, c);
-      if (c[0] > 0.0) {
-        e1 += 1;
-      }
-      sample_image_fn_(left, y, c);
-      if (c[0] > 0.0) {
-        e1 += 2;
-      }
-      sample_image_fn_(right + 1, y - 1, c);
-      if (c[0] > 0.0) {
-        e2 += 1;
-      }
-      sample_image_fn_(right + 1, y, c);
-      if (c[0] > 0.0) {
-        e2 += 2;
-      }
-
-      /* Ok, we know how this pattern looks like, now it is time for getting */
-      /* the actual area: */
-      area(d1, d2, e1, e2, it.out); /* R, G */
-
-      /* Fix corners: */
-      if (corner_rounding_) {
-        detect_horizontal_corner_pattern(it.out, left, right, y, d1, d2);
-      }
-    }
-
-    /* Edge at west */
-    if (edges[0] > 0.0f) {
-      /* Did we already do diagonal search for this west edge from the left neighboring pixel? */
-      if (is_vertical_search_unneeded(x, y)) {
-        continue;
-      }
-
-      /* Find the distance to the top and the bottom: */
-      int top = search_yup(x, y);
-      int bottom = search_ydown(x, y);
-      int d1 = y - top, d2 = bottom - y;
-
-      /* Fetch the top and bottom crossing edges: */
-      int e1 = 0, e2 = 0;
-      sample_image_fn_(x - 1, top, c);
-      if (c[1] > 0.0) {
-        e1 += 1;
-      }
-      sample_image_fn_(x, top, c);
-      if (c[1] > 0.0) {
-        e1 += 2;
-      }
-      sample_image_fn_(x - 1, bottom + 1, c);
-      if (c[1] > 0.0) {
-        e2 += 1;
-      }
-      sample_image_fn_(x, bottom + 1, c);
-      if (c[1] > 0.0) {
-        e2 += 2;
-      }
-
-      /* Get the area for this direction: */
-      area(d1, d2, e1, e2, it.out + 2); /* B, A */
-
-      /* Fix corners: */
-      if (corner_rounding_) {
-        detect_vertical_corner_pattern(it.out + 2, x, top, bottom, d1, d2);
-      }
-    }
-  }
-}
-
-void SMAABlendingWeightCalculationOperation::get_area_of_interest(const int /*input_idx*/,
-                                                                  const rcti &output_area,
-                                                                  rcti &r_input_area)
+void SMAAOperation::get_area_of_interest(const int /*input_idx*/,
+                                         const rcti &output_area,
+                                         rcti &r_input_area)
 {
   r_input_area.xmax = output_area.xmax +
-                      fmax(SMAA_MAX_SEARCH_STEPS, SMAA_MAX_SEARCH_STEPS_DIAG + 1);
-  r_input_area.xmin = output_area.xmin -
-                      fmax(fmax(SMAA_MAX_SEARCH_STEPS - 1, 1), SMAA_MAX_SEARCH_STEPS_DIAG + 1);
-  r_input_area.ymax = output_area.ymax + fmax(SMAA_MAX_SEARCH_STEPS, SMAA_MAX_SEARCH_STEPS_DIAG);
-  r_input_area.ymin = output_area.ymin -
-                      fmax(fmax(SMAA_MAX_SEARCH_STEPS - 1, 1), SMAA_MAX_SEARCH_STEPS_DIAG);
+                      math::max(SMAA_MAX_SEARCH_STEPS, SMAA_MAX_SEARCH_STEPS_DIAG + 1);
+  r_input_area.xmin = output_area.xmin - math::max(math::max(SMAA_MAX_SEARCH_STEPS - 1, 1),
+                                                   SMAA_MAX_SEARCH_STEPS_DIAG + 1);
+  r_input_area.ymax = output_area.ymax +
+                      math::max(SMAA_MAX_SEARCH_STEPS, SMAA_MAX_SEARCH_STEPS_DIAG);
+  r_input_area.ymin = output_area.ymin - math::max(math::max(SMAA_MAX_SEARCH_STEPS - 1, 1),
+                                                   SMAA_MAX_SEARCH_STEPS_DIAG);
 }
 
-/*-----------------------------------------------------------------------------*/
-/* Diagonal Search Functions */
-
-int SMAABlendingWeightCalculationOperation::search_diag1(int x, int y, int dir, bool *r_found)
+void SMAAOperation::update_memory_buffer(MemoryBuffer *output,
+                                         const rcti & /*area*/,
+                                         Span<MemoryBuffer *> inputs)
 {
-  float e[4];
-  int end = x + SMAA_MAX_SEARCH_STEPS_DIAG * dir;
-  *r_found = false;
-
-  while (x != end) {
-    x += dir;
-    y -= dir;
-    sample_image_fn_(x, y, e);
-    if (e[1] == 0.0f) {
-      *r_found = true;
-      break;
-    }
-    if (e[0] == 0.0f) {
-      *r_found = true;
-      return (dir < 0) ? x : x - dir;
-    }
-  }
-
-  return x - dir;
-}
-
-int SMAABlendingWeightCalculationOperation::search_diag2(int x, int y, int dir, bool *r_found)
-{
-  float e[4];
-  int end = x + SMAA_MAX_SEARCH_STEPS_DIAG * dir;
-  *r_found = false;
-
-  while (x != end) {
-    x += dir;
-    y += dir;
-    sample_image_fn_(x, y, e);
-    if (e[1] == 0.0f) {
-      *r_found = true;
-      break;
-    }
-    sample_image_fn_(x + 1, y, e);
-    if (e[0] == 0.0f) {
-      *r_found = true;
-      return (dir > 0) ? x : x - dir;
-    }
-  }
-
-  return x - dir;
-}
-
-void SMAABlendingWeightCalculationOperation::calculate_diag_weights(int x,
-                                                                    int y,
-                                                                    const float edges[2],
-                                                                    float weights[2])
-{
-  int d1, d2;
-  bool d1_found, d2_found;
-  float e[4], c[4];
-
-  zero_v2(weights);
-
-  if (SMAA_MAX_SEARCH_STEPS_DIAG <= 0) {
+  const MemoryBuffer *image = inputs[0];
+  if (image->is_a_single_elem()) {
+    copy_v4_v4(output->get_elem(0, 0), image->get_elem(0, 0));
     return;
   }
 
-  /* Search for the line ends: */
-  if (edges[0] > 0.0f) {
-    d1 = x - search_diag1(x, y, -1, &d1_found);
-  }
-  else {
-    d1 = 0;
-    d1_found = true;
-  }
-  d2 = search_diag1(x, y, 1, &d2_found) - x;
+  const int2 size = int2(image->get_width(), image->get_height());
+  MemoryBuffer edges(DataType::Float2, size.x, size.y);
 
-  if (d1 + d2 > 2) { /* d1 + d2 + 1 > 3 */
-    int e1 = 0, e2 = 0;
+  float3 luminance_coefficients;
+  IMB_colormanagement_get_luminance_coefficients(luminance_coefficients);
 
-    if (d1_found) {
-      /* Fetch the crossing edges: */
-      int left = x - d1, bottom = y + d1;
+  threading::parallel_for(IndexRange(size.y), 1, [&](const IndexRange sub_y_range) {
+    for (const int64_t y : sub_y_range) {
+      for (const int64_t x : IndexRange(size.x)) {
+        int2 texel = int2(x, y);
+        float2 coordinates = (float2(texel) + float2(0.5f)) / float2(size);
 
-      sample_image_fn_(left - 1, bottom, c);
-      if (c[1] > 0.0) {
-        e1 += 2;
-      }
-      sample_image_fn_(left, bottom, c);
-      if (c[0] > 0.0) {
-        e1 += 1;
+        float4 offset[3];
+        SMAAEdgeDetectionVS(coordinates, size, offset);
+
+        float2 edge = SMAALumaEdgeDetectionPS(coordinates,
+                                              offset,
+                                              image,
+                                              threshold_,
+                                              luminance_coefficients,
+                                              local_contrast_adaptation_factor_);
+        copy_v2_v2(edges.get_elem(texel.x, texel.y), edge);
       }
     }
+  });
 
-    if (d2_found) {
-      /* Fetch the crossing edges: */
-      int right = x + d2, top = y - d2;
+  MemoryBuffer blending_weights(DataType::Color, size.x, size.y);
 
-      sample_image_fn_(right + 1, top, c);
-      if (c[1] > 0.0) {
-        e2 += 2;
-      }
-      sample_image_fn_(right + 1, top - 1, c);
-      if (c[0] > 0.0) {
-        e2 += 1;
+  MemoryBuffer area_texture(DataType::Float2, AREATEX_WIDTH, AREATEX_HEIGHT);
+  area_texture.copy_from(areaTexBytes, area_texture.get_rect());
+
+  MemoryBuffer search_texture(DataType::Value, SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT);
+  search_texture.copy_from(searchTexBytes, search_texture.get_rect());
+
+  threading::parallel_for(IndexRange(size.y), 1, [&](const IndexRange sub_y_range) {
+    for (const int64_t y : sub_y_range) {
+      for (const int64_t x : IndexRange(size.x)) {
+        int2 texel = int2(x, y);
+        float2 coordinates = (float2(texel) + float2(0.5f)) / float2(size);
+
+        float4 offset[3];
+        float2 pixel_coordinates;
+        SMAABlendingWeightCalculationVS(coordinates, size, pixel_coordinates, offset);
+
+        float4 weights = SMAABlendingWeightCalculationPS(coordinates,
+                                                         pixel_coordinates,
+                                                         offset,
+                                                         &edges,
+                                                         &area_texture,
+                                                         &search_texture,
+                                                         float4(0.0f),
+                                                         size,
+                                                         corner_rounding_);
+        copy_v4_v4(blending_weights.get_elem(texel.x, texel.y), weights);
       }
     }
+  });
 
-    /* Fetch the areas for this line: */
-    area_diag(d1, d2, e1, e2, weights);
-  }
+  threading::parallel_for(IndexRange(size.y), 1, [&](const IndexRange sub_y_range) {
+    for (const int64_t y : sub_y_range) {
+      for (const int64_t x : IndexRange(size.x)) {
+        int2 texel = int2(x, y);
+        float2 coordinates = (float2(texel) + float2(0.5f)) / float2(size);
 
-  /* Search for the line ends: */
-  d1 = x - search_diag2(x, y, -1, &d1_found);
-  sample_image_fn_(x + 1, y, e);
-  if (e[0] > 0.0f) {
-    d2 = search_diag2(x, y, 1, &d2_found) - x;
-  }
-  else {
-    d2 = 0;
-    d2_found = true;
-  }
+        float4 offset;
+        SMAANeighborhoodBlendingVS(coordinates, size, offset);
 
-  if (d1 + d2 > 2) { /* d1 + d2 + 1 > 3 */
-    int e1 = 0, e2 = 0;
-
-    if (d1_found) {
-      /* Fetch the crossing edges: */
-      int left = x - d1, top = y - d1;
-
-      sample_image_fn_(left - 1, top, c);
-      if (c[1] > 0.0) {
-        e1 += 2;
-      }
-      sample_image_fn_(left, top - 1, c);
-      if (c[0] > 0.0) {
-        e1 += 1;
+        float4 result = SMAANeighborhoodBlendingPS(
+            coordinates, offset, image, &blending_weights, size);
+        copy_v4_v4(output->get_elem(texel.x, texel.y), result);
       }
     }
-
-    if (d2_found) {
-      /* Fetch the crossing edges: */
-      int right = x + d2, bottom = y + d2;
-
-      sample_image_fn_(right + 1, bottom, c);
-      if (c[1] > 0.0) {
-        e2 += 2;
-      }
-      if (c[0] > 0.0) {
-        e2 += 1;
-      }
-    }
-
-    /* Fetch the areas for this line: */
-    float w[2];
-    area_diag(d1, d2, e1, e2, w);
-    weights[0] += w[1];
-    weights[1] += w[0];
-  }
-}
-
-bool SMAABlendingWeightCalculationOperation::is_vertical_search_unneeded(int x, int y)
-{
-  int d1, d2;
-  bool found;
-  float e[4];
-
-  if (SMAA_MAX_SEARCH_STEPS_DIAG <= 0) {
-    return false;
-  }
-
-  /* Search for the line ends: */
-  sample_image_fn_(x - 1, y, e);
-  if (e[1] > 0.0f) {
-    d1 = x - search_diag2(x - 1, y, -1, &found);
-  }
-  else {
-    d1 = 0;
-  }
-  d2 = search_diag2(x - 1, y, 1, &found) - x;
-
-  return (d1 + d2 > 2); /* d1 + d2 + 1 > 3 */
-}
-
-/*-----------------------------------------------------------------------------*/
-/* Horizontal/Vertical Search Functions */
-
-int SMAABlendingWeightCalculationOperation::search_xleft(int x, int y)
-{
-  int end = x - SMAA_MAX_SEARCH_STEPS;
-  float e[4];
-
-  while (x > end) {
-    sample_image_fn_(x, y, e);
-    if (e[1] == 0.0f) { /* Is the edge not activated? */
-      break;
-    }
-    if (e[0] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
-      return x;
-    }
-    sample_image_fn_(x, y - 1, e);
-    if (e[0] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
-      return x;
-    }
-    x--;
-  }
-
-  return x + 1;
-}
-
-int SMAABlendingWeightCalculationOperation::search_xright(int x, int y)
-{
-  int end = x + SMAA_MAX_SEARCH_STEPS;
-  float e[4];
-
-  while (x < end) {
-    x++;
-    sample_image_fn_(x, y, e);
-    if (e[1] == 0.0f || /* Is the edge not activated? */
-        e[0] != 0.0f)   /* Or is there a crossing edge that breaks the line? */
-    {
-      break;
-    }
-    sample_image_fn_(x, y - 1, e);
-    if (e[0] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
-      break;
-    }
-  }
-
-  return x - 1;
-}
-
-int SMAABlendingWeightCalculationOperation::search_yup(int x, int y)
-{
-  int end = y - SMAA_MAX_SEARCH_STEPS;
-  float e[4];
-
-  while (y > end) {
-    sample_image_fn_(x, y, e);
-    if (e[0] == 0.0f) { /* Is the edge not activated? */
-      break;
-    }
-    if (e[1] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
-      return y;
-    }
-    sample_image_fn_(x - 1, y, e);
-    if (e[1] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
-      return y;
-    }
-    y--;
-  }
-
-  return y + 1;
-}
-
-int SMAABlendingWeightCalculationOperation::search_ydown(int x, int y)
-{
-  int end = y + SMAA_MAX_SEARCH_STEPS;
-  float e[4];
-
-  while (y < end) {
-    y++;
-    sample_image_fn_(x, y, e);
-    if (e[0] == 0.0f || /* Is the edge not activated? */
-        e[1] != 0.0f)   /* Or is there a crossing edge that breaks the line? */
-    {
-      break;
-    }
-    sample_image_fn_(x - 1, y, e);
-    if (e[1] != 0.0f) { /* Or is there a crossing edge that breaks the line? */
-      break;
-    }
-  }
-
-  return y - 1;
-}
-
-/*-----------------------------------------------------------------------------*/
-/* Corner Detection Functions */
-
-void SMAABlendingWeightCalculationOperation::detect_horizontal_corner_pattern(
-    float weights[2], int left, int right, int y, int d1, int d2)
-{
-  float factor[2] = {1.0f, 1.0f};
-  float rounding = corner_rounding_ / 100.0f;
-  float e[4];
-
-  /* Reduce blending for pixels in the center of a line. */
-  rounding *= (d1 == d2) ? 0.5f : 1.0f;
-
-  /* Near the left corner */
-  if (d1 <= d2) {
-    sample_image_fn_(left, y + 1, e);
-    factor[0] -= rounding * e[0];
-    sample_image_fn_(left, y - 2, e);
-    factor[1] -= rounding * e[0];
-  }
-  /* Near the right corner */
-  if (d1 >= d2) {
-    sample_image_fn_(right + 1, y + 1, e);
-    factor[0] -= rounding * e[0];
-    sample_image_fn_(right + 1, y - 2, e);
-    factor[1] -= rounding * e[0];
-  }
-
-  weights[0] *= std::clamp(factor[0], 0.0f, 1.0f);
-  weights[1] *= std::clamp(factor[1], 0.0f, 1.0f);
-}
-
-void SMAABlendingWeightCalculationOperation::detect_vertical_corner_pattern(
-    float weights[2], int x, int top, int bottom, int d1, int d2)
-{
-  float factor[2] = {1.0f, 1.0f};
-  float rounding = corner_rounding_ / 100.0f;
-  float e[4];
-
-  /* Reduce blending for pixels in the center of a line. */
-  rounding *= (d1 == d2) ? 0.5f : 1.0f;
-
-  /* Near the top corner */
-  if (d1 <= d2) {
-    sample_image_fn_(x + 1, top, e);
-    factor[0] -= rounding * e[1];
-    sample_image_fn_(x - 2, top, e);
-    factor[1] -= rounding * e[1];
-  }
-  /* Near the bottom corner */
-  if (d1 >= d2) {
-    sample_image_fn_(x + 1, bottom + 1, e);
-    factor[0] -= rounding * e[1];
-    sample_image_fn_(x - 2, bottom + 1, e);
-    factor[1] -= rounding * e[1];
-  }
-
-  weights[0] *= std::clamp(factor[0], 0.0f, 1.0f);
-  weights[1] *= std::clamp(factor[1], 0.0f, 1.0f);
-}
-
-/*-----------------------------------------------------------------------------*/
-/* Neighborhood Blending (Third Pass) */
-/*-----------------------------------------------------------------------------*/
-
-SMAANeighborhoodBlendingOperation::SMAANeighborhoodBlendingOperation()
-{
-  this->add_input_socket(DataType::Color); /* image */
-  this->add_input_socket(DataType::Color); /* blend */
-  this->add_output_socket(DataType::Color);
-  flags_.can_be_constant = true;
-}
-
-void SMAANeighborhoodBlendingOperation::update_memory_buffer_partial(MemoryBuffer *output,
-                                                                     const rcti &out_area,
-                                                                     Span<MemoryBuffer *> inputs)
-{
-  MemoryBuffer *image1 = inputs[0];
-  MemoryBuffer *image2 = inputs[1];
-  for (BuffersIterator<float> it = output->iterate_with({}, out_area); !it.is_end(); ++it) {
-    const float x = it.x;
-    const float y = it.y;
-    float w[4];
-
-    /* Fetch the blending weights for current pixel: */
-    image2->read_elem_checked(x, y, w);
-    const float left = w[2], top = w[0];
-    image2->read_elem_checked(x + 1, y, w);
-    const float right = w[3];
-    image2->read_elem_checked(x, y + 1, w);
-    const float bottom = w[1];
-
-    /* Is there any blending weight with a value greater than 0.0? */
-    if (right + bottom + left + top < 1e-5f) {
-      image1->read_elem_checked(x, y, it.out);
-      continue;
-    }
-
-    /* Calculate the blending offsets: */
-    void (*sample_fn)(MemoryBuffer *reader, int x, int y, float xoffset, float color[4]);
-    float offset1, offset2, weight1, weight2, color1[4], color2[4];
-
-    if (fmaxf(right, left) > fmaxf(bottom, top)) { /* `max(horizontal) > max(vertical)` */
-      sample_fn = sample_bilinear_horizontal;
-      offset1 = right;
-      offset2 = -left;
-      weight1 = right / (right + left);
-      weight2 = left / (right + left);
-    }
-    else {
-      sample_fn = sample_bilinear_vertical;
-      offset1 = bottom;
-      offset2 = -top;
-      weight1 = bottom / (bottom + top);
-      weight2 = top / (bottom + top);
-    }
-
-    /* We exploit bilinear filtering to mix current pixel with the chosen neighbor: */
-    sample_fn(image1, x, y, offset1, color1);
-    sample_fn(image1, x, y, offset2, color2);
-
-    mul_v4_v4fl(it.out, color1, weight1);
-    madd_v4_v4fl(it.out, color2, weight2);
-  }
-}
-
-void SMAANeighborhoodBlendingOperation::get_area_of_interest(const int /*input_idx*/,
-                                                             const rcti &output_area,
-                                                             rcti &r_input_area)
-{
-  r_input_area = output_area;
-  expand_area_for_sampler(r_input_area, PixelSampler::Bilinear);
+  });
 }
 
 }  // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_SMAAOperation.h b/source/blender/compositor/operations/COM_SMAAOperation.h
index 845611705e2..3f379e30716 100644
--- a/source/blender/compositor/operations/COM_SMAAOperation.h
+++ b/source/blender/compositor/operations/COM_SMAAOperation.h
@@ -4,89 +4,38 @@
 
 #pragma once
 
-#include "COM_MultiThreadedOperation.h"
+#include "COM_NodeOperation.h"
 
 namespace blender::compositor {
 
-/*-----------------------------------------------------------------------------*/
-/* Edge Detection (First Pass) */
-
-class SMAAEdgeDetectionOperation : public MultiThreadedOperation {
+class SMAAOperation : public NodeOperation {
  protected:
-  float threshold_;
-  float contrast_limit_;
+  float threshold_ = 0.1f;
+  float local_contrast_adaptation_factor_ = 2.0f;
+  int corner_rounding_ = 25;
 
  public:
-  SMAAEdgeDetectionOperation();
+  SMAAOperation();
 
-  void set_threshold(float threshold);
+  void set_threshold(float threshold)
+  {
+    threshold_ = threshold;
+  }
 
-  void set_local_contrast_adaptation_factor(float factor);
+  void set_local_contrast_adaptation_factor(float factor)
+  {
+    local_contrast_adaptation_factor_ = factor;
+  }
+
+  void set_corner_rounding(int corner_rounding)
+  {
+    corner_rounding_ = corner_rounding;
+  }
 
   void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
-  void update_memory_buffer_partial(MemoryBuffer *output,
-                                    const rcti &area,
-                                    Span<MemoryBuffer *> inputs) override;
-};
-
-/*-----------------------------------------------------------------------------*/
-/*  Blending Weight Calculation (Second Pass) */
-
-class SMAABlendingWeightCalculationOperation : public MultiThreadedOperation {
- private:
-  std::function<void(int x, int y, float *out)> sample_image_fn_;
-  int corner_rounding_;
-
- public:
-  SMAABlendingWeightCalculationOperation();
-
-  void set_corner_rounding(float rounding);
-
-  void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
-  void update_memory_buffer_started(MemoryBuffer *output,
-                                    const rcti &area,
-                                    Span<MemoryBuffer *> inputs) override;
-  void update_memory_buffer_partial(MemoryBuffer *output,
-                                    const rcti &area,
-                                    Span<MemoryBuffer *> inputs) override;
-
- private:
-  /* Diagonal Search Functions */
-  /**
-   * These functions allows to perform diagonal pattern searches.
-   */
-  int search_diag1(int x, int y, int dir, bool *r_found);
-  int search_diag2(int x, int y, int dir, bool *r_found);
-  /**
-   * This searches for diagonal patterns and returns the corresponding weights.
-   */
-  void calculate_diag_weights(int x, int y, const float edges[2], float weights[2]);
-  bool is_vertical_search_unneeded(int x, int y);
-
-  /* Horizontal/Vertical Search Functions */
-  int search_xleft(int x, int y);
-  int search_xright(int x, int y);
-  int search_yup(int x, int y);
-  int search_ydown(int x, int y);
-
-  /*  Corner Detection Functions */
-  void detect_horizontal_corner_pattern(
-      float weights[2], int left, int right, int y, int d1, int d2);
-  void detect_vertical_corner_pattern(
-      float weights[2], int x, int top, int bottom, int d1, int d2);
-};
-
-/*-----------------------------------------------------------------------------*/
-/* Neighborhood Blending (Third Pass) */
-
-class SMAANeighborhoodBlendingOperation : public MultiThreadedOperation {
- public:
-  SMAANeighborhoodBlendingOperation();
-
-  void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
-  void update_memory_buffer_partial(MemoryBuffer *output,
-                                    const rcti &area,
-                                    Span<MemoryBuffer *> inputs) override;
+  void update_memory_buffer(MemoryBuffer *output,
+                            const rcti &area,
+                            Span<MemoryBuffer *> inputs) override;
 };
 
 }  // namespace blender::compositor
diff --git a/source/blender/compositor/operations/COM_TranslateOperation.h b/source/blender/compositor/operations/COM_TranslateOperation.h
index 0179cc62c95..5beaa234093 100644
--- a/source/blender/compositor/operations/COM_TranslateOperation.h
+++ b/source/blender/compositor/operations/COM_TranslateOperation.h
@@ -7,6 +7,8 @@
 #include "COM_ConstantOperation.h"
 #include "COM_MultiThreadedOperation.h"
 
+#include <mutex>
+
 namespace blender::compositor {
 
 class TranslateOperation : public MultiThreadedOperation {
@@ -21,6 +23,8 @@ class TranslateOperation : public MultiThreadedOperation {
   bool is_delta_set_;
   bool is_relative_;
 
+  std::mutex mutex_;
+
  protected:
   MemoryBufferExtend x_extend_mode_;
   MemoryBufferExtend y_extend_mode_;
@@ -50,6 +54,11 @@ class TranslateOperation : public MultiThreadedOperation {
   inline void ensure_delta()
   {
     if (!is_delta_set_) {
+      std::unique_lock lock(mutex_);
+      if (is_delta_set_) {
+        return;
+      }
+
       delta_x_ = get_input_operation(X_INPUT_INDEX)->get_constant_value_default(0.0f);
       delta_y_ = get_input_operation(Y_INPUT_INDEX)->get_constant_value_default(0.0f);
       if (get_is_relative()) {
diff --git a/source/blender/compositor/realtime_compositor/cached_resources/intern/cached_image.cc b/source/blender/compositor/realtime_compositor/cached_resources/intern/cached_image.cc
index 2121f13a1e6..29ed5b0909d 100644
--- a/source/blender/compositor/realtime_compositor/cached_resources/intern/cached_image.cc
+++ b/source/blender/compositor/realtime_compositor/cached_resources/intern/cached_image.cc
@@ -15,6 +15,7 @@
 #include "GPU_shader.hh"
 #include "GPU_texture.hh"
 
+#include "IMB_colormanagement.hh"
 #include "IMB_imbuf.hh"
 #include "IMB_imbuf_types.hh"
 
@@ -56,74 +57,6 @@ bool operator==(const CachedImageKey &a, const CachedImageKey &b)
  * Cached Image.
  */
 
-/* Returns a new texture of the given format and precision preprocessed using the given shader. The
- * input texture is freed. */
-static GPUTexture *preprocess_texture(Context &context,
-                                      GPUTexture *input_texture,
-                                      eGPUTextureFormat target_format,
-                                      ResultPrecision precision,
-                                      const char *shader_name)
-{
-  const int2 size = int2(GPU_texture_width(input_texture), GPU_texture_height(input_texture));
-
-  GPUTexture *preprocessed_texture = GPU_texture_create_2d(
-      "Cached Image", size.x, size.y, 1, target_format, GPU_TEXTURE_USAGE_GENERAL, nullptr);
-
-  GPUShader *shader = context.get_shader(shader_name, precision);
-  GPU_shader_bind(shader);
-
-  const int input_unit = GPU_shader_get_sampler_binding(shader, "input_tx");
-  GPU_texture_bind(input_texture, input_unit);
-
-  const int image_unit = GPU_shader_get_sampler_binding(shader, "output_img");
-  GPU_texture_image_bind(preprocessed_texture, image_unit);
-
-  compute_dispatch_threads_at_least(shader, size);
-
-  GPU_shader_unbind();
-  GPU_texture_unbind(input_texture);
-  GPU_texture_image_unbind(preprocessed_texture);
-  GPU_texture_free(input_texture);
-
-  return preprocessed_texture;
-}
-
-/* Compositor images are expected to be always pre-multiplied, so identify if the GPU texture
- * returned by the IMB module is straight and needs to be pre-multiplied. An exception is when
- * the image has an alpha mode of channel packed or alpha ignore, in which case, we always ignore
- * pre-multiplication. */
-static bool should_premultiply_alpha(Image *image, ImBuf *image_buffer)
-{
-  if (ELEM(image->alpha_mode, IMA_ALPHA_CHANNEL_PACKED, IMA_ALPHA_IGNORE)) {
-    return false;
-  }
-
-  return !BKE_image_has_gpu_texture_premultiplied_alpha(image, image_buffer);
-}
-
-/* Get a suitable texture format supported by the compositor given the format of the texture
- * returned by the IMB module. See imb_gpu_get_format for the formats that needs to be handled. */
-static eGPUTextureFormat get_compatible_texture_format(eGPUTextureFormat original_format)
-{
-  switch (original_format) {
-    case GPU_R16F:
-    case GPU_R32F:
-    case GPU_RGBA16F:
-    case GPU_RGBA32F:
-      return original_format;
-    case GPU_R8:
-      return GPU_R16F;
-    case GPU_RGBA8:
-    case GPU_SRGB8_A8:
-      return GPU_RGBA16F;
-    default:
-      break;
-  }
-
-  BLI_assert_unreachable();
-  return original_format;
-}
-
 /* Get the selected render layer selected assuming the image is a multilayer image. */
 static RenderLayer *get_render_layer(Image *image, ImageUser &image_user)
 {
@@ -205,6 +138,56 @@ static ImageUser compute_image_user_for_pass(Context &context,
   return image_user_for_pass;
 }
 
+/* The image buffer might be stored as an sRGB 8-bit image, while the compositor expects linear
+ * float images, so compute a linear float buffer for the image buffer. This will also do linear
+ * space conversion and alpha pre-multiplication as needed. We could store those images in sRGB GPU
+ * textures and let the GPU do the linear space conversion, but the issue is that we don't control
+ * how the GPU does the conversion and so we get tiny differences across CPU and GPU compositing,
+ * and potentially even across GPUs/Drivers. Further, if alpha pre-multiplication is needed, we
+ * would need to do it ourselves, which means alpha pre-multiplication will happen before linear
+ * space conversion, which would produce yet another difference. So we just do everything on the
+ * CPU, since this is already a cached resource.
+ *
+ * To avoid conflicts with other threads, create a new image buffer and assign all the necessary
+ * information to it, with IB_DO_NOT_TAKE_OWNERSHIP for buffers since a deep copy is not needed.
+ *
+ * The caller should free the returned image buffer. */
+static ImBuf *compute_linear_buffer(ImBuf *image_buffer)
+{
+  /* Do not pass the flags to the allocation function to avoid buffer allocation, but assign them
+   * after to retain important information like precision and alpha mode. */
+  ImBuf *linear_image_buffer = IMB_allocImBuf(
+      image_buffer->x, image_buffer->y, image_buffer->planes, 0);
+  linear_image_buffer->flags = image_buffer->flags;
+
+  /* Assign the float buffer if it exists, as well as its number of channels. */
+  IMB_assign_float_buffer(
+      linear_image_buffer, image_buffer->float_buffer, IB_DO_NOT_TAKE_OWNERSHIP);
+  linear_image_buffer->channels = image_buffer->channels;
+
+  /* If no float buffer exists, assign the byte buffer, then compute a float buffer from it. This
+   * is the main call of this function. */
+  if (!linear_image_buffer->float_buffer.data) {
+    IMB_assign_byte_buffer(
+        linear_image_buffer, image_buffer->byte_buffer, IB_DO_NOT_TAKE_OWNERSHIP);
+    IMB_float_from_rect(linear_image_buffer);
+  }
+
+  /* If the image buffer contained compressed data, assign them as well, but only if the color
+   * space of the buffer is linear or data, since we need linear data and can't preprocess the
+   * compressed buffer. If not, we fallback to the float buffer already assigned, which is
+   * guaranteed to exist as a fallback for compressed textures. */
+  const bool is_suitable_compressed_color_space =
+      IMB_colormanagement_space_is_data(image_buffer->byte_buffer.colorspace) ||
+      IMB_colormanagement_space_is_scene_linear(image_buffer->byte_buffer.colorspace);
+  if (image_buffer->ftype == IMB_FTYPE_DDS && is_suitable_compressed_color_space) {
+    linear_image_buffer->ftype = IMB_FTYPE_DDS;
+    IMB_assign_dds_data(linear_image_buffer, image_buffer->dds_data, IB_DO_NOT_TAKE_OWNERSHIP);
+  }
+
+  return linear_image_buffer;
+}
+
 CachedImage::CachedImage(Context &context,
                          Image *image,
                          ImageUser *image_user,
@@ -227,34 +210,12 @@ CachedImage::CachedImage(Context &context,
       context, image, image_user, pass_name);
 
   ImBuf *image_buffer = BKE_image_acquire_ibuf(image, &image_user_for_pass, nullptr);
-  const bool is_premultiplied = BKE_image_has_gpu_texture_premultiplied_alpha(image, image_buffer);
-  texture_ = IMB_create_gpu_texture("Image Texture", image_buffer, true, is_premultiplied);
+  ImBuf *linear_image_buffer = compute_linear_buffer(image_buffer);
+
+  texture_ = IMB_create_gpu_texture("Image Texture", linear_image_buffer, true, true);
   GPU_texture_update_mipmap_chain(texture_);
 
-  const eGPUTextureFormat original_format = GPU_texture_format(texture_);
-  const eGPUTextureFormat target_format = get_compatible_texture_format(original_format);
-  const ResultType result_type = Result::type(target_format);
-  const ResultPrecision precision = Result::precision(target_format);
-
-  /* The GPU image returned by the IMB module can be in a format not supported by the compositor,
-   * or it might need pre-multiplication, so preprocess them first. */
-  if (result_type == ResultType::Color && should_premultiply_alpha(image, image_buffer)) {
-    texture_ = preprocess_texture(
-        context, texture_, target_format, precision, "compositor_premultiply_alpha");
-  }
-  else if (original_format != target_format) {
-    const char *conversion_shader_name = result_type == ResultType::Float ?
-                                             "compositor_convert_float_to_float" :
-                                             "compositor_convert_color_to_color";
-    texture_ = preprocess_texture(
-        context, texture_, target_format, precision, conversion_shader_name);
-  }
-
-  /* Set the alpha to 1 using swizzling if alpha is ignored. */
-  if (result_type == ResultType::Color && image->alpha_mode == IMA_ALPHA_IGNORE) {
-    GPU_texture_swizzle_set(texture_, "rgb1");
-  }
-
+  IMB_freeImBuf(linear_image_buffer);
   BKE_image_release_ibuf(image, image_buffer, nullptr);
 }
 
diff --git a/source/blender/editors/animation/anim_filter.cc b/source/blender/editors/animation/anim_filter.cc
index 9c8d4699c7d..8f4820f4542 100644
--- a/source/blender/editors/animation/anim_filter.cc
+++ b/source/blender/editors/animation/anim_filter.cc
@@ -3674,138 +3674,136 @@ size_t ANIM_animdata_filter(bAnimContext *ac,
                             void *data,
                             eAnimCont_Types datatype)
 {
+  if (!data || !anim_data) {
+    return 0;
+  }
+
   size_t items = 0;
+  switch (datatype) {
+    /* Action-Editing Modes */
+    case ANIMCONT_ACTION: /* 'Action Editor' */
+    {
+      Object *obact = ac->obact;
+      SpaceAction *saction = (SpaceAction *)ac->sl;
+      bDopeSheet *ads = (saction) ? &saction->ads : nullptr;
 
-  /* only filter data if there's somewhere to put it */
-  if (data && anim_data) {
-    /* firstly filter the data */
-    switch (datatype) {
-      /* Action-Editing Modes */
-      case ANIMCONT_ACTION: /* 'Action Editor' */
-      {
-        Object *obact = ac->obact;
-        SpaceAction *saction = (SpaceAction *)ac->sl;
-        bDopeSheet *ads = (saction) ? &saction->ads : nullptr;
-
-        /* specially check for AnimData filter, see #36687. */
-        if (UNLIKELY(filter_mode & ANIMFILTER_ANIMDATA)) {
-          /* all channels here are within the same AnimData block, hence this special case */
-          if (LIKELY(obact->adt)) {
-            ANIMCHANNEL_NEW_CHANNEL(obact->adt, ANIMTYPE_ANIMDATA, (ID *)obact, nullptr);
-          }
+      /* specially check for AnimData filter, see #36687. */
+      if (UNLIKELY(filter_mode & ANIMFILTER_ANIMDATA)) {
+        /* all channels here are within the same AnimData block, hence this special case */
+        if (LIKELY(obact->adt)) {
+          ANIMCHANNEL_NEW_CHANNEL(obact->adt, ANIMTYPE_ANIMDATA, (ID *)obact, nullptr);
         }
-        else {
-          /* The check for the DopeSheet summary is included here
-           * since the summary works here too. */
-          if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) {
-            items += animfilter_action(
-                ac, anim_data, ads, static_cast<bAction *>(data), filter_mode, (ID *)obact);
-          }
-        }
-
-        break;
       }
-      case ANIMCONT_SHAPEKEY: /* 'ShapeKey Editor' */
-      {
-        Key *key = (Key *)data;
-
-        /* specially check for AnimData filter, see #36687. */
-        if (UNLIKELY(filter_mode & ANIMFILTER_ANIMDATA)) {
-          /* all channels here are within the same AnimData block, hence this special case */
-          if (LIKELY(key->adt)) {
-            ANIMCHANNEL_NEW_CHANNEL(key->adt, ANIMTYPE_ANIMDATA, (ID *)key, nullptr);
-          }
-        }
-        else {
-          /* The check for the DopeSheet summary is included here
-           * since the summary works here too. */
-          if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) {
-            items = animdata_filter_shapekey(ac, anim_data, key, filter_mode);
-          }
-        }
-
-        break;
-      }
-
-      /* Modes for Specialty Data Types (i.e. not keyframes) */
-      case ANIMCONT_GPENCIL: {
+      else {
+        /* The check for the DopeSheet summary is included here
+         * since the summary works here too. */
         if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) {
-          if (U.experimental.use_grease_pencil_version3) {
-            items = animdata_filter_grease_pencil(ac, anim_data, filter_mode);
-          }
-          else {
-            items = animdata_filter_gpencil_legacy(ac, anim_data, data, filter_mode);
-          }
+          items += animfilter_action(
+              ac, anim_data, ads, static_cast<bAction *>(data), filter_mode, (ID *)obact);
         }
-        break;
       }
-      case ANIMCONT_MASK: {
+
+      break;
+    }
+    case ANIMCONT_SHAPEKEY: /* 'ShapeKey Editor' */
+    {
+      Key *key = (Key *)data;
+
+      /* specially check for AnimData filter, see #36687. */
+      if (UNLIKELY(filter_mode & ANIMFILTER_ANIMDATA)) {
+        /* all channels here are within the same AnimData block, hence this special case */
+        if (LIKELY(key->adt)) {
+          ANIMCHANNEL_NEW_CHANNEL(key->adt, ANIMTYPE_ANIMDATA, (ID *)key, nullptr);
+        }
+      }
+      else {
+        /* The check for the DopeSheet summary is included here
+         * since the summary works here too. */
         if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) {
-          items = animdata_filter_mask(ac->bmain, anim_data, data, filter_mode);
+          items = animdata_filter_shapekey(ac, anim_data, key, filter_mode);
         }
-        break;
       }
 
-      /* DopeSheet Based Modes */
-      case ANIMCONT_DOPESHEET: /* 'DopeSheet Editor' */
-      {
-        /* the DopeSheet editor is the primary place where the DopeSheet summaries are useful */
-        if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) {
-          items += animdata_filter_dopesheet(
-              ac, anim_data, static_cast<bDopeSheet *>(data), filter_mode);
-        }
-        break;
-      }
-      case ANIMCONT_FCURVES: /* Graph Editor -> F-Curves/Animation Editing */
-      case ANIMCONT_DRIVERS: /* Graph Editor -> Drivers Editing */
-      case ANIMCONT_NLA:     /* NLA Editor */
-      {
-        /* all of these editors use the basic DopeSheet data for filtering options,
-         * but don't have all the same features */
-        items = animdata_filter_dopesheet(
-            ac, anim_data, static_cast<bDopeSheet *>(data), filter_mode);
-        break;
-      }
-
-      /* Timeline Mode - Basically the same as dopesheet,
-       * except we only have the summary for now */
-      case ANIMCONT_TIMELINE: {
-        /* the DopeSheet editor is the primary place where the DopeSheet summaries are useful */
-        if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) {
-          items += animdata_filter_dopesheet(
-              ac, anim_data, static_cast<bDopeSheet *>(data), filter_mode);
-        }
-        break;
-      }
-
-      /* Special/Internal Use */
-      case ANIMCONT_CHANNEL: /* animation channel */
-      {
-        bDopeSheet *ads = ac->ads;
-
-        /* based on the channel type, filter relevant data for this */
-        items = animdata_filter_animchan(
-            ac, anim_data, ads, static_cast<bAnimListElem *>(data), filter_mode);
-        break;
-      }
-
-      /* unhandled */
-      default: {
-        printf("ANIM_animdata_filter() - Invalid datatype argument %i\n", datatype);
-        break;
-      }
+      break;
     }
 
-    /* remove any 'weedy' entries */
-    items = animdata_filter_remove_invalid(anim_data);
+    /* Modes for Specialty Data Types (i.e. not keyframes) */
+    case ANIMCONT_GPENCIL: {
+      if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) {
+        if (U.experimental.use_grease_pencil_version3) {
+          items = animdata_filter_grease_pencil(ac, anim_data, filter_mode);
+        }
+        else {
+          items = animdata_filter_gpencil_legacy(ac, anim_data, data, filter_mode);
+        }
+      }
+      break;
+    }
+    case ANIMCONT_MASK: {
+      if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) {
+        items = animdata_filter_mask(ac->bmain, anim_data, data, filter_mode);
+      }
+      break;
+    }
 
-    /* remove duplicates (if required) */
-    if (filter_mode & ANIMFILTER_NODUPLIS) {
-      items = animdata_filter_remove_duplis(anim_data);
+    /* DopeSheet Based Modes */
+    case ANIMCONT_DOPESHEET: /* 'DopeSheet Editor' */
+    {
+      /* the DopeSheet editor is the primary place where the DopeSheet summaries are useful */
+      if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) {
+        items += animdata_filter_dopesheet(
+            ac, anim_data, static_cast<bDopeSheet *>(data), filter_mode);
+      }
+      break;
+    }
+    case ANIMCONT_FCURVES: /* Graph Editor -> F-Curves/Animation Editing */
+    case ANIMCONT_DRIVERS: /* Graph Editor -> Drivers Editing */
+    case ANIMCONT_NLA:     /* NLA Editor */
+    {
+      /* all of these editors use the basic DopeSheet data for filtering options,
+       * but don't have all the same features */
+      items = animdata_filter_dopesheet(
+          ac, anim_data, static_cast<bDopeSheet *>(data), filter_mode);
+      break;
+    }
+
+    /* Timeline Mode - Basically the same as dopesheet,
+     * except we only have the summary for now */
+    case ANIMCONT_TIMELINE: {
+      /* the DopeSheet editor is the primary place where the DopeSheet summaries are useful */
+      if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) {
+        items += animdata_filter_dopesheet(
+            ac, anim_data, static_cast<bDopeSheet *>(data), filter_mode);
+      }
+      break;
+    }
+
+    /* Special/Internal Use */
+    case ANIMCONT_CHANNEL: /* animation channel */
+    {
+      bDopeSheet *ads = ac->ads;
+
+      /* based on the channel type, filter relevant data for this */
+      items = animdata_filter_animchan(
+          ac, anim_data, ads, static_cast<bAnimListElem *>(data), filter_mode);
+      break;
+    }
+
+    /* unhandled */
+    default: {
+      printf("ANIM_animdata_filter() - Invalid datatype argument %i\n", datatype);
+      break;
     }
   }
 
-  /* return the number of items in the list */
+  /* remove any 'weedy' entries */
+  items = animdata_filter_remove_invalid(anim_data);
+
+  /* remove duplicates (if required) */
+  if (filter_mode & ANIMFILTER_NODUPLIS) {
+    items = animdata_filter_remove_duplis(anim_data);
+  }
+
   return items;
 }
 
diff --git a/source/blender/editors/asset/intern/asset_ops.cc b/source/blender/editors/asset/intern/asset_ops.cc
index 99150ad06ef..5d9d1c740db 100644
--- a/source/blender/editors/asset/intern/asset_ops.cc
+++ b/source/blender/editors/asset/intern/asset_ops.cc
@@ -582,7 +582,7 @@ static void ASSET_OT_catalog_delete(wmOperatorType *ot)
 static asset_system::AssetCatalogService *get_catalog_service(bContext *C)
 {
   const SpaceFile *sfile = CTX_wm_space_file(C);
-  if (!sfile) {
+  if (!sfile || ED_fileselect_is_file_browser(sfile)) {
     return nullptr;
   }
 
diff --git a/source/blender/editors/grease_pencil/intern/grease_pencil_edit.cc b/source/blender/editors/grease_pencil/intern/grease_pencil_edit.cc
index 57f1e1696d8..3524274ef81 100644
--- a/source/blender/editors/grease_pencil/intern/grease_pencil_edit.cc
+++ b/source/blender/editors/grease_pencil/intern/grease_pencil_edit.cc
@@ -1406,7 +1406,7 @@ static void GREASE_PENCIL_OT_clean_loose(wmOperatorType *ot)
 
   ot->invoke = WM_operator_props_popup_confirm;
   ot->exec = grease_pencil_clean_loose_exec;
-  ot->poll = editable_grease_pencil_poll;
+  ot->poll = active_grease_pencil_layer_poll;
 
   ot->flag = OPTYPE_REGISTER | OPTYPE_UNDO;
 
diff --git a/source/blender/editors/io/io_cache.cc b/source/blender/editors/io/io_cache.cc
index 49ac8f29383..4884318f952 100644
--- a/source/blender/editors/io/io_cache.cc
+++ b/source/blender/editors/io/io_cache.cc
@@ -53,8 +53,8 @@ static int cachefile_open_invoke(bContext *C, wmOperator *op, const wmEvent * /*
     char filepath[FILE_MAX];
     Main *bmain = CTX_data_main(C);
 
-    STRNCPY(filepath, BKE_main_blendfile_path(bmain));
-    BLI_path_extension_replace(filepath, sizeof(filepath), ".abc");
+    /* Default to the same directory as the blend file. */
+    BLI_path_split_dir_part(BKE_main_blendfile_path(bmain), filepath, sizeof(filepath));
     RNA_string_set(op->ptr, "filepath", filepath);
   }
 
@@ -119,7 +119,7 @@ void CACHEFILE_OT_open(wmOperatorType *ot)
   ot->cancel = open_cancel;
 
   WM_operator_properties_filesel(ot,
-                                 FILE_TYPE_ALEMBIC | FILE_TYPE_FOLDER,
+                                 FILE_TYPE_ALEMBIC | FILE_TYPE_USD | FILE_TYPE_FOLDER,
                                  FILE_BLENDER,
                                  FILE_OPENFILE,
                                  WM_FILESEL_FILEPATH | WM_FILESEL_RELPATH,
@@ -163,8 +163,8 @@ static int cachefile_layer_open_invoke(bContext *C, wmOperator *op, const wmEven
     char filepath[FILE_MAX];
     Main *bmain = CTX_data_main(C);
 
-    STRNCPY(filepath, BKE_main_blendfile_path(bmain));
-    BLI_path_extension_replace(filepath, sizeof(filepath), ".abc");
+    /* Default to the same directory as the blend file. */
+    BLI_path_split_dir_part(BKE_main_blendfile_path(bmain), filepath, sizeof(filepath));
     RNA_string_set(op->ptr, "filepath", filepath);
   }
 
@@ -215,7 +215,7 @@ void CACHEFILE_OT_layer_add(wmOperatorType *ot)
   ot->exec = cachefile_layer_add_exec;
 
   WM_operator_properties_filesel(ot,
-                                 FILE_TYPE_ALEMBIC | FILE_TYPE_FOLDER,
+                                 FILE_TYPE_ALEMBIC | FILE_TYPE_USD | FILE_TYPE_FOLDER,
                                  FILE_BLENDER,
                                  FILE_OPENFILE,
                                  WM_FILESEL_FILEPATH | WM_FILESEL_RELPATH,
diff --git a/source/blender/editors/mesh/editmesh_tools.cc b/source/blender/editors/mesh/editmesh_tools.cc
index 37c7c5c9628..673c69d1cd1 100644
--- a/source/blender/editors/mesh/editmesh_tools.cc
+++ b/source/blender/editors/mesh/editmesh_tools.cc
@@ -3584,7 +3584,10 @@ static int edbm_remove_doubles_exec(bContext *C, wmOperator *op)
     }
   }
 
-  BKE_reportf(op->reports, RPT_INFO, "Removed %d vertice(s)", count_multi);
+  BKE_reportf(op->reports,
+              RPT_INFO,
+              count_multi == 1 ? "Removed %d vertex" : "Removed %d vertices",
+              count_multi);
 
   return OPERATOR_FINISHED;
 }
diff --git a/source/blender/editors/object/object_edit.cc b/source/blender/editors/object/object_edit.cc
index 93fbd07f300..9805b9fc286 100644
--- a/source/blender/editors/object/object_edit.cc
+++ b/source/blender/editors/object/object_edit.cc
@@ -1613,7 +1613,6 @@ static int shade_smooth_exec(bContext *C, wmOperator *op)
         const float angle = RNA_float_get(op->ptr, "angle");
         bke::mesh_sharp_edges_set_from_angle(mesh, angle, keep_sharp_edges);
       }
-      mesh.tag_sharpness_changed();
       BKE_mesh_batch_cache_dirty_tag(static_cast<Mesh *>(ob->data), BKE_MESH_BATCH_DIRTY_ALL);
       changed = true;
     }
diff --git a/source/blender/editors/space_view3d/view3d_gizmo_preselect_type.cc b/source/blender/editors/space_view3d/view3d_gizmo_preselect_type.cc
index 01f1ed1e320..f5738fd6340 100644
--- a/source/blender/editors/space_view3d/view3d_gizmo_preselect_type.cc
+++ b/source/blender/editors/space_view3d/view3d_gizmo_preselect_type.cc
@@ -506,7 +506,7 @@ void ED_view3d_gizmo_mesh_preselect_get_active(const bContext *C,
   /* weak, allocate an array just to access the index. */
   Base *base = nullptr;
   Object *obedit = nullptr;
-  {
+  if (object_index != -1) {
     Vector<Base *> bases = BKE_view_layer_array_from_bases_in_edit_mode(
         scene, view_layer, CTX_wm_view3d(C));
     if (object_index < bases.size()) {
diff --git a/source/blender/editors/transform/transform_ops.cc b/source/blender/editors/transform/transform_ops.cc
index c43f6c5621f..6e1345d8e3d 100644
--- a/source/blender/editors/transform/transform_ops.cc
+++ b/source/blender/editors/transform/transform_ops.cc
@@ -705,11 +705,15 @@ void Transform_Properties(wmOperatorType *ot, int flags)
 
       RNA_def_boolean(ot->srna, "use_snap_project", false, "Project Individual Elements", "");
 
-      /* TODO(@gfxcoder): Rename `snap_target` to `snap_source` to avoid previous ambiguity of
-       * "target" (now, "source" is geometry to be moved and "target" is geometry to which moved
-       * geometry is snapped).  Use "Source snap point" and "Point on source that will snap to
-       * target" for name and description, respectively. */
-      prop = RNA_def_enum(ot->srna, "snap_target", rna_enum_snap_source_items, 0, "Snap With", "");
+      /* TODO(@gfxcoder): Rename `snap_target` to `snap_base` to avoid previous ambiguity of
+       * "target" (now, "base" or "source" is geometry to be moved and "target" is geometry to
+       * which moved geometry is snapped). */
+      prop = RNA_def_enum(ot->srna,
+                          "snap_target",
+                          rna_enum_snap_source_items,
+                          0,
+                          "Snap Base",
+                          "Point on source that will snap to target");
       RNA_def_property_flag(prop, PROP_HIDDEN);
 
       /* Target selection. */
diff --git a/source/blender/geometry/GEO_join_geometries.hh b/source/blender/geometry/GEO_join_geometries.hh
index 8003cf5028b..0bdb179c5db 100644
--- a/source/blender/geometry/GEO_join_geometries.hh
+++ b/source/blender/geometry/GEO_join_geometries.hh
@@ -13,6 +13,6 @@ bke::GeometrySet join_geometries(Span<bke::GeometrySet> geometries,
                                  const bke::AnonymousAttributePropagationInfo &propagation_info);
 
 void join_attributes(Span<const bke::GeometryComponent *> src_components,
-                     bke::GeometryComponent &r_result,
-                     Span<StringRef> ignored_attributes = {});
+                     Span<StringRef> ignored_attributes,
+                     bke::GeometryComponent &r_result);
 }  // namespace blender::geometry
diff --git a/source/blender/geometry/GEO_realize_instances.hh b/source/blender/geometry/GEO_realize_instances.hh
index 08a0d8a6da1..fe69ecddb5a 100644
--- a/source/blender/geometry/GEO_realize_instances.hh
+++ b/source/blender/geometry/GEO_realize_instances.hh
@@ -62,16 +62,8 @@ bke::GeometrySet realize_instances(bke::GeometrySet geometry_set,
                                    const RealizeInstancesOptions &options);
 
 /**
- * Join all instances into a single geometry component for each geometry type. For example, all
- * mesh instances (including the already realized mesh) are joined into a single mesh. The output
- * geometry set does not contain any instances. If the input did not contain any instances, it is
- * returned directly.
- *
- * The `id` attribute has special handling. If there is an id attribute on any component, the
- * output will contain an `id` attribute as well. The output id is generated by mixing/hashing ids
- * of instances and of the instanced geometry data.
- *
- * Will realize only the instances chosen by varied_depth_option to there chosen depth.
+ * Same as #realize_instances, but will realize only the instances chosen by
+ * varied_depth_option to their chosen depth.
  */
 bke::GeometrySet realize_instances(bke::GeometrySet geometry_set,
                                    const RealizeInstancesOptions &options,
diff --git a/source/blender/geometry/intern/join_geometries.cc b/source/blender/geometry/intern/join_geometries.cc
index 92912e9fa1a..1b1ac578dc8 100644
--- a/source/blender/geometry/intern/join_geometries.cc
+++ b/source/blender/geometry/intern/join_geometries.cc
@@ -74,8 +74,8 @@ static void fill_new_attribute(const Span<const GeometryComponent *> src_compone
 }
 
 void join_attributes(const Span<const GeometryComponent *> src_components,
-                     GeometryComponent &r_result,
-                     const Span<StringRef> ignored_attributes)
+                     const Span<StringRef> ignored_attributes,
+                     GeometryComponent &r_result)
 {
   const Map<AttributeIDRef, AttributeMetaData> info = get_final_attribute_info(src_components,
                                                                                ignored_attributes);
@@ -129,7 +129,7 @@ static void join_instances(const Span<const GeometryComponent *> src_components,
 
   r_result.replace_instances(dst_instances.release());
   auto &dst_component = r_result.get_component_for_write<bke::InstancesComponent>();
-  join_attributes(src_components, dst_component, {".reference_index"});
+  join_attributes(src_components, {".reference_index"}, dst_component);
 }
 
 static void join_volumes(const Span<const GeometryComponent *> /*src_components*/,
diff --git a/source/blender/geometry/intern/realize_instances.cc b/source/blender/geometry/intern/realize_instances.cc
index 45b6d3c1285..27a735a5516 100644
--- a/source/blender/geometry/intern/realize_instances.cc
+++ b/source/blender/geometry/intern/realize_instances.cc
@@ -298,7 +298,7 @@ struct InstanceContext {
         curves(gather_info.curves.attributes.size()),
         instances(gather_info.instances_attriubutes.size())
   {
-    //empty
+    // empty
   }
 };
 
@@ -753,7 +753,7 @@ static void gather_realize_tasks_recursive(GatherTasksInfo &gather_info,
  * is an instance, the condition is true only when the depth is exactly 0. Additionally, the
  * function extends its operation to instances if any of their nested children meet the first
  * condition.
- * 
+ *
  * Based on bke::GeometrySet::attribute_foreach
  */
 static bool attribute_foreach(const bke::GeometrySet &geometry_set,
@@ -823,12 +823,11 @@ static bool attribute_foreach(const bke::GeometrySet &geometry_set,
   return is_relevant;
 }
 
-
 /**
  * Based on bke::GeometrySet::gather_attributes_for_propagation.
  * Specialized for Specialized attribute_foreach to get:
  * current_depth, depth_target, instance_depth and selection.
-*/
+ */
 void static gather_attributes_for_propagation(
     bke::GeometrySet re_geometry_set,
     const Span<bke::GeometryComponent::Type> component_types,
@@ -869,7 +868,8 @@ void static gather_attributes_for_propagation(
 
                       AttrDomain domain = meta_data.domain;
                       if (dst_component_type != bke::GeometryComponent::Type::Instance &&
-                          domain == AttrDomain::Instance) {
+                          domain == AttrDomain::Instance)
+                      {
                         domain = AttrDomain::Point;
                       }
 
@@ -983,7 +983,7 @@ static void execute_instances_tasks(
         continue;
       }
 
-      const void *attribute_ptr;  
+      const void *attribute_ptr;
       if (attribute_fallback_array[attribute_index] != nullptr) {
         attribute_ptr = attribute_fallback_array[attribute_index];
       }
@@ -1014,7 +1014,7 @@ static void execute_instances_tasks(
   }
 
   join_attributes(
-      for_join_attributes, dst_component, {"position", ".reference_index", "instance_transform"});
+      for_join_attributes, {"position", ".reference_index", "instance_transform"}, dst_component);
 }
 
 /** \} */
@@ -1990,9 +1990,7 @@ bke::GeometrySet realize_instances(bke::GeometrySet geometry_set,
   VariedDepthOption all_instances;
   all_instances.depths = VArray<int>::ForSingle(VariedDepthOption::MAX_DEPTH,
                                                 geometry_set.get_instances()->instances_num());
-  IndexMaskMemory memory;
-  all_instances.selection = IndexMask::from_bools(
-      VArray<bool>::ForSingle(true, geometry_set.get_instances()->instances_num()), memory);
+  all_instances.selection = IndexMask(geometry_set.get_instances()->instances_num());
   return realize_instances(geometry_set, options, all_instances);
 }
 
@@ -2049,12 +2047,8 @@ bke::GeometrySet realize_instances(bke::GeometrySet geometry_set,
   const float4x4 transform = float4x4::identity();
   InstanceContext attribute_fallbacks(gather_info);
 
-  gather_realize_tasks_recursive(gather_info,
-                                 0,
-                                 VariedDepthOption::MAX_DEPTH,
-                                 geometry_set,
-                                 transform,
-                                 attribute_fallbacks);
+  gather_realize_tasks_recursive(
+      gather_info, 0, VariedDepthOption::MAX_DEPTH, geometry_set, transform, attribute_fallbacks);
 
   bke::GeometrySet new_geometry_set;
   execute_instances_tasks(gather_info.instances.instances_components_to_merge,
@@ -2062,28 +2056,28 @@ bke::GeometrySet realize_instances(bke::GeometrySet geometry_set,
                           all_instance_attributes,
                           gather_info.instances.attribute_fallback,
                           new_geometry_set);
-                          
+
   const int64_t total_points_num = get_final_points_num(gather_info.r_tasks);
   /* This doesn't have to be exact at all, it's just a rough estimate ot make decisions about
    * multi-threading (overhead). */
   const int64_t approximate_used_bytes_num = total_points_num * 32;
   threading::memory_bandwidth_bound_task(approximate_used_bytes_num, [&]() {
-  execute_realize_pointcloud_tasks(options.keep_original_ids,
-                                   all_pointclouds_info,
-                                   gather_info.r_tasks.pointcloud_tasks,
-                                   all_pointclouds_info.attributes,
-                                   new_geometry_set);
-  execute_realize_mesh_tasks(options.keep_original_ids,
-                             all_meshes_info,
-                             gather_info.r_tasks.mesh_tasks,
-                             all_meshes_info.attributes,
-                             all_meshes_info.materials,
-                             new_geometry_set);
-  execute_realize_curve_tasks(options.keep_original_ids,
-                              all_curves_info,
-                              gather_info.r_tasks.curve_tasks,
-                              all_curves_info.attributes,
-                              new_geometry_set);
+    execute_realize_pointcloud_tasks(options.keep_original_ids,
+                                     all_pointclouds_info,
+                                     gather_info.r_tasks.pointcloud_tasks,
+                                     all_pointclouds_info.attributes,
+                                     new_geometry_set);
+    execute_realize_mesh_tasks(options.keep_original_ids,
+                               all_meshes_info,
+                               gather_info.r_tasks.mesh_tasks,
+                               all_meshes_info.attributes,
+                               all_meshes_info.materials,
+                               new_geometry_set);
+    execute_realize_curve_tasks(options.keep_original_ids,
+                                all_curves_info,
+                                gather_info.r_tasks.curve_tasks,
+                                all_curves_info.attributes,
+                                new_geometry_set);
   });
   if (gather_info.r_tasks.first_volume) {
     new_geometry_set.add(*gather_info.r_tasks.first_volume);
diff --git a/source/blender/gpu/metal/mtl_texture.mm b/source/blender/gpu/metal/mtl_texture.mm
index ca73c206e38..f9d46627ada 100644
--- a/source/blender/gpu/metal/mtl_texture.mm
+++ b/source/blender/gpu/metal/mtl_texture.mm
@@ -527,6 +527,8 @@ void gpu::MTLTexture::update_sub(
     }
   }
 
+  const bool is_compressed = (format_flag_ & GPU_FORMAT_COMPRESSED);
+
   @autoreleasepool {
     /* Determine totalsize of INPUT Data. */
     int num_channels = to_component_len(format_);
@@ -593,10 +595,12 @@ void gpu::MTLTexture::update_sub(
         false /* Not a clear. */
     };
 
-    /* Determine whether we can do direct BLIT or not. */
+    /* Determine whether we can do direct BLIT or not. For compressed textures,
+     * always assume a direct blit (input data pretends to be float, but it is
+     * not). */
     bool can_use_direct_blit = true;
-    if (expected_dst_bytes_per_pixel != input_bytes_per_pixel ||
-        num_channels != destination_num_channels)
+    if (!is_compressed && (expected_dst_bytes_per_pixel != input_bytes_per_pixel ||
+                           num_channels != destination_num_channels))
     {
       can_use_direct_blit = false;
     }
@@ -620,7 +624,7 @@ void gpu::MTLTexture::update_sub(
 
     /* Safety Checks. */
     if (type == GPU_DATA_UINT_24_8 || type == GPU_DATA_10_11_11_REV ||
-        type == GPU_DATA_2_10_10_10_REV)
+        type == GPU_DATA_2_10_10_10_REV || is_compressed)
     {
       BLI_assert(can_use_direct_blit &&
                  "Special input data type must be a 1-1 mapping with destination texture as it "
@@ -755,6 +759,12 @@ void gpu::MTLTexture::update_sub(
                                       extent[0] :
                                       ctx->pipeline_state.unpack_row_length);
           size_t bytes_per_image = bytes_per_row;
+          if (is_compressed) {
+            size_t block_size = to_block_size(format_);
+            size_t blocks_x = divide_ceil_u(extent[0], 4);
+            bytes_per_row = blocks_x * block_size;
+            bytes_per_image = bytes_per_row;
+          }
           int max_array_index = ((type_ == GPU_TEXTURE_1D_ARRAY) ? extent[1] : 1);
           for (int array_index = 0; array_index < max_array_index; array_index++) {
 
@@ -827,6 +837,13 @@ void gpu::MTLTexture::update_sub(
                                       extent[0] :
                                       ctx->pipeline_state.unpack_row_length);
           size_t bytes_per_image = bytes_per_row * extent[1];
+          if (is_compressed) {
+            size_t block_size = to_block_size(format_);
+            size_t blocks_x = divide_ceil_u(extent[0], 4);
+            size_t blocks_y = divide_ceil_u(extent[1], 4);
+            bytes_per_row = blocks_x * block_size;
+            bytes_per_image = bytes_per_row * blocks_y;
+          }
 
           size_t texture_array_relative_offset = 0;
           int base_slice = (type_ == GPU_TEXTURE_2D_ARRAY) ? offset[2] : 0;
@@ -1218,6 +1235,12 @@ void gpu::MTLTexture::ensure_mipmaps(int miplvl)
 
 void gpu::MTLTexture::generate_mipmap()
 {
+  /* Compressed textures allow users to provide their own custom mipmaps. And
+   * we can't generate them at runtime anyway. */
+  if (format_flag_ & GPU_FORMAT_COMPRESSED) {
+    return;
+  }
+
   /* Fetch Active Context. */
   MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
   BLI_assert(ctx);
diff --git a/source/blender/gpu/metal/mtl_texture_util.mm b/source/blender/gpu/metal/mtl_texture_util.mm
index 86773f048b7..8b29a582833 100644
--- a/source/blender/gpu/metal/mtl_texture_util.mm
+++ b/source/blender/gpu/metal/mtl_texture_util.mm
@@ -160,13 +160,17 @@ MTLPixelFormat gpu_texture_format_to_metal(eGPUTextureFormat tex_format)
       return MTLPixelFormatR8Snorm;
     /* Special formats, texture only. */
     case GPU_SRGB8_A8_DXT1:
+      return MTLPixelFormatBC1_RGBA_sRGB;
     case GPU_SRGB8_A8_DXT3:
+      return MTLPixelFormatBC2_RGBA_sRGB;
     case GPU_SRGB8_A8_DXT5:
+      return MTLPixelFormatBC3_RGBA_sRGB;
     case GPU_RGBA8_DXT1:
+      return MTLPixelFormatBC1_RGBA;
     case GPU_RGBA8_DXT3:
+      return MTLPixelFormatBC2_RGBA;
     case GPU_RGBA8_DXT5:
-      BLI_assert_msg(false, "Compressed texture not implemented yet!\n");
-      return MTLPixelFormatRGBA8Unorm;
+      return MTLPixelFormatBC3_RGBA;
     case GPU_SRGB8:
       /* 24-Bit pixel format are not supported. Emulate using a padded type with alpha. */
       return MTLPixelFormatRGBA8Unorm_sRGB;
@@ -247,6 +251,14 @@ size_t get_mtl_format_bytesize(MTLPixelFormat tex_format)
       return 4;
     case MTLPixelFormatDepth16Unorm:
       return 2;
+    case MTLPixelFormatBC1_RGBA:
+    case MTLPixelFormatBC1_RGBA_sRGB:
+      return 1; /* Note: not quite correct (BC1 is 0.5 bpp). */
+    case MTLPixelFormatBC2_RGBA:
+    case MTLPixelFormatBC2_RGBA_sRGB:
+    case MTLPixelFormatBC3_RGBA:
+    case MTLPixelFormatBC3_RGBA_sRGB:
+      return 1;
 
     default:
       BLI_assert_msg(false, "Unrecognised GPU pixel format!\n");
@@ -272,6 +284,12 @@ int get_mtl_format_num_components(MTLPixelFormat tex_format)
     case MTLPixelFormatRGBA8Unorm_sRGB:
     case MTLPixelFormatRGB10A2Uint:
     case MTLPixelFormatRGB10A2Unorm:
+    case MTLPixelFormatBC1_RGBA_sRGB:
+    case MTLPixelFormatBC2_RGBA_sRGB:
+    case MTLPixelFormatBC3_RGBA_sRGB:
+    case MTLPixelFormatBC1_RGBA:
+    case MTLPixelFormatBC2_RGBA:
+    case MTLPixelFormatBC3_RGBA:
       return 4;
 
     case MTLPixelFormatRG11B10Float:
diff --git a/source/blender/imbuf/IMB_imbuf.hh b/source/blender/imbuf/IMB_imbuf.hh
index 16377b37edd..a945811b406 100644
--- a/source/blender/imbuf/IMB_imbuf.hh
+++ b/source/blender/imbuf/IMB_imbuf.hh
@@ -118,6 +118,21 @@ ImBuf *IMB_allocFromBuffer(const uint8_t *byte_buffer,
 void IMB_assign_byte_buffer(ImBuf *ibuf, uint8_t *buffer_data, ImBufOwnership ownership);
 void IMB_assign_float_buffer(ImBuf *ibuf, float *buffer_data, ImBufOwnership ownership);
 
+/**
+ * Assign the content and the color space of the corresponding buffer from the data of the given
+ * buffer.
+ *
+ * \note Does not modify the topology (width, height, number of channels)
+ * or the mipmaps in any way.
+ *
+ * \note The ownership of the data in the source buffer is ignored.
+ */
+void IMB_assign_byte_buffer(ImBuf *ibuf, const ImBufByteBuffer &buffer, ImBufOwnership ownership);
+void IMB_assign_float_buffer(ImBuf *ibuf,
+                             const ImBufFloatBuffer &buffer,
+                             ImBufOwnership ownership);
+void IMB_assign_dds_data(ImBuf *ibuf, const DDSData &data, ImBufOwnership ownership);
+
 /**
  * Make corresponding buffers available for modification.
  * Is achieved by ensuring that the given ImBuf is the only owner of the underlying buffer data.
diff --git a/source/blender/imbuf/IMB_imbuf_types.hh b/source/blender/imbuf/IMB_imbuf_types.hh
index e40f109c8a0..629bf94fb1e 100644
--- a/source/blender/imbuf/IMB_imbuf_types.hh
+++ b/source/blender/imbuf/IMB_imbuf_types.hh
@@ -34,17 +34,6 @@ struct IDProperty;
 #define IMB_MIPMAP_LEVELS 20
 #define IMB_FILEPATH_SIZE 1024
 
-struct DDSData {
-  /** DDS fourcc info */
-  unsigned int fourcc;
-  /** The number of mipmaps in the dds file */
-  unsigned int nummipmaps;
-  /** The compressed image data */
-  unsigned char *data;
-  /** The size of the compressed data */
-  unsigned int size;
-};
-
 /**
  * \ingroup imbuf
  * This is the abstraction of an image. ImBuf is the basic type used for all imbuf operations.
@@ -143,6 +132,19 @@ enum ImBufOwnership {
   IB_TAKE_OWNERSHIP = 1,
 };
 
+struct DDSData {
+  /** DDS fourcc info */
+  unsigned int fourcc;
+  /** The number of mipmaps in the dds file */
+  unsigned int nummipmaps;
+  /** The compressed image data */
+  unsigned char *data;
+  /** The size of the compressed data */
+  unsigned int size;
+  /** Who owns the data buffer. */
+  ImBufOwnership ownership;
+};
+
 /* Different storage specialization.
  *
  * NOTE: Avoid direct assignments and allocations, use the buffer utilities from the IMB_imbuf.hh
diff --git a/source/blender/imbuf/intern/allocimbuf.cc b/source/blender/imbuf/intern/allocimbuf.cc
index 5cfea171dea..40b2e3aac97 100644
--- a/source/blender/imbuf/intern/allocimbuf.cc
+++ b/source/blender/imbuf/intern/allocimbuf.cc
@@ -84,6 +84,27 @@ template<class BufferType> static void imb_free_buffer(BufferType &buffer)
   buffer.ownership = IB_DO_NOT_TAKE_OWNERSHIP;
 }
 
+/* Free the specified DDS buffer storage, freeing memory when needed and restoring the state of the
+ * buffer to its defaults. */
+static void imb_free_dds_buffer(DDSData &dds_data)
+{
+  if (dds_data.data) {
+    switch (dds_data.ownership) {
+      case IB_DO_NOT_TAKE_OWNERSHIP:
+        break;
+
+      case IB_TAKE_OWNERSHIP:
+        /* dds_data.data is allocated by DirectDrawSurface::readData(), so don't use MEM_freeN! */
+        free(dds_data.data);
+        break;
+    }
+  }
+
+  /* Reset buffer to defaults. */
+  dds_data.data = nullptr;
+  dds_data.ownership = IB_DO_NOT_TAKE_OWNERSHIP;
+}
+
 /* Allocate pixel storage of the given buffer. The buffer owns the allocated memory.
  * Returns true of allocation succeeded, false otherwise. */
 template<class BufferType>
@@ -249,11 +270,7 @@ void IMB_freeImBuf(ImBuf *ibuf)
     IMB_free_gpu_textures(ibuf);
     IMB_metadata_free(ibuf->metadata);
     colormanage_cache_free(ibuf);
-
-    if (ibuf->dds_data.data != nullptr) {
-      /* dds_data.data is allocated by DirectDrawSurface::readData(), so don't use MEM_freeN! */
-      free(ibuf->dds_data.data);
-    }
+    imb_free_dds_buffer(ibuf->dds_data);
     MEM_freeN(ibuf);
   }
 }
@@ -472,6 +489,32 @@ void IMB_assign_float_buffer(ImBuf *ibuf, float *buffer_data, const ImBufOwnersh
   }
 }
 
+void IMB_assign_byte_buffer(ImBuf *ibuf,
+                            const ImBufByteBuffer &buffer,
+                            const ImBufOwnership ownership)
+{
+  IMB_assign_byte_buffer(ibuf, buffer.data, ownership);
+  ibuf->byte_buffer.colorspace = buffer.colorspace;
+}
+
+void IMB_assign_float_buffer(ImBuf *ibuf,
+                             const ImBufFloatBuffer &buffer,
+                             const ImBufOwnership ownership)
+{
+  IMB_assign_float_buffer(ibuf, buffer.data, ownership);
+  ibuf->float_buffer.colorspace = buffer.colorspace;
+}
+
+void IMB_assign_dds_data(ImBuf *ibuf, const DDSData &data, const ImBufOwnership ownership)
+{
+  BLI_assert(ibuf->ftype == IMB_FTYPE_DDS);
+
+  imb_free_dds_buffer(ibuf->dds_data);
+
+  ibuf->dds_data = data;
+  ibuf->dds_data.ownership = ownership;
+}
+
 ImBuf *IMB_allocFromBufferOwn(
     uint8_t *byte_buffer, float *float_buffer, uint w, uint h, uint channels)
 {
diff --git a/source/blender/imbuf/intern/format_dds.cc b/source/blender/imbuf/intern/format_dds.cc
index b5b26a10ca0..0b6a88dbcb6 100644
--- a/source/blender/imbuf/intern/format_dds.cc
+++ b/source/blender/imbuf/intern/format_dds.cc
@@ -330,6 +330,7 @@ static void LoadDXTCImage(ImBuf *ibuf, Filesystem::IOMemReader &mem_reader)
     ibuf->dds_data.size = mem_reader.size() - dds_header_size;
     ibuf->dds_data.data = (uchar *)malloc(ibuf->dds_data.size);
     mem_reader.pread(ibuf->dds_data.data, ibuf->dds_data.size, dds_header_size);
+    ibuf->dds_data.ownership = IB_TAKE_OWNERSHIP;
 
     /* Flip compressed image data to match OpenGL convention. */
     FlipDXTCImage(ibuf);
diff --git a/source/blender/makesrna/intern/rna_modifier.cc b/source/blender/makesrna/intern/rna_modifier.cc
index 862ede4a6d4..59cd6da07eb 100644
--- a/source/blender/makesrna/intern/rna_modifier.cc
+++ b/source/blender/makesrna/intern/rna_modifier.cc
@@ -7703,6 +7703,8 @@ static void rna_def_modifier_nodes_bake(BlenderRNA *brna)
   StructRNA *srna;
   PropertyRNA *prop;
 
+  RNA_define_lib_overridable(true);
+
   srna = RNA_def_struct(brna, "NodesModifierBake", nullptr);
   RNA_def_struct_ui_text(srna, "Nodes Modifier Bake", "");
 
@@ -7758,6 +7760,8 @@ static void rna_def_modifier_nodes_bake(BlenderRNA *brna)
   RNA_def_property_struct_type(prop, "NodesModifierDataBlock");
   RNA_def_property_collection_sdna(prop, nullptr, "data_blocks", "data_blocks_num");
   RNA_def_property_srna(prop, "NodesModifierBakeDataBlocks");
+
+  RNA_define_lib_overridable(false);
 }
 
 static void rna_def_modifier_nodes_bakes(BlenderRNA *brna)
diff --git a/source/blender/makesrna/intern/rna_scene.cc b/source/blender/makesrna/intern/rna_scene.cc
index 1cece2ee975..a4bbd1de0ff 100644
--- a/source/blender/makesrna/intern/rna_scene.cc
+++ b/source/blender/makesrna/intern/rna_scene.cc
@@ -3574,7 +3574,7 @@ static void rna_def_tool_settings(BlenderRNA *brna)
       prop, "rna_ToolSettings_snap_mode_get", "rna_ToolSettings_snap_mode_set", nullptr);
   RNA_def_property_flag(prop, PROP_ENUM_FLAG);
   RNA_def_property_ui_text(
-      prop, "Snap Element", "Type of element for the \"Snap With\" to snap to");
+      prop, "Snap Element", "Type of element for the \"Snap Base\" to snap to");
   RNA_def_property_update(prop, NC_SCENE | ND_TOOLSETTINGS, nullptr); /* header redraw */
 
   prop = RNA_def_property(srna, "snap_elements_individual", PROP_ENUM, PROP_NONE);
diff --git a/source/blender/nodes/geometry/nodes/node_geo_realize_instances.cc b/source/blender/nodes/geometry/nodes/node_geo_realize_instances.cc
index 2fd19b7b9df..3f9f64e6a47 100644
--- a/source/blender/nodes/geometry/nodes/node_geo_realize_instances.cc
+++ b/source/blender/nodes/geometry/nodes/node_geo_realize_instances.cc
@@ -18,13 +18,13 @@ static void node_declare(NodeDeclarationBuilder &b)
   b.add_input<decl::Bool>("Selection")
       .default_value(true)
       .hide_value()
-      .supports_field()
+      .field_on_all()
       .description("Which top-level instances to realize");
   b.add_input<decl::Bool>("Realize All")
       .default_value(true)
-      .supports_field()
+      .field_on_all()
       .description("Determine wether to realize nested instances completly");
-  b.add_input<decl::Int>("Depth").default_value(0).min(0).supports_field().description(
+  b.add_input<decl::Int>("Depth").default_value(0).min(0).field_on_all().description(
       "Number of levels of nested instances to realize for each top-level instance");
   b.add_output<decl::Geometry>("Geometry").propagate_all();
 }
@@ -38,24 +38,29 @@ static void node_geo_exec(GeoNodeExecParams params)
   }
 
   GeometryComponentEditData::remember_deformed_positions_if_necessary(geometry_set);
-  Field<bool> selection_field = params.extract_input<Field<bool>>("Selection");
-  Field<bool> realize_all_filed = params.extract_input<Field<bool>>("Realize All");
+
+  Field<bool> realize_all_field = params.extract_input<Field<bool>>("Realize All");
   Field<int> depth_field = params.extract_input<Field<int>>("Depth");
 
   static auto depth_override = mf::build::SI2_SO<int, bool, int>(
       "depth_override",
-      [](int value, bool realize) { return realize ? -1 : std::max(value, 0); },
+      [](int depth, bool realize_all_field) {
+        return realize_all_field ? geometry::VariedDepthOption::MAX_DEPTH : std::max(depth, 0);
+      },
       mf::build::exec_presets::AllSpanOrSingle());
 
+  Field<int> depth_field_overrided(FieldOperation::Create(
+      depth_override, {std::move(depth_field), std::move(realize_all_field)}));
+
+  Field<bool> selection_field = params.extract_input<Field<bool>>("Selection");
+
   static auto selection_override = mf::build::SI2_SO<int, bool, bool>(
       "selection_override",
-      [](int value, bool selection) { return value == 0 ? false : selection; },
+      [](int depth_override, bool selection) { return depth_override == 0 ? false : selection; },
       mf::build::exec_presets::AllSpanOrSingle());
 
-  Field<int> depth_field_overrided(
-      FieldOperation::Create(depth_override, {depth_field, realize_all_filed}));
-  Field<bool> selection_field_overrided(
-      FieldOperation::Create(selection_override, {depth_field_overrided, selection_field}));
+  Field<bool> selection_field_overrided(FieldOperation::Create(
+      selection_override, {depth_field_overrided, std::move(selection_field)}));
 
   const bke::Instances &instances = *geometry_set.get_instances();
   const bke::InstancesFieldContext field_context(instances);
diff --git a/source/blender/nodes/geometry/nodes/node_geo_set_curve_normal.cc b/source/blender/nodes/geometry/nodes/node_geo_set_curve_normal.cc
index d97c8561b69..ded077ee86f 100644
--- a/source/blender/nodes/geometry/nodes/node_geo_set_curve_normal.cc
+++ b/source/blender/nodes/geometry/nodes/node_geo_set_curve_normal.cc
@@ -21,7 +21,10 @@ static void node_declare(NodeDeclarationBuilder &b)
   b.add_input<decl::Geometry>("Curve").supported_type(
       {GeometryComponent::Type::Curve, GeometryComponent::Type::GreasePencil});
   b.add_input<decl::Bool>("Selection").default_value(true).hide_value().field_on_all();
-  b.add_input<decl::Vector>("Normal").default_value({0.0f, 0.0f, 1.0f}).field_on_all();
+  b.add_input<decl::Vector>("Normal")
+      .default_value({0.0f, 0.0f, 1.0f})
+      .subtype(PROP_XYZ)
+      .field_on_all();
   b.add_output<decl::Geometry>("Curve").propagate_all();
 }
 
diff --git a/source/blender/python/gpu/gpu_py_vertex_buffer.cc b/source/blender/python/gpu/gpu_py_vertex_buffer.cc
index 310e4b5aa21..0602e5ecf98 100644
--- a/source/blender/python/gpu/gpu_py_vertex_buffer.cc
+++ b/source/blender/python/gpu/gpu_py_vertex_buffer.cc
@@ -241,7 +241,7 @@ static PyObject *pygpu_vertbuf__tp_new(PyTypeObject * /*type*/, PyObject *args,
       PY_ARG_PARSER_HEAD_COMPAT()
       "O!" /* `format` */
       "I"  /* `len` */
-      ":blender::gpu::VertBuf.__new__",
+      ":GPUVertBuf.__new__",
       _keywords,
       nullptr,
   };
@@ -307,7 +307,7 @@ static PyObject *pygpu_vertbuf_attr_fill(BPyGPUVertBuf *self, PyObject *args, Py
     return nullptr;
   }
 
-  if (!pygpu_vertbuf_fill(self->buf, id, data, "blender::gpu::VertBuf.attr_fill")) {
+  if (!pygpu_vertbuf_fill(self->buf, id, data, "GPUVertBuf.attr_fill")) {
     return nullptr;
   }
 
@@ -340,7 +340,7 @@ static void pygpu_vertbuf__tp_dealloc(BPyGPUVertBuf *self)
 PyDoc_STRVAR(
     /* Wrap. */
     pygpu_vertbuf__tp_doc,
-    ".. class:: blender::gpu::VertBuf(format, len)\n"
+    ".. class:: GPUVertBuf(format, len)\n"
     "\n"
     "   Contains a VBO.\n"
     "\n"
@@ -350,7 +350,7 @@ PyDoc_STRVAR(
     "   :type len: int\n");
 PyTypeObject BPyGPUVertBuf_Type = {
     /*ob_base*/ PyVarObject_HEAD_INIT(nullptr, 0)
-    /*tp_name*/ "blender::gpu::VertBuf",
+    /*tp_name*/ "GPUVertBuf",
     /*tp_basicsize*/ sizeof(BPyGPUVertBuf),
     /*tp_itemsize*/ 0,
     /*tp_dealloc*/ (destructor)pygpu_vertbuf__tp_dealloc,
diff --git a/source/blender/render/intern/engine.cc b/source/blender/render/intern/engine.cc
index 5c68fc0ee98..d079fc1b70a 100644
--- a/source/blender/render/intern/engine.cc
+++ b/source/blender/render/intern/engine.cc
@@ -1314,8 +1314,8 @@ bool RE_engine_gpu_context_enable(RenderEngine *engine)
     /* Activate RenderEngine System and Blender GPU Context. */
     WM_system_gpu_context_activate(engine->system_gpu_context);
     if (engine->blender_gpu_context) {
-      GPU_context_active_set(engine->blender_gpu_context);
       GPU_render_begin();
+      GPU_context_active_set(engine->blender_gpu_context);
     }
     return true;
   }
@@ -1330,8 +1330,8 @@ void RE_engine_gpu_context_disable(RenderEngine *engine)
   else {
     if (engine->system_gpu_context) {
       if (engine->blender_gpu_context) {
-        GPU_render_end();
         GPU_context_active_set(nullptr);
+        GPU_render_end();
       }
       WM_system_gpu_context_release(engine->system_gpu_context);
       /* Restore DRW state context if previously active. */
diff --git a/tests/data b/tests/data
index 5038ad7165f..bf5c7083054 160000
--- a/tests/data
+++ b/tests/data
@@ -1 +1 @@
-Subproject commit 5038ad7165fd1a77e61e0d2d6efdadd6ea7c0dfb
+Subproject commit bf5c70830540b215a3b1df21f28e0e80ead230f7