diff --git a/doc/python_api/sphinx_doc_gen.py b/doc/python_api/sphinx_doc_gen.py index e87815821c5..a4d66f5c5fe 100644 --- a/doc/python_api/sphinx_doc_gen.py +++ b/doc/python_api/sphinx_doc_gen.py @@ -1949,7 +1949,7 @@ if html_theme == "furo": "sidebar/scroll-start.html", "sidebar/navigation.html", "sidebar/scroll-end.html", - # "sidebar/variant-selector.html", + "sidebar/variant-selector.html", ] } """) diff --git a/extern/CMakeLists.txt b/extern/CMakeLists.txt index 30842f32441..c9b8a2deea2 100644 --- a/extern/CMakeLists.txt +++ b/extern/CMakeLists.txt @@ -104,10 +104,6 @@ if(WITH_MOD_FLUID) add_subdirectory(mantaflow) endif() -if(WITH_COMPOSITOR_CPU) - add_subdirectory(smaa_areatex) -endif() - if(WITH_VULKAN_BACKEND) add_subdirectory(vulkan_memory_allocator) endif() diff --git a/extern/smaa_areatex/CMakeLists.txt b/extern/smaa_areatex/CMakeLists.txt deleted file mode 100644 index ace1406c37f..00000000000 --- a/extern/smaa_areatex/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -# SPDX-FileCopyrightText: 2017 Blender Foundation -# -# SPDX-License-Identifier: GPL-2.0-or-later - -add_executable(smaa_areatex smaa_areatex.cpp) diff --git a/extern/smaa_areatex/README.blender b/extern/smaa_areatex/README.blender deleted file mode 100644 index 9c409142ae8..00000000000 --- a/extern/smaa_areatex/README.blender +++ /dev/null @@ -1,5 +0,0 @@ -Project: smaa-cpp -URL: https://github.com/iRi-E/smaa-cpp -License: MIT -Upstream version: 0.4.0 -Local modifications: diff --git a/extern/smaa_areatex/smaa_areatex.cpp b/extern/smaa_areatex/smaa_areatex.cpp deleted file mode 100644 index c61543e10a0..00000000000 --- a/extern/smaa_areatex/smaa_areatex.cpp +++ /dev/null @@ -1,1210 +0,0 @@ -/** - * Copyright (C) 2016-2017 IRIE Shinsuke - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, 
copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* - * smaa_areatex.cpp version 0.4.0 - * - * This is a part of smaa-cpp that is an implementation of - * Enhanced Subpixel Morphological Antialiasing (SMAA) written in C++. 
- * - * This program is C++ rewrite of AreaTex.py included in the original - * SMAA ditribution: - * - * https://github.com/iryoku/smaa/tree/master/Scripts - */ - -#include -#include -#include - -#include - -/*------------------------------------------------------------------------------*/ -/* Type Definitions */ - -class Int2; -class Dbl2; - -class Int2 { -public: - int x, y; - - Int2() { this->x = this->y = 0; } - Int2(int x) { this->x = this->y = x; } - Int2(int x, int y) { this->x = x; this->y = y; } - - operator Dbl2(); - - Int2 operator + (Int2 other) { return Int2(x + other.x, y + other.y); } - Int2 operator * (Int2 other) { return Int2(x * other.x, y * other.y); } -}; - -class Dbl2 { -public: - double x, y; - - Dbl2() { this->x = this->y = 0.0; } - Dbl2(double x) { this->x = this->y = x; } - Dbl2(double x, double y) { this->x = x; this->y = y; } - - Dbl2 apply(double (* func)(double)) { return Dbl2(func(x), func(y)); } - - operator Int2(); - - Dbl2 operator + (Dbl2 other) { return Dbl2(x + other.x, y + other.y); } - Dbl2 operator - (Dbl2 other) { return Dbl2(x - other.x, y - other.y); } - Dbl2 operator * (Dbl2 other) { return Dbl2(x * other.x, y * other.y); } - Dbl2 operator / (Dbl2 other) { return Dbl2(x / other.x, y / other.y); } - Dbl2 operator += (Dbl2 other) { return Dbl2(x += other.x, y += other.y); } - bool operator == (Dbl2 other) { return (x == other.x && y == other.y); } -}; - -Int2::operator Dbl2() { return Dbl2((double)x, (double)y); } -Dbl2::operator Int2() { return Int2((int)x, (int)y); } - -/*------------------------------------------------------------------------------*/ -/* Data to Calculate Areatex */ - -/* Texture sizes: */ -/* (it's quite possible that this is not easily configurable) */ -static const int SUBSAMPLES_ORTHO = 7; -static const int SUBSAMPLES_DIAG = 5; -static const int MAX_DIST_ORTHO_COMPAT = 16; -static const int MAX_DIST_ORTHO = 20; -static const int MAX_DIST_DIAG = 20; -static const int TEX_SIZE_ORTHO = 80; /* 16 * 5 
slots = 80 */ -static const int TEX_SIZE_DIAG = 80; /* 20 * 4 slots = 80 */ - -/* Number of samples for calculating areas in the diagonal textures: */ -/* (diagonal areas are calculated using brute force sampling) */ -static const int SAMPLES_DIAG = 30; - -/* Maximum distance for smoothing u-shapes: */ -static const int SMOOTH_MAX_DISTANCE = 32; - -/*------------------------------------------------------------------------------*/ -/* Offset Tables */ - -/* Offsets for subsample rendering */ -static const double subsample_offsets_ortho[SUBSAMPLES_ORTHO] = { - 0.0, /* 0 */ - -0.25, /* 1 */ - 0.25, /* 2 */ - -0.125, /* 3 */ - 0.125, /* 4 */ - -0.375, /* 5 */ - 0.375 /* 6 */ -}; - -static const Dbl2 subsample_offsets_diag[SUBSAMPLES_DIAG] = { - { 0.00, 0.00}, /* 0 */ - { 0.25, -0.25}, /* 1 */ - {-0.25, 0.25}, /* 2 */ - { 0.125, -0.125}, /* 3 */ - {-0.125, 0.125} /* 4 */ -}; - -/* Mapping offsets for placing each pattern subtexture into its place */ -enum edgesorthoIndices -{ - EDGESORTHO_NONE_NONE = 0, - EDGESORTHO_NONE_NEGA = 1, - EDGESORTHO_NONE_POSI = 2, - EDGESORTHO_NONE_BOTH = 3, - EDGESORTHO_NEGA_NONE = 4, - EDGESORTHO_NEGA_NEGA = 5, - EDGESORTHO_NEGA_POSI = 6, - EDGESORTHO_NEGA_BOTH = 7, - EDGESORTHO_POSI_NONE = 8, - EDGESORTHO_POSI_NEGA = 9, - EDGESORTHO_POSI_POSI = 10, - EDGESORTHO_POSI_BOTH = 11, - EDGESORTHO_BOTH_NONE = 12, - EDGESORTHO_BOTH_NEGA = 13, - EDGESORTHO_BOTH_POSI = 14, - EDGESORTHO_BOTH_BOTH = 15, -}; - -static const Int2 edgesortho_compat[16] = { - {0, 0}, {0, 1}, {0, 3}, {0, 4}, {1, 0}, {1, 1}, {1, 3}, {1, 4}, - {3, 0}, {3, 1}, {3, 3}, {3, 4}, {4, 0}, {4, 1}, {4, 3}, {4, 4} -}; - -static const Int2 edgesortho[16] = { - {0, 0}, {0, 1}, {0, 2}, {0, 3}, {1, 0}, {1, 1}, {1, 2}, {1, 3}, - {2, 0}, {2, 1}, {2, 2}, {2, 3}, {3, 0}, {3, 1}, {3, 2}, {3, 3} -}; - -enum edgesdiagIndices -{ - EDGESDIAG_NONE_NONE = 0, - EDGESDIAG_NONE_VERT = 1, - EDGESDIAG_NONE_HORZ = 2, - EDGESDIAG_NONE_BOTH = 3, - EDGESDIAG_VERT_NONE = 4, - EDGESDIAG_VERT_VERT = 5, - 
EDGESDIAG_VERT_HORZ = 6, - EDGESDIAG_VERT_BOTH = 7, - EDGESDIAG_HORZ_NONE = 8, - EDGESDIAG_HORZ_VERT = 9, - EDGESDIAG_HORZ_HORZ = 10, - EDGESDIAG_HORZ_BOTH = 11, - EDGESDIAG_BOTH_NONE = 12, - EDGESDIAG_BOTH_VERT = 13, - EDGESDIAG_BOTH_HORZ = 14, - EDGESDIAG_BOTH_BOTH = 15, -}; - -static const Int2 edgesdiag[16] = { - {0, 0}, {0, 1}, {0, 2}, {0, 3}, {1, 0}, {1, 1}, {1, 2}, {1, 3}, - {2, 0}, {2, 1}, {2, 2}, {2, 3}, {3, 0}, {3, 1}, {3, 2}, {3, 3} -}; - -/*------------------------------------------------------------------------------*/ -/* Miscellaneous Utility Functions */ - -/* Linear interpolation: */ -static Dbl2 lerp(Dbl2 a, Dbl2 b, double p) -{ - return a + (b - a) * Dbl2(p); -} - -/* Saturates a value to [0..1] range: */ -static double saturate(double x) -{ - return 0.0 < x ? (x < 1.0 ? x : 1.0) : 0.0; -} - -/*------------------------------------------------------------------------------*/ -/* Horizontal/Vertical Areas */ - -class AreaOrtho { - double m_data[SUBSAMPLES_ORTHO][TEX_SIZE_ORTHO][TEX_SIZE_ORTHO][2]; - bool m_compat; - bool m_orig_u; -public: - AreaOrtho(bool compat, bool orig_u) : m_compat(compat), m_orig_u(orig_u) {} - - double *getData() { return (double *)&m_data; } - Dbl2 getPixel(int offset_index, Int2 coords) { - return Dbl2(m_data[offset_index][coords.y][coords.x][0], - m_data[offset_index][coords.y][coords.x][1]); - } - - void areaTex(int offset_index); -private: - void putPixel(int offset_index, Int2 coords, Dbl2 pixel) { - m_data[offset_index][coords.y][coords.x][0] = pixel.x; - m_data[offset_index][coords.y][coords.x][1] = pixel.y; - } - - Dbl2 smoothArea(double d, Dbl2 a1, Dbl2 a2); - Dbl2 makeQuad(int x, double d, double o); - Dbl2 area(Dbl2 p1, Dbl2 p2, int x); - Dbl2 calculate(int pattern, int left, int right, double offset); -}; - -/* Smoothing function for small u-patterns: */ -Dbl2 AreaOrtho::smoothArea(double d, Dbl2 a1, Dbl2 a2) -{ - Dbl2 b1 = (a1 * Dbl2(2.0)).apply(sqrt) * Dbl2(0.5); - Dbl2 b2 = (a2 * Dbl2(2.0)).apply(sqrt) * 
Dbl2(0.5); - double p = saturate(d / (double)SMOOTH_MAX_DISTANCE); - return lerp(b1, a1, p) + lerp(b2, a2, p); -} - -/* Smoothing u-patterns by quadratic function: */ -Dbl2 AreaOrtho::makeQuad(int x, double d, double o) -{ - double r = (double)x; - - /* fmin() below is a trick to smooth tiny u-patterns: */ - return Dbl2(r, (1.0 - fmin(4.0, d) * r * (d - r) / (d * d)) * o); -} - -/* Calculates the area under the line p1->p2, for the pixel x..x+1: */ -Dbl2 AreaOrtho::area(Dbl2 p1, Dbl2 p2, int x) -{ - Dbl2 d = p2 - p1; - double x1 = (double)x; - double x2 = x1 + 1.0; - - if ((x1 >= p1.x && x1 < p2.x) || (x2 > p1.x && x2 <= p2.x)) { /* inside? */ - double y1 = p1.y + (x1 - p1.x) * d.y / d.x; - double y2 = p1.y + (x2 - p1.x) * d.y / d.x; - - if ((copysign(1.0, y1) == copysign(1.0, y2) || - fabs(y1) < 1e-4 || fabs(y2) < 1e-4)) { /* trapezoid? */ - double a = (y1 + y2) / 2.0; - if (a < 0.0) - return Dbl2(fabs(a), 0.0); - else - return Dbl2(0.0, fabs(a)); - } - else { /* Then, we got two triangles: */ - double x = p1.x - p1.y * d.x / d.y, xi; - double a1 = x > p1.x ? y1 * modf(x, &xi) / 2.0 : 0.0; - double a2 = x < p2.x ? y2 * (1.0 - modf(x, &xi)) / 2.0 : 0.0; - double a = fabs(a1) > fabs(a2) ? 
a1 : -a2; - if (a < 0.0) - return Dbl2(fabs(a1), fabs(a2)); - else - return Dbl2(fabs(a2), fabs(a1)); - } - } - else - return Dbl2(0.0, 0.0); -} - -/* Calculates the area for a given pattern and distances to the left and to the */ -/* right, biased by an offset: */ -Dbl2 AreaOrtho::calculate(int pattern, int left, int right, double offset) -{ - Dbl2 a1, a2; - - /* - * o1 | - * .-------´ - * o2 | - * - * <---d---> - */ - double d = (double)(left + right + 1); - - double o1 = 0.5 + offset; - double o2 = 0.5 + offset - 1.0; - - switch (pattern) { - case EDGESORTHO_NONE_NONE: - { - /* - * - * ------ - * - */ - return Dbl2(0.0, 0.0); - break; - } - case EDGESORTHO_POSI_NONE: - { - /* - * - * .------ - * | - * - * We only offset L patterns in the crossing edge side, to make it - * converge with the unfiltered pattern 0 (we don't want to filter the - * pattern 0 to avoid artifacts). - */ - if (left <= right) - return area(Dbl2(0.0, o2), Dbl2(d / 2.0, 0.0), left); - else - return Dbl2(0.0, 0.0); - break; - } - case EDGESORTHO_NONE_POSI: - { - /* - * - * ------. - * | - */ - if (left >= right) - return area(Dbl2(d / 2.0, 0.0), Dbl2(d, o2), left); - else - return Dbl2(0.0, 0.0); - break; - } - case EDGESORTHO_POSI_POSI: - { - /* - * - * .------. - * | | - */ - if (m_orig_u) { - a1 = area(Dbl2(0.0, o2), Dbl2(d / 2.0, 0.0), left); - a2 = area(Dbl2(d / 2.0, 0.0), Dbl2(d, o2), left); - return smoothArea(d, a1, a2); - } - else - return area(makeQuad(left, d, o2), makeQuad(left + 1, d, o2), left); - break; - } - case EDGESORTHO_NEGA_NONE: - { - /* - * | - * `------ - * - */ - if (left <= right) - return area(Dbl2(0.0, o1), Dbl2(d / 2.0, 0.0), left); - else - return Dbl2(0.0, 0.0); - break; - } - case EDGESORTHO_BOTH_NONE: - { - /* - * | - * +------ - * | - */ - return Dbl2(0.0, 0.0); - break; - } - case EDGESORTHO_NEGA_POSI: - { - /* - * | - * `------. 
- * | - * - * A problem of not offseting L patterns (see above), is that for certain - * max search distances, the pixels in the center of a Z pattern will - * detect the full Z pattern, while the pixels in the sides will detect a - * L pattern. To avoid discontinuities, we blend the full offsetted Z - * revectorization with partially offsetted L patterns. - */ - if (fabs(offset) > 0.0) { - a1 = area(Dbl2(0.0, o1), Dbl2(d, o2), left); - a2 = area(Dbl2(0.0, o1), Dbl2(d / 2.0, 0.0), left); - a2 += area(Dbl2(d / 2.0, 0.0), Dbl2(d, o2), left); - return (a1 + a2) / Dbl2(2.0); - } - else - return area(Dbl2(0.0, o1), Dbl2(d, o2), left); - break; - } - case EDGESORTHO_BOTH_POSI: - { - /* - * | - * +------. - * | | - */ - return area(Dbl2(0.0, o1), Dbl2(d, o2), left); - break; - } - case EDGESORTHO_NONE_NEGA: - { - /* - * | - * ------´ - * - */ - if (left >= right) - return area(Dbl2(d / 2.0, 0.0), Dbl2(d, o1), left); - else - return Dbl2(0.0, 0.0); - break; - } - case EDGESORTHO_POSI_NEGA: - { - /* - * | - * .------´ - * | - */ - if (fabs(offset) > 0.0) { - a1 = area(Dbl2(0.0, o2), Dbl2(d, o1), left); - a2 = area(Dbl2(0.0, o2), Dbl2(d / 2.0, 0.0), left); - a2 += area(Dbl2(d / 2.0, 0.0), Dbl2(d, o1), left); - return (a1 + a2) / Dbl2(2.0); - } - else - return area(Dbl2(0.0, o2), Dbl2(d, o1), left); - break; - } - case EDGESORTHO_NONE_BOTH: - { - /* - * | - * ------+ - * | - */ - return Dbl2(0.0, 0.0); - break; - } - case EDGESORTHO_POSI_BOTH: - { - /* - * | - * .------+ - * | | - */ - return area(Dbl2(0.0, o2), Dbl2(d, o1), left); - break; - } - case EDGESORTHO_NEGA_NEGA: - { - /* - * | | - * `------´ - * - */ - if (m_orig_u) { - a1 = area(Dbl2(0.0, o1), Dbl2(d / 2.0, 0.0), left); - a2 = area(Dbl2(d / 2.0, 0.0), Dbl2(d, o1), left); - return smoothArea(d, a1, a2); - } - else - return area(makeQuad(left, d, o1), makeQuad(left + 1, d, o1), left); - break; - } - case EDGESORTHO_BOTH_NEGA: - { - /* - * | | - * +------´ - * | - */ - return area(Dbl2(0.0, o2), Dbl2(d, o1), left); - 
break; - } - case EDGESORTHO_NEGA_BOTH: - { - /* - * | | - * `------+ - * | - */ - return area(Dbl2(0.0, o1), Dbl2(d, o2), left); - break; - } - case EDGESORTHO_BOTH_BOTH: - { - /* - * | | - * +------+ - * | | - */ - return Dbl2(0.0, 0.0); - break; - } - } - - return Dbl2(0.0, 0.0); -} - -/*------------------------------------------------------------------------------*/ -/* Diagonal Areas */ - -class AreaDiag { - double m_data[SUBSAMPLES_DIAG][TEX_SIZE_DIAG][TEX_SIZE_DIAG][2]; - bool m_numeric; - bool m_orig_u; -public: - AreaDiag(bool numeric, bool orig_u) : m_numeric(numeric), m_orig_u(orig_u) {} - - double *getData() { return (double *)&m_data; } - Dbl2 getPixel(int offset_index, Int2 coords) { - return Dbl2(m_data[offset_index][coords.y][coords.x][0], - m_data[offset_index][coords.y][coords.x][1]); - } - - void areaTex(int offset_index); -private: - void putPixel(int offset_index, Int2 coords, Dbl2 pixel) { - m_data[offset_index][coords.y][coords.x][0] = pixel.x; - m_data[offset_index][coords.y][coords.x][1] = pixel.y; - } - - double area1(Dbl2 p1, Dbl2 p2, Int2 p); - Dbl2 area(Dbl2 p1, Dbl2 p2, int left); - Dbl2 areaTriangle(Dbl2 p1L, Dbl2 p2L, Dbl2 p1R, Dbl2 p2R, int left); - Dbl2 calculate(int pattern, int left, int right, Dbl2 offset); -}; - -/* Calculates the area under the line p1->p2 for the pixel 'p' using brute */ -/* force sampling: */ -/* (quick and dirty solution, but it works) */ -double AreaDiag::area1(Dbl2 p1, Dbl2 p2, Int2 p) -{ - if (p1 == p2) - return 1.0; - - double xm = (p1.x + p2.x) / 2.0, ym = (p1.y + p2.y) / 2.0; - double a = p2.y - p1.y; - double b = p1.x - p2.x; - int count = 0; - - for (int ix = 0; ix < SAMPLES_DIAG; ix++) { - double x = (double)p.x + (double)ix / (double)(SAMPLES_DIAG - 1); - for (int iy = 0; iy < SAMPLES_DIAG; iy++) { - double y = (double)p.y + (double)iy / (double)(SAMPLES_DIAG - 1); - if (a * (x - xm) + b * (y - ym) > 0.0) /* inside? 
*/ - count++; - } - } - return (double)count / (double)(SAMPLES_DIAG * SAMPLES_DIAG); -} - -/* Calculates the area under the line p1->p2: */ -/* (includes the pixel and its opposite) */ -Dbl2 AreaDiag::area(Dbl2 p1, Dbl2 p2, int left) -{ - if (m_numeric) { - double a1 = area1(p1, p2, Int2(1, 0) + Int2(left)); - double a2 = area1(p1, p2, Int2(1, 1) + Int2(left)); - return Dbl2(1.0 - a1, a2); - } - - /* Calculates the area under the line p1->p2 for the pixel 'p' analytically */ - Dbl2 d = p2 - p1; - if (d.x == 0.0) - return Dbl2(0.0, 1.0); - if (d.y == 0.0) - return Dbl2(1.0, 0.0); - - double x1 = (double)(1 + left); - double x2 = x1 + 1.0; - double ymid = x1; - double xtop = p1.x + (ymid + 1.0 - p1.y) * d.x / d.y; - double xmid = p1.x + (ymid - p1.y) * d.x / d.y; - double xbot = p1.x + (ymid - 1.0 - p1.y) * d.x / d.y; - - double y1 = p1.y + (x1 - p1.x) * d.y / d.x; - double y2 = p1.y + (x2 - p1.x) * d.y / d.x; - double fy1 = y1 - floor(y1); - double fy2 = y2 - floor(y2); - int iy1 = (int)floor(y1 - ymid); - int iy2 = (int)floor(y2 - ymid); - - if (iy1 <= -2) { - if (iy2 == -1) - return Dbl2(1.0 - (x2 - xbot) * fy2 * 0.5, 0.0); - else if (iy2 == 0) - return Dbl2((xmid + xbot) * 0.5 - x1, (x2 - xmid) * fy2 * 0.5); - else if (iy2 >= 1) - return Dbl2((xmid + xbot) * 0.5 - x1, x2 - (xtop + xmid) * 0.5); - else /* iy2 < -1 */ - return Dbl2(1.0, 0.0); - } - else if (iy1 == -1) { - if (iy2 == -1) - return Dbl2(1.0 - (fy1 + fy2) * 0.5, 0.0); - else if (iy2 == 0) - return Dbl2((xmid - x1) * (1.0 - fy1) * 0.5, (x2 - xmid) * fy2 * 0.5); - else if (iy2 >= 1) - return Dbl2((xmid - x1) * (1.0 - fy1) * 0.5, x2 - (xtop + xmid) * 0.5); - else /* iy2 < -1 */ - return Dbl2(1.0 - (xbot - x1) * fy1 * 0.5, 0.0); - } - else if (iy1 == 0) { - if (iy2 == -1) - return Dbl2((x2 - xmid) * (1.0 - fy2) * 0.5, (xmid - x1) * fy1 * 0.5); - else if (iy2 == 0) - return Dbl2(0.0, (fy1 + fy2) * 0.5); - else if (iy2 >= 1) - return Dbl2(0.0, 1.0 - (xtop - x1) * (1.0 - fy1) * 0.5); - else /* iy2 < -1 */ - 
return Dbl2(x2 - (xmid + xbot) * 0.5, (xmid - x1) * fy1 * 0.5); - } - else { /* iy1 > 0 */ - if (iy2 == -1) - return Dbl2((x2 - xtop) * (1.0 - fy2) * 0.5, (xtop + xmid) * 0.5 - x1); - else if (iy2 == 0) - return Dbl2(0.0, 1.0 - (x1 - xtop) * (1.0 - fy2) * 0.5); - else if (iy2 >= 1) - return Dbl2(0.0, 1.0); - else /* iy2 < -1 */ - return Dbl2(x2 - (xmid + xbot) * 0.5, (xtop + xmid) * 0.5 - x1); - } -} - -/* Calculate u-patterns using a triangle: */ -Dbl2 AreaDiag::areaTriangle(Dbl2 p1L, Dbl2 p2L, Dbl2 p1R, Dbl2 p2R, int left) -{ - double x1 = (double)(1 + left); - double x2 = x1 + 1.0; - - Dbl2 dL = p2L - p1L; - Dbl2 dR = p2R - p1R; - double xm = ((p1L.x * dL.y / dL.x - p1L.y) - (p1R.x * dR.y / dR.x - p1R.y)) / (dL.y / dL.x - dR.y / dR.x); - - double y1 = (x1 < xm) ? p1L.y + (x1 - p1L.x) * dL.y / dL.x : p1R.y + (x1 - p1R.x) * dR.y / dR.x; - double y2 = (x2 < xm) ? p1L.y + (x2 - p1L.x) * dL.y / dL.x : p1R.y + (x2 - p1R.x) * dR.y / dR.x; - - return area(Dbl2(x1, y1), Dbl2(x2, y2), left); -} - -/* Calculates the area for a given pattern and distances to the left and to the */ -/* right, biased by an offset: */ -Dbl2 AreaDiag::calculate(int pattern, int left, int right, Dbl2 offset) -{ - Dbl2 a1, a2; - - double d = (double)(left + right + 1); - - /* - * There is some Black Magic around diagonal area calculations. Unlike - * orthogonal patterns, the 'null' pattern (one without crossing edges) must be - * filtered, and the ends of both the 'null' and L patterns are not known: L - * and U patterns have different endings, and we don't know what is the - * adjacent pattern. So, what we do is calculate a blend of both possibilites. 
- */ - switch (pattern) { - case EDGESDIAG_NONE_NONE: - { - /* - * - * .-´ - * .-´ - * .-´ - * .-´ - * ´ - * - */ - a1 = area(Dbl2(1.0, 1.0), Dbl2(1.0, 1.0) + Dbl2(d), left); /* 1st possibility */ - a2 = area(Dbl2(1.0, 0.0), Dbl2(1.0, 0.0) + Dbl2(d), left); /* 2nd possibility */ - return (a1 + a2) / Dbl2(2.0); /* Blend them */ - break; - } - case EDGESDIAG_VERT_NONE: - { - /* - * - * .-´ - * .-´ - * .-´ - * .-´ - * | - * | - */ - a1 = area(Dbl2(1.0, 0.0) + offset, Dbl2(0.0, 0.0) + Dbl2(d), left); - a2 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d), left); - return (a1 + a2) / Dbl2(2.0); - break; - } - case EDGESDIAG_NONE_HORZ: - { - /* - * - * .---- - * .-´ - * .-´ - * .-´ - * ´ - * - */ - a1 = area(Dbl2(0.0, 0.0), Dbl2(1.0, 0.0) + Dbl2(d) + offset, left); - a2 = area(Dbl2(1.0, 0.0), Dbl2(1.0, 0.0) + Dbl2(d) + offset, left); - return (a1 + a2) / Dbl2(2.0); - break; - } - case EDGESDIAG_VERT_HORZ: - { - /* - * - * .---- - * .-´ - * .-´ - * .-´ - * | - * | - */ - if (m_orig_u) - return area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left); - else - return areaTriangle(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d), - Dbl2(0.0, 0.0), Dbl2(1.0, 0.0) + Dbl2(d) + offset, left); - break; - } - case EDGESDIAG_HORZ_NONE: - { - /* - * - * .-´ - * .-´ - * .-´ - * ----´ - * - * - */ - a1 = area(Dbl2(1.0, 1.0) + offset, Dbl2(0.0, 0.0) + Dbl2(d), left); - a2 = area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d), left); - return (a1 + a2) / Dbl2(2.0); - break; - } - case EDGESDIAG_BOTH_NONE: - { - /* - * - * .-´ - * .-´ - * .-´ - * --.-´ - * | - * | - */ - a1 = area(Dbl2(1.0, 1.0) + offset, Dbl2(0.0, 0.0) + Dbl2(d), left); - a2 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d), left); - return (a1 + a2) / Dbl2(2.0); - break; - } - case EDGESDIAG_HORZ_HORZ: - { - /* - * - * .---- - * .-´ - * .-´ - * ----´ - * - * - */ - return area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left); - break; - } - case EDGESDIAG_BOTH_HORZ: - { 
- /* - * - * .---- - * .-´ - * .-´ - * --.-´ - * | - * | - */ - a1 = area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left); - a2 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left); - return (a1 + a2) / Dbl2(2.0); - break; - } - case EDGESDIAG_NONE_VERT: - { - /* - * | - * | - * .-´ - * .-´ - * .-´ - * ´ - * - */ - a1 = area(Dbl2(0.0, 0.0), Dbl2(1.0, 1.0) + Dbl2(d) + offset, left); - a2 = area(Dbl2(1.0, 0.0), Dbl2(1.0, 1.0) + Dbl2(d) + offset, left); - return (a1 + a2) / Dbl2(2.0); - break; - } - case EDGESDIAG_VERT_VERT: - { - /* - * | - * | - * .-´ - * .-´ - * .-´ - * | - * | - */ - return area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left); - break; - } - case EDGESDIAG_NONE_BOTH: - { - /* - * | - * .---- - * .-´ - * .-´ - * .-´ - * ´ - * - */ - a1 = area(Dbl2(0.0, 0.0), Dbl2(1.0, 1.0) + Dbl2(d) + offset, left); - a2 = area(Dbl2(1.0, 0.0), Dbl2(1.0, 0.0) + Dbl2(d) + offset, left); - return (a1 + a2) / Dbl2(2.0); - break; - } - case EDGESDIAG_VERT_BOTH: - { - /* - * | - * .---- - * .-´ - * .-´ - * .-´ - * | - * | - */ - a1 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left); - a2 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left); - return (a1 + a2) / Dbl2(2.0); - break; - } - case EDGESDIAG_HORZ_VERT: - { - /* - * | - * | - * .-´ - * .-´ - * ----´ - * - * - */ - if (m_orig_u) - return area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left); - else - return areaTriangle(Dbl2(1.0, 1.0) + offset, Dbl2(2.0, 1.0) + Dbl2(d), - Dbl2(1.0, 0.0), Dbl2(1.0, 1.0) + Dbl2(d) + offset, left); - break; - } - case EDGESDIAG_BOTH_VERT: - { - /* - * | - * | - * .-´ - * .-´ - * --.-´ - * | - * | - */ - a1 = area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left); - a2 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left); - return (a1 + a2) / Dbl2(2.0); - break; - } - case EDGESDIAG_HORZ_BOTH: - { - /* - * | - * .---- - * .-´ - * .-´ - 
* ----´ - * - * - */ - a1 = area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left); - a2 = area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left); - return (a1 + a2) / Dbl2(2.0); - break; - } - case EDGESDIAG_BOTH_BOTH: - { - /* - * | - * .---- - * .-´ - * .-´ - * --.-´ - * | - * | - */ - a1 = area(Dbl2(1.0, 1.0) + offset, Dbl2(1.0, 1.0) + Dbl2(d) + offset, left); - a2 = area(Dbl2(1.0, 0.0) + offset, Dbl2(1.0, 0.0) + Dbl2(d) + offset, left); - return (a1 + a2) / Dbl2(2.0); - break; - } - } - - return Dbl2(0.0, 0.0); -} - -/*------------------------------------------------------------------------------*/ -/* Main Loops */ - -void AreaOrtho::areaTex(int offset_index) -{ - double offset = subsample_offsets_ortho[offset_index]; - int max_dist = m_compat ? MAX_DIST_ORTHO_COMPAT : MAX_DIST_ORTHO; - - for (int pattern = 0; pattern < 16; pattern++) { - Int2 e = Int2(max_dist) * (m_compat ? edgesortho_compat : edgesortho)[pattern]; - for (int left = 0; left < max_dist; left++) { - for (int right = 0; right < max_dist; right++) { - Dbl2 p = calculate(pattern, left * left, right * right, offset); - Int2 coords = e + Int2(left, right); - - putPixel(offset_index, coords, p); - } - } - } - return; -} - -void AreaDiag::areaTex(int offset_index) -{ - Dbl2 offset = subsample_offsets_diag[offset_index]; - - for (int pattern = 0; pattern < 16; pattern++) { - Int2 e = Int2(MAX_DIST_DIAG) * edgesdiag[pattern]; - for (int left = 0; left < MAX_DIST_DIAG; left++) { - for (int right = 0; right < MAX_DIST_DIAG; right++) { - Dbl2 p = calculate(pattern, left, right, offset); - Int2 coords = e + Int2(left, right); - - putPixel(offset_index, coords, p); - } - } - } - return; -} - -/*------------------------------------------------------------------------------*/ -/* Write File to Specified Location on Disk */ - -/* C/C++ source code (arrays of floats) */ -static void write_double_array(FILE *fp, const double *ptr, int length, const char *array_name, bool 
quantize) -{ - fprintf(fp, "static const float %s[%d] = {", array_name, length); - - for (int n = 0; n < length; n++) { - if (n > 0) - fprintf(fp, ","); - fprintf(fp, (n % 8 != 0) ? " " : "\n\t"); - - if (quantize) - fprintf(fp, "%3d / 255.0", (int)(*(ptr++) * 255.0)); - else - fprintf(fp, "%1.8lf", *(ptr++)); - } - - fprintf(fp, "\n};\n"); -} - -static void write_csource(AreaOrtho *ortho, AreaDiag *diag, FILE *fp, bool subsampling, bool quantize) -{ - fprintf(fp, "/* This file was generated by smaa_areatex.cpp */\n"); - - fprintf(fp, "\n/* Horizontal/Vertical Areas */\n"); - write_double_array(fp, ortho->getData(), - TEX_SIZE_ORTHO * TEX_SIZE_ORTHO * 2 * (subsampling ? SUBSAMPLES_ORTHO : 1), - "areatex", quantize); - - fprintf(fp, "\n/* Diagonal Areas */\n"); - write_double_array(fp, diag->getData(), - TEX_SIZE_DIAG * TEX_SIZE_DIAG * 2 * (subsampling ? SUBSAMPLES_DIAG : 1), - "areatex_diag", quantize); -} - -/* .tga File (RGBA 32bit uncompressed) */ -static void write_tga(AreaOrtho *ortho, AreaDiag *diag, FILE *fp, bool subsampling) -{ - int subsamples = subsampling ? 
SUBSAMPLES_ORTHO : 1; - unsigned char header[18] = {0, 0, - 2, /* uncompressed RGB */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 32, /* 32bit */ - 8}; /* 8bit alpha, left to right, bottom to top */ - - /* Set width and height */ - header[12] = (TEX_SIZE_ORTHO + TEX_SIZE_DIAG) & 0xff; - header[13] = ((TEX_SIZE_ORTHO + TEX_SIZE_DIAG) >> 8) & 0xff; - header[14] = (subsamples * TEX_SIZE_ORTHO) & 0xff; - header[15] = ((subsamples * TEX_SIZE_ORTHO) >> 8) & 0xff; - - /* Write .tga header */ - fwrite(header, sizeof(unsigned char), sizeof(header) / sizeof(unsigned char), fp); - - /* Write pixel data */ - for (int i = subsamples - 1; i >= 0; i--) { - for (int y = TEX_SIZE_ORTHO - 1; y >= 0; y--) { - for (int x = 0; x < TEX_SIZE_ORTHO; x++) { - Dbl2 p = ortho->getPixel(i, Int2(x, y)); - fputc(0, fp); /* B */ - fputc((unsigned char)(p.y * 255.0), fp); /* G */ - fputc((unsigned char)(p.x * 255.0), fp); /* R */ - fputc(0, fp); /* A */ - } - - for (int x = 0; x < TEX_SIZE_DIAG; x++) { - if (i < SUBSAMPLES_DIAG) { - Dbl2 p = diag->getPixel(i, Int2(x, y)); - fputc(0, fp); /* B */ - fputc((unsigned char)(p.y * 255.0), fp); /* G */ - fputc((unsigned char)(p.x * 255.0), fp); /* R */ - fputc(0, fp); /* A */ - } - else { - fputc(0, fp); - fputc(0, fp); - fputc(0, fp); - fputc(0, fp); - } - } - } - } -} - -/* .raw File (R8G8 raw data) */ -static void write_raw(AreaOrtho *ortho, AreaDiag *diag, FILE *fp, bool subsampling) -{ - int subsamples = subsampling ? 
SUBSAMPLES_ORTHO : 1; - - /* Write pixel data */ - for (int i = 0; i < subsamples; i++) { - for (int y = 0; y < TEX_SIZE_ORTHO; y++) { - for (int x = 0; x < TEX_SIZE_ORTHO; x++) { - Dbl2 p = ortho->getPixel(i, Int2(x, y)); - fputc((unsigned char)(p.x * 255.0), fp); /* R */ - fputc((unsigned char)(p.y * 255.0), fp); /* G */ - } - - for (int x = 0; x < TEX_SIZE_DIAG; x++) { - if (i < SUBSAMPLES_DIAG) { - Dbl2 p = diag->getPixel(i, Int2(x, y)); - fputc((unsigned char)(p.x * 255.0), fp); /* R */ - fputc((unsigned char)(p.y * 255.0), fp); /* G */ - } - else { - fputc(0, fp); - fputc(0, fp); - } - } - } - } -} - -static int generate_file(AreaOrtho *ortho, AreaDiag *diag, const char *path, bool subsampling, bool quantize, bool tga, bool raw) -{ - FILE *fp = fopen(path, tga ? "wb" : "w"); - - if (!fp) { - fprintf(stderr, "Unable to open file: %s\n", path); - return 1; - } - - // fprintf(stderr, "Generating %s\n", path); - - if (tga) - write_tga(ortho, diag, fp, subsampling); - else if (raw) - write_raw(ortho, diag, fp, subsampling); - else - write_csource(ortho, diag, fp, subsampling, quantize); - - fclose(fp); - - return 0; -} - -int main(int argc, char **argv) -{ - bool subsampling = false; - bool quantize = false; - bool tga = false; - bool raw = false; - bool compat = false; - bool numeric = false; - bool orig_u = false; - bool help = false; - char *outfile = NULL; - int status = 0; - - for (int i = 1; i < argc; i++) { - char *ptr = argv[i]; - if (*ptr++ == '-' && *ptr != '\0') { - char c; - while ((c = *ptr++) != '\0') { - if (c == 's') - subsampling = true; - else if (c == 'q') - quantize = true; - else if (c == 't') - tga = true; - else if (c == 'r') - raw = true; - else if (c == 'c') - compat = true; - else if (c == 'n') - numeric = true; - else if (c == 'u') - orig_u = true; - else if (c == 'h') - help = true; - else { - fprintf(stderr, "Unknown option: -%c\n", c); - status = 1; - break; - } - } - } - else if (outfile) { - fprintf(stderr, "Too much file names: %s, 
%s\n", outfile, argv[i]); - status = 1; - } - else - outfile = argv[i]; - - if (status != 0) - break; - } - - if (status == 0 && !help && !outfile) { - fprintf(stderr, "File name was not specified.\n"); - status = 1; - } - - if (status != 0 || help) { - fprintf(stderr, "Usage: %s [OPTION]... OUTFILE\n", argv[0]); - fprintf(stderr, "Options:\n"); - fprintf(stderr, " -s Calculate data for subpixel rendering\n"); - fprintf(stderr, " -q Quantize data to 256 levels\n"); - fprintf(stderr, " -t Write TGA image instead of C/C++ source\n"); - fprintf(stderr, " -r Write R8G8 raw image instead of C/C++ source\n"); - fprintf(stderr, " -c Generate compatible orthogonal data that subtexture size is 16\n"); - fprintf(stderr, " -n Numerically calculate diagonal data using brute force sampling\n"); - fprintf(stderr, " -u Process orthogonal / diagonal U patterns in older ways\n"); - fprintf(stderr, " -h Print this help and exit\n"); - fprintf(stderr, "File name OUTFILE usually should have an extension such as .c, .h, or .tga,\n"); - fprintf(stderr, "except for a special name '-' that means standard output.\n\n"); - fprintf(stderr, "Example:\n"); - fprintf(stderr, " Generate TGA file exactly same as AreaTexDX10.tga bundled with the\n"); - fprintf(stderr, " original implementation:\n\n"); - fprintf(stderr, " $ smaa_areatex -stcnu AreaTexDX10.tga\n\n"); - return status; - } - - AreaOrtho *ortho = new AreaOrtho(compat, orig_u); - AreaDiag *diag = new AreaDiag(numeric, orig_u); - - /* Calculate areatex data */ - for (int i = 0; i < (subsampling ? SUBSAMPLES_ORTHO : 1); i++) - ortho->areaTex(i); - - for (int i = 0; i < (subsampling ? 
SUBSAMPLES_DIAG : 1); i++) - diag->areaTex(i); - - /* Generate .tga, .raw, or C/C++ source file, or write the data to stdout */ - if (strcmp(outfile, "-") != 0) - status = generate_file(ortho, diag, outfile, subsampling, quantize, tga, raw); - else if (tga) - write_tga(ortho, diag, stdout, subsampling); - else if (raw) - write_raw(ortho, diag, stdout, subsampling); - else - write_csource(ortho, diag, stdout, subsampling, quantize); - - delete ortho; - delete diag; - - return status; -} - -/* smaa_areatex.cpp ends here */ diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm index b9da74e2ff3..125c7129de0 100644 --- a/intern/cycles/device/metal/kernel.mm +++ b/intern/cycles/device/metal/kernel.mm @@ -106,7 +106,7 @@ struct ShaderCache { friend ShaderCache *get_shader_cache(id mtlDevice); - void compile_thread_func(int thread_index); + void compile_thread_func(); using PipelineCollection = std::vector>; @@ -174,7 +174,7 @@ void ShaderCache::wait_for_all() } } -void ShaderCache::compile_thread_func(int /*thread_index*/) +void ShaderCache::compile_thread_func() { while (running) { @@ -309,7 +309,7 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel, metal_printf("Spawning %d Cycles kernel compilation threads\n", max_mtlcompiler_threads); for (int i = 0; i < max_mtlcompiler_threads; i++) { - compile_threads.push_back(std::thread([&] { compile_thread_func(i); })); + compile_threads.push_back(std::thread([this] { this->compile_thread_func(); })); } } } diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h index d94a29b7f49..ad94e46ebbb 100644 --- a/intern/cycles/kernel/integrator/shade_volume.h +++ b/intern/cycles/kernel/integrator/shade_volume.h @@ -64,6 +64,11 @@ typedef struct VolumeShaderCoefficients { Spectrum emission; } VolumeShaderCoefficients; +typedef struct EquiangularCoefficients { + float3 P; + float2 t_range; +} EquiangularCoefficients; + /* Evaluate shader to 
get extinction coefficient at P. */ ccl_device_inline bool shadow_volume_shader_sample(KernelGlobals kg, IntegratorShadowState state, @@ -264,18 +269,18 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg, # define VOLUME_SAMPLE_PDF_CUTOFF 1e-8f ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict ray, - const float3 light_P, + ccl_private const EquiangularCoefficients &coeffs, const float xi, ccl_private float *pdf) { - const float tmin = ray->tmin; - const float tmax = ray->tmax; - const float delta = dot((light_P - ray->P), ray->D); - const float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta); + const float delta = dot((coeffs.P - ray->P), ray->D); + const float D = safe_sqrtf(len_squared(coeffs.P - ray->P) - delta * delta); if (UNLIKELY(D == 0.0f)) { *pdf = 0.0f; return 0.0f; } + const float tmin = coeffs.t_range.x; + const float tmax = coeffs.t_range.y; const float theta_a = atan2f(tmin - delta, D); const float theta_b = atan2f(tmax - delta, D); const float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a); @@ -289,17 +294,17 @@ ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict r } ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray, - const float3 light_P, + ccl_private const EquiangularCoefficients &coeffs, const float sample_t) { - const float delta = dot((light_P - ray->P), ray->D); - const float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta); + const float delta = dot((coeffs.P - ray->P), ray->D); + const float D = safe_sqrtf(len_squared(coeffs.P - ray->P) - delta * delta); if (UNLIKELY(D == 0.0f)) { return 0.0f; } - const float tmin = ray->tmin; - const float tmax = ray->tmax; + const float tmin = coeffs.t_range.x; + const float tmax = coeffs.t_range.y; const float t_ = sample_t - delta; const float theta_a = atan2f(tmin - delta, D); @@ -313,6 +318,29 @@ ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict 
ray, return pdf; } +ccl_device_inline bool volume_equiangular_valid_ray_segment(KernelGlobals kg, + const float3 ray_P, + const float3 ray_D, + ccl_private float2 *t_range, + const ccl_private LightSample *ls) +{ + if (ls->type == LIGHT_SPOT) { + ccl_global const KernelLight *klight = &kernel_data_fetch(lights, ls->lamp); + return spot_light_valid_ray_segment(klight, ray_P, ray_D, t_range); + } + if (ls->type == LIGHT_AREA) { + ccl_global const KernelLight *klight = &kernel_data_fetch(lights, ls->lamp); + return area_light_valid_ray_segment(&klight->area, ray_P - klight->co, ray_D, t_range); + } + if (ls->type == LIGHT_TRIANGLE) { + return triangle_light_valid_ray_segment(kg, ray_P - ls->P, ray_D, t_range, ls); + } + + /* Point light, the whole range of the ray is visible. */ + kernel_assert(ls->type == LIGHT_POINT); + return true; +} + /* Distance sampling */ ccl_device float volume_distance_sample(float max_t, @@ -403,7 +431,7 @@ typedef struct VolumeIntegrateState { ccl_device_forceinline void volume_integrate_step_scattering( ccl_private const ShaderData *sd, ccl_private const Ray *ray, - const float3 equiangular_light_P, + ccl_private const EquiangularCoefficients &equiangular_coeffs, ccl_private const VolumeShaderCoefficients &ccl_restrict coeff, const Spectrum transmittance, ccl_private VolumeIntegrateState &ccl_restrict vstate, @@ -474,7 +502,7 @@ ccl_device_forceinline void volume_integrate_step_scattering( /* Multiple importance sampling. 
*/ if (vstate.use_mis) { - const float equiangular_pdf = volume_equiangular_pdf(ray, equiangular_light_P, new_t); + const float equiangular_pdf = volume_equiangular_pdf(ray, equiangular_coeffs, new_t); const float mis_weight = power_heuristic(vstate.distance_pdf * distance_pdf, equiangular_pdf); result.direct_throughput *= 2.0f * mis_weight; @@ -509,7 +537,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous( ccl_global float *ccl_restrict render_buffer, const float object_step_size, const VolumeSampleMethod direct_sample_method, - const float3 equiangular_light_P, + ccl_private const EquiangularCoefficients &equiangular_coeffs, ccl_private VolumeIntegrateResult &result) { PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INTEGRATE); @@ -560,7 +588,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous( /* Equiangular sampling: compute distance and PDF in advance. */ if (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR) { result.direct_t = volume_equiangular_sample( - ray, equiangular_light_P, vstate.rscatter, &vstate.equiangular_pdf); + ray, equiangular_coeffs, vstate.rscatter, &vstate.equiangular_pdf); } # ifdef __PATH_GUIDING__ result.direct_sample_method = vstate.direct_sample_method; @@ -614,7 +642,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous( /* Scattering and absorption. */ volume_integrate_step_scattering( - sd, ray, equiangular_light_P, coeff, transmittance, vstate, result); + sd, ray, equiangular_coeffs, coeff, transmittance, vstate, result); } else { /* Absorption only. */ @@ -673,7 +701,8 @@ ccl_device_forceinline bool integrate_volume_equiangular_sample_light( ccl_private const Ray *ccl_restrict ray, ccl_private const ShaderData *ccl_restrict sd, ccl_private const RNGState *ccl_restrict rng_state, - ccl_private float3 *ccl_restrict P) + ccl_private EquiangularCoefficients *ccl_restrict equiangular_coeffs, + ccl_private LightSample &ccl_restrict ls) { /* Test if there is a light or BSDF that needs direct light. 
*/ if (!kernel_data.integrator.use_direct_light) { @@ -685,7 +714,6 @@ ccl_device_forceinline bool integrate_volume_equiangular_sample_light( const uint bounce = INTEGRATOR_STATE(state, path, bounce); const float3 rand_light = path_state_rng_3D(kg, rng_state, PRNG_LIGHT); - LightSample ls ccl_optional_struct_init; if (!light_sample_from_volume_segment(kg, rand_light, sd->time, @@ -708,9 +736,10 @@ ccl_device_forceinline bool integrate_volume_equiangular_sample_light( return false; } - *P = ls.P; + equiangular_coeffs->P = ls.P; - return true; + return volume_equiangular_valid_ray_segment( + kg, ray->P, ray->D, &equiangular_coeffs->t_range, &ls); } /* Path tracing: sample point on light and evaluate light shader, then @@ -725,41 +754,26 @@ ccl_device_forceinline void integrate_volume_direct_light( # ifdef __PATH_GUIDING__ ccl_private const Spectrum unlit_throughput, # endif - ccl_private const Spectrum throughput) + ccl_private const Spectrum throughput, + ccl_private LightSample &ccl_restrict ls) { PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_DIRECT_LIGHT); - if (!kernel_data.integrator.use_direct_light) { + if (!kernel_data.integrator.use_direct_light || ls.emitter_id == EMITTER_NONE) { return; } - /* Sample position on the same light again, now from the shading point where we scattered. - * - * Note that this means we sample the light tree twice when equiangular sampling is used. - * We could consider sampling the light tree just once and use the same light position again. - * - * This would make the PDFs for MIS weights more complicated due to having to account for - * both distance/equiangular and direct/indirect light sampling, but could be more accurate. - * Additionally we could end up behind the light or outside a spot light cone, which might - * waste a sample. Though on the other hand it would be possible to prevent that with - * equiangular sampling restricted to a smaller sub-segment where the light has influence. 
*/ - LightSample ls ccl_optional_struct_init; + /* Sample position on the same light again, now from the shading point where we scattered. */ { const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); const uint bounce = INTEGRATOR_STATE(state, path, bounce); const float3 rand_light = path_state_rng_3D(kg, rng_state, PRNG_LIGHT); + const float3 N = zero_float3(); + const int object_receiver = light_link_receiver_nee(kg, sd); + const int shader_flags = SD_BSDF_HAS_TRANSMISSION; - if (!light_sample_from_position(kg, - rng_state, - rand_light, - sd->time, - P, - zero_float3(), - light_link_receiver_nee(kg, sd), - SD_BSDF_HAS_TRANSMISSION, - bounce, - path_flag, - &ls)) + if (!light_sample( + kg, rand_light, sd->time, P, N, object_receiver, shader_flags, bounce, path_flag, &ls)) { return; } @@ -877,6 +891,7 @@ ccl_device_forceinline bool integrate_volume_phase_scatter( KernelGlobals kg, IntegratorState state, ccl_private ShaderData *sd, + ccl_private const Ray *ray, ccl_private const RNGState *rng_state, ccl_private const ShaderVolumePhases *phases) { @@ -929,6 +944,7 @@ ccl_device_forceinline bool integrate_volume_phase_scatter( INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P; INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(phase_wo); INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f; + INTEGRATOR_STATE_WRITE(state, ray, previous_dt) = ray->tmax - ray->tmin; INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX; # ifdef __RAY_DIFFERENTIALS__ INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); @@ -957,7 +973,8 @@ ccl_device_forceinline bool integrate_volume_phase_scatter( /* Update path state */ INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = phase_pdf; - INTEGRATOR_STATE_WRITE(state, path, mis_origin_n) = zero_float3(); + const float3 previous_P = ray->P + ray->D * ray->tmin; + INTEGRATOR_STATE_WRITE(state, path, mis_origin_n) = sd->P - previous_P; INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf( unguided_phase_pdf, 
INTEGRATOR_STATE(state, path, min_ray_pdf)); @@ -989,11 +1006,15 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg, /* Sample light ahead of volume stepping, for equiangular sampling. */ /* TODO: distant lights are ignored now, but could instead use even distribution. */ + LightSample ls ccl_optional_struct_init; + ls.emitter_id = EMITTER_NONE; const bool need_light_sample = !(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_TERMINATE); - float3 equiangular_P = zero_float3(); - const bool have_equiangular_sample = need_light_sample && - integrate_volume_equiangular_sample_light( - kg, state, ray, &sd, &rng_state, &equiangular_P); + + EquiangularCoefficients equiangular_coeffs = {zero_float3(), make_float2(ray->tmin, ray->tmax)}; + + const bool have_equiangular_sample = + need_light_sample && integrate_volume_equiangular_sample_light( + kg, state, ray, &sd, &rng_state, &equiangular_coeffs, ls); VolumeSampleMethod direct_sample_method = (have_equiangular_sample) ? volume_stack_sample_method(kg, state) : @@ -1023,7 +1044,7 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg, render_buffer, step_size, direct_sample_method, - equiangular_P, + equiangular_coeffs, result); /* Perform path termination. The intersect_closest will have already marked this path @@ -1091,7 +1112,8 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg, # ifdef __PATH_GUIDING__ unlit_throughput, # endif - result.direct_throughput); + result.direct_throughput, + ls); } /* Indirect light. 
@@ -1130,7 +1152,7 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg, # endif # endif - if (integrate_volume_phase_scatter(kg, state, &sd, &rng_state, &result.indirect_phases)) { + if (integrate_volume_phase_scatter(kg, state, &sd, ray, &rng_state, &result.indirect_phases)) { return VOLUME_PATH_SCATTERED; } else { diff --git a/intern/cycles/kernel/integrator/state_template.h b/intern/cycles/kernel/integrator/state_template.h index e8683ed9179..34154d1c7fa 100644 --- a/intern/cycles/kernel/integrator/state_template.h +++ b/intern/cycles/kernel/integrator/state_template.h @@ -75,6 +75,9 @@ KERNEL_STRUCT_MEMBER(ray, float, tmax, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(ray, float, time, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(ray, float, dP, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(ray, float, dD, KERNEL_FEATURE_PATH_TRACING) +#ifdef __LIGHT_TREE__ +KERNEL_STRUCT_MEMBER(ray, float, previous_dt, KERNEL_FEATURE_PATH_TRACING) +#endif KERNEL_STRUCT_END(ray) /*************************** Intersection result ******************************/ diff --git a/intern/cycles/kernel/light/area.h b/intern/cycles/kernel/light/area.h index eb03ca866ef..d9188355984 100644 --- a/intern/cycles/kernel/light/area.h +++ b/intern/cycles/kernel/light/area.h @@ -233,6 +233,11 @@ ccl_device bool area_light_spread_clamp_light(const float3 P, return true; } +ccl_device_forceinline bool area_light_is_ellipse(const ccl_global KernelAreaLight *light) +{ + return light->invarea < 0.0f; +} + /* Common API. */ /* Compute `eval_fac` and `pdf`. Also sample a new position on the light if `sample_coord`. 
*/ template @@ -338,7 +343,7 @@ ccl_device_inline bool area_light_sample(const ccl_global KernelLight *klight, const float light_v = dot(inplane, klight->area.axis_v) / klight->area.len_v; if (!in_volume_segment) { - const bool is_ellipse = (klight->area.invarea < 0.0f); + const bool is_ellipse = area_light_is_ellipse(&klight->area); /* Sampled point lies outside of the area light. */ if (is_ellipse && (sqr(light_u) + sqr(light_v) > 0.25f)) { @@ -380,7 +385,7 @@ ccl_device_inline bool area_light_intersect(const ccl_global KernelLight *klight { /* Area light. */ const float invarea = fabsf(klight->area.invarea); - const bool is_ellipse = (klight->area.invarea < 0.0f); + const bool is_ellipse = area_light_is_ellipse(&klight->area); if (invarea == 0.0f) { return false; } @@ -428,6 +433,55 @@ ccl_device_inline bool area_light_sample_from_intersection( return area_light_eval(klight, ray_P, &light_P, ls, zero_float2(), false); } +/* Returns the maximal distance between the light center and the boundary. */ +ccl_device_forceinline float area_light_max_extent(const ccl_global KernelAreaLight *light) +{ + return 0.5f * (area_light_is_ellipse(light) ? fmaxf(light->len_u, light->len_v) : + len(make_float2(light->len_u, light->len_v))); +} + +/* Find the ray segment lit by the area light. */ +ccl_device_inline bool area_light_valid_ray_segment(const ccl_global KernelAreaLight *light, + float3 P, + float3 D, + ccl_private float2 *t_range) +{ + bool valid; + const float tan_half_spread = light->tan_half_spread; + float3 axis = light->dir; + + const bool angle_almost_zero = (tan_half_spread < 1e-5f); + if (angle_almost_zero) { + /* Map to local coordinate of the light. Do not use `itfm` in `KernelLight` as there might be + * additional scaling in the light size. 
*/ + const Transform tfm = make_transform(light->axis_u, light->axis_v, axis); + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D); + axis = make_float3(0.0f, 0.0f, 1.0f); + + const float half_len_u = 0.5f * light->len_u; + const float half_len_v = 0.5f * light->len_v; + if (area_light_is_ellipse(light)) { + valid = ray_infinite_cylinder_intersect(P, D, half_len_u, half_len_v, t_range); + } + else { + const float3 bbox_min = make_float3(-half_len_u, -half_len_v, 0.0f); + const float3 bbox_max = make_float3(half_len_u, half_len_v, FLT_MAX); + valid = ray_aabb_intersect(bbox_min, bbox_max, P, D, t_range); + } + } + else { + /* Conservative estimation with the smallest possible cone covering the whole spread. */ + const float3 apex_to_point = P + area_light_max_extent(light) / tan_half_spread * axis; + const float cos_angle_sq = 1.0f / (1.0f + sqr(tan_half_spread)); + + valid = ray_cone_intersect(axis, apex_to_point, D, cos_angle_sq, t_range); + } + + /* Limit the range to the positive side of the area light. 
*/ + return valid && ray_plane_intersect(axis, P, D, t_range); +} + template ccl_device_forceinline bool area_light_tree_parameters(const ccl_global KernelLight *klight, const float3 centroid, @@ -464,9 +518,8 @@ ccl_device_forceinline bool area_light_tree_parameters(const ccl_global KernelLi const bool shape_above_surface = dot(N, centroid - P) + fabsf(dot(N, extentu)) + fabsf(dot(N, extentv)) > 0; - const bool in_volume = is_zero(N); - return (front_facing && shape_above_surface) || in_volume; + return front_facing && shape_above_surface; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/light/common.h b/intern/cycles/kernel/light/common.h index ea724991817..7ea7519f94f 100644 --- a/intern/cycles/kernel/light/common.h +++ b/intern/cycles/kernel/light/common.h @@ -12,9 +12,9 @@ CCL_NAMESPACE_BEGIN typedef struct LightSample { float3 P; /* position on light, or direction for distant light */ - float3 Ng; /* normal on light */ - float3 D; /* direction from shading point to light */ + packed_float3 Ng; /* normal on light */ float t; /* distance to light (FLT_MAX for distant light) */ + float3 D; /* direction from shading point to light */ float u, v; /* parametric coordinate on primitive */ float pdf; /* pdf for selecting light and point on light */ float pdf_selection; /* pdf for selecting light */ @@ -25,6 +25,7 @@ typedef struct LightSample { int lamp; /* lamp id */ int group; /* lightgroup */ LightType type; /* type of light */ + int emitter_id; /* index in the emitter array */ } LightSample; /* Utilities */ diff --git a/intern/cycles/kernel/light/distribution.h b/intern/cycles/kernel/light/distribution.h index 23cdaa6dff5..b6b9f2e035c 100644 --- a/intern/cycles/kernel/light/distribution.h +++ b/intern/cycles/kernel/light/distribution.h @@ -41,36 +41,14 @@ ccl_device int light_distribution_sample(KernelGlobals kg, const float rand) return index; } -template ccl_device_noinline bool light_distribution_sample(KernelGlobals kg, - const float3 rand, - const float 
time, - const float3 P, - const float3 N, - const int object_receiver, - const int shader_flags, - const int bounce, - const uint32_t path_flag, + const float rand, ccl_private LightSample *ls) { /* Sample light index from distribution. */ - /* The first two dimensions of the Sobol sequence have better stratification. */ - const int index = light_distribution_sample(kg, rand.z); - const float pdf_selection = kernel_data.integrator.distribution_pdf_lights; - const float2 rand_uv = float3_to_float2(rand); - return light_sample(kg, - rand_uv, - time, - P, - N, - object_receiver, - shader_flags, - bounce, - path_flag, - index, - 0, - pdf_selection, - ls); + ls->emitter_id = light_distribution_sample(kg, rand); + ls->pdf_selection = kernel_data.integrator.distribution_pdf_lights; + return true; } ccl_device_inline float light_distribution_pdf_lamp(KernelGlobals kg) diff --git a/intern/cycles/kernel/light/light.h b/intern/cycles/kernel/light/light.h index 8f18e5ce7a5..39a22f92d9d 100644 --- a/intern/cycles/kernel/light/light.h +++ b/intern/cycles/kernel/light/light.h @@ -177,7 +177,7 @@ ccl_device_inline bool light_sample(KernelGlobals kg, template ccl_device_noinline bool light_sample(KernelGlobals kg, - const float2 rand, + const float3 rand_light, const float time, const float3 P, const float3 N, @@ -185,33 +185,31 @@ ccl_device_noinline bool light_sample(KernelGlobals kg, const int shader_flags, const int bounce, const uint32_t path_flag, - const int emitter_index, - const int object_id, - const float pdf_selection, ccl_private LightSample *ls) { + /* The first two dimensions of the Sobol sequence have better stratification, use them to sample + * position on the light. 
*/ + const float2 rand = float3_to_float2(rand_light); + int prim; MeshLight mesh_light; #ifdef __LIGHT_TREE__ if (kernel_data.integrator.use_light_tree) { ccl_global const KernelLightTreeEmitter *kemitter = &kernel_data_fetch(light_tree_emitters, - emitter_index); + ls->emitter_id); prim = kemitter->light.id; mesh_light.shader_flag = kemitter->mesh_light.shader_flag; - mesh_light.object_id = object_id; + mesh_light.object_id = ls->object; } else #endif { ccl_global const KernelLightDistribution *kdistribution = &kernel_data_fetch( - light_distribution, emitter_index); + light_distribution, ls->emitter_id); prim = kdistribution->prim; mesh_light = kdistribution->mesh_light; } - /* A different value would be assigned in `triangle_light_sample()` if `!use_light_tree`. */ - ls->pdf_selection = pdf_selection; - if (prim >= 0) { /* Mesh light. */ const int object = mesh_light.object_id; diff --git a/intern/cycles/kernel/light/sample.h b/intern/cycles/kernel/light/sample.h index afc4537c671..434383ebc2b 100644 --- a/intern/cycles/kernel/light/sample.h +++ b/intern/cycles/kernel/light/sample.h @@ -329,17 +329,25 @@ ccl_device_inline bool light_sample_from_volume_segment(KernelGlobals kg, const uint32_t path_flag, ccl_private LightSample *ls) { + const int shader_flags = SD_BSDF_HAS_TRANSMISSION; + #ifdef __LIGHT_TREE__ if (kernel_data.integrator.use_light_tree) { - return light_tree_sample( - kg, rand, time, P, D, t, object_receiver, SD_BSDF_HAS_TRANSMISSION, bounce, path_flag, ls); + if (!light_tree_sample(kg, rand.z, P, D, t, object_receiver, shader_flags, ls)) { + return false; + } } else #endif { - return light_distribution_sample( - kg, rand, time, P, D, object_receiver, SD_BSDF_HAS_TRANSMISSION, bounce, path_flag, ls); + if (!light_distribution_sample(kg, rand.z, ls)) { + return false; + } } + + /* Sample position on the selected light. 
*/ + return light_sample( + kg, rand, time, P, D, object_receiver, shader_flags, bounce, path_flag, ls); } ccl_device bool light_sample_from_position(KernelGlobals kg, @@ -354,17 +362,24 @@ ccl_device bool light_sample_from_position(KernelGlobals kg, const uint32_t path_flag, ccl_private LightSample *ls) { + /* Randomly select a light. */ #ifdef __LIGHT_TREE__ if (kernel_data.integrator.use_light_tree) { - return light_tree_sample( - kg, rand, time, P, N, 0.0f, object_receiver, shader_flags, bounce, path_flag, ls); + if (!light_tree_sample(kg, rand.z, P, N, 0.0f, object_receiver, shader_flags, ls)) { + return false; + } } else #endif { - return light_distribution_sample( - kg, rand, time, P, N, object_receiver, shader_flags, bounce, path_flag, ls); + if (!light_distribution_sample(kg, rand.z, ls)) { + return false; + } } + + /* Sample position on the selected light. */ + return light_sample( + kg, rand, time, P, N, object_receiver, shader_flags, bounce, path_flag, ls); } /* Update light sample with new shading point position for MNEE. 
The position on the light is fixed @@ -415,13 +430,15 @@ ccl_device_inline float light_sample_mis_weight_forward_surface(KernelGlobals kg #ifdef __LIGHT_TREE__ if (kernel_data.integrator.use_light_tree) { float3 ray_P = INTEGRATOR_STATE(state, ray, P); + const float dt = INTEGRATOR_STATE(state, ray, previous_dt); const float3 N = INTEGRATOR_STATE(state, path, mis_origin_n); + uint lookup_offset = kernel_data_fetch(object_lookup_offset, sd->object); uint prim_offset = kernel_data_fetch(object_prim_offset, sd->object); uint triangle = kernel_data_fetch(triangle_to_tree, sd->prim - prim_offset + lookup_offset); pdf *= light_tree_pdf( - kg, ray_P, N, path_flag, sd->object, triangle, light_link_receiver_forward(kg, state)); + kg, ray_P, N, dt, path_flag, sd->object, triangle, light_link_receiver_forward(kg, state)); } else #endif @@ -445,9 +462,11 @@ ccl_device_inline float light_sample_mis_weight_forward_lamp(KernelGlobals kg, #ifdef __LIGHT_TREE__ if (kernel_data.integrator.use_light_tree) { const float3 N = INTEGRATOR_STATE(state, path, mis_origin_n); + const float dt = INTEGRATOR_STATE(state, ray, previous_dt); pdf *= light_tree_pdf(kg, P, N, + dt, path_flag, 0, kernel_data_fetch(light_to_tree, ls->lamp), @@ -485,9 +504,10 @@ ccl_device_inline float light_sample_mis_weight_forward_background(KernelGlobals #ifdef __LIGHT_TREE__ if (kernel_data.integrator.use_light_tree) { const float3 N = INTEGRATOR_STATE(state, path, mis_origin_n); + const float dt = INTEGRATOR_STATE(state, ray, previous_dt); uint light = kernel_data_fetch(light_to_tree, kernel_data.background.light_index); pdf *= light_tree_pdf( - kg, ray_P, N, path_flag, 0, light, light_link_receiver_forward(kg, state)); + kg, ray_P, N, dt, path_flag, 0, light, light_link_receiver_forward(kg, state)); } else #endif diff --git a/intern/cycles/kernel/light/spot.h b/intern/cycles/kernel/light/spot.h index c5090573d4d..56989933ded 100644 --- a/intern/cycles/kernel/light/spot.h +++ b/intern/cycles/kernel/light/spot.h 
@@ -265,6 +265,24 @@ ccl_device_inline bool spot_light_sample_from_intersection( return true; } +/* Find the ray segment lit by the spot light. */ +ccl_device_inline bool spot_light_valid_ray_segment(const ccl_global KernelLight *klight, + const float3 P, + const float3 D, + ccl_private float2 *t_range) +{ + /* Convert to local space of the spot light. */ + const Transform itfm = klight->itfm; + float3 local_P = P + klight->spot.dir * klight->spot.ray_segment_dp; + local_P = transform_point(&itfm, local_P); + const float3 local_D = transform_direction(&itfm, D); + const float3 axis = make_float3(0.0f, 0.0f, -1.0f); + + /* Intersect the ray with the smallest enclosing cone of the light spread. */ + return ray_cone_intersect( + axis, local_P, local_D, sqr(klight->spot.cos_half_spot_angle), t_range); +} + template ccl_device_forceinline bool spot_light_tree_parameters(const ccl_global KernelLight *klight, const float3 centroid, diff --git a/intern/cycles/kernel/light/tree.h b/intern/cycles/kernel/light/tree.h index 3fd49e30cbe..bbca17e5f75 100644 --- a/intern/cycles/kernel/light/tree.h +++ b/intern/cycles/kernel/light/tree.h @@ -148,10 +148,7 @@ ccl_device void light_tree_importance(const float3 N_or_D, float cos_min_incidence_angle = 1.0f; float cos_max_incidence_angle = 1.0f; - /* When sampling the light tree for the second time in `shade_volume.h` and when query the pdf in - * `sample.h`. */ - const bool in_volume = is_zero(N_or_D); - if (!in_volume_segment && !in_volume) { + if (!in_volume_segment) { const float3 N = N_or_D; const float cos_theta_i = has_transmission ? fabsf(dot(point_to_centroid, N)) : dot(point_to_centroid, N); @@ -221,9 +218,9 @@ ccl_device void light_tree_importance(const float3 N_or_D, max_importance = fabsf(f_a * cos_min_incidence_angle * energy * cos_min_outgoing_angle / (in_volume_segment ? min_distance : sqr(min_distance))); - /* TODO: also min importance for volume? */ + /* TODO: compute proper min importance for volume. 
*/ if (in_volume_segment) { - min_importance = max_importance; + min_importance = 0.0f; return; } @@ -270,10 +267,10 @@ ccl_device bool compute_emitter_centroid_and_dir(KernelGlobals kg, /* Arbitrary centroid and direction. */ centroid = make_float3(0.0f, 0.0f, 1.0f); dir = make_float3(0.0f, 0.0f, -1.0f); - return !in_volume_segment; + break; case LIGHT_DISTANT: dir = centroid; - return !in_volume_segment; + break; default: return false; } @@ -323,12 +320,13 @@ ccl_device void light_tree_node_importance(KernelGlobals kg, float cos_theta_u; float distance; if (knode->type == LIGHT_TREE_DISTANT) { - if (in_volume_segment) { - return; - } point_to_centroid = -bcone.axis; cos_theta_u = fast_cosf(bcone.theta_o + bcone.theta_e); distance = 1.0f; + if (t == FLT_MAX) { + /* In world volume, distant light has no contribution. */ + return; + } } else { const float3 centroid = 0.5f * (bbox.min + bbox.max); @@ -339,6 +337,9 @@ ccl_device void light_tree_node_importance(KernelGlobals kg, /* Minimal distance of the ray to the cluster. */ distance = len(centroid - closest_point); point_to_centroid = -compute_v(centroid, P, D, bcone.axis, t); + /* FIXME(weizhen): it is not clear from which point the `cos_theta_u` should be computed in + * volume segment. We could use `closest_point` as a conservative measure, but then + * `point_to_centroid` should also use `closest_point`. */ cos_theta_u = light_tree_cos_bounding_box_angle(bbox, closest_point, point_to_centroid); } else { @@ -697,17 +698,16 @@ ccl_device int light_tree_root_node_index(KernelGlobals kg, const int object_rec return 0; } +/* Pick a random light from the light tree from a given shading point P, write to the picked light + * index and the probability of picking the light. 
*/ template ccl_device_noinline bool light_tree_sample(KernelGlobals kg, - const float3 rand, - const float time, + const float rand, const float3 P, float3 N_or_D, float t, const int object_receiver, const int shader_flags, - const int bounce, - const uint32_t path_flag, ccl_private LightSample *ls) { if (!kernel_data.integrator.use_direct_light) { @@ -718,10 +718,8 @@ ccl_device_noinline bool light_tree_sample(KernelGlobals kg, float pdf_leaf = 1.0f; float pdf_selection = 1.0f; int selected_emitter = -1; - int object_emitter = 0; int node_index = light_tree_root_node_index(kg, object_receiver); - /* The first two dimensions of the Sobol sequence have better stratification. */ - float rand_selection = rand.z; + float rand_selection = rand; float3 local_P = P; @@ -743,7 +741,7 @@ ccl_device_noinline bool light_tree_sample(KernelGlobals kg, } /* Continue with the picked mesh light. */ - object_emitter = kernel_data_fetch(light_tree_emitters, selected_emitter).mesh.object_id; + ls->object = kernel_data_fetch(light_tree_emitters, selected_emitter).mesh.object_id; continue; } @@ -766,27 +764,18 @@ ccl_device_noinline bool light_tree_sample(KernelGlobals kg, pdf_leaf *= (node_index == left_index) ? left_prob : (1.0f - left_prob); } - pdf_selection *= pdf_leaf; + ls->emitter_id = selected_emitter; + ls->pdf_selection = pdf_selection * pdf_leaf; - return light_sample(kg, - float3_to_float2(rand), - time, - P, - N_or_D, - object_receiver, - shader_flags, - bounce, - path_flag, - selected_emitter, - object_emitter, - pdf_selection, - ls); + return true; } /* We need to be able to find the probability of selecting a given light for MIS. 
*/ +template ccl_device float light_tree_pdf(KernelGlobals kg, float3 P, float3 N, + const float dt, const int path_flag, const int object_emitter, const uint index_emitter, @@ -796,7 +785,7 @@ ccl_device float light_tree_pdf(KernelGlobals kg, ccl_global const KernelLightTreeEmitter *kemitter = &kernel_data_fetch(light_tree_emitters, index_emitter); - int root_index; + int subtree_root_index; uint bit_trail, target_emitter; if (is_triangle(kemitter)) { @@ -805,16 +794,17 @@ ccl_device float light_tree_pdf(KernelGlobals kg, target_emitter = kernel_data_fetch(object_to_tree, object_emitter); ccl_global const KernelLightTreeEmitter *kmesh = &kernel_data_fetch(light_tree_emitters, target_emitter); - root_index = kmesh->mesh.node_id; - ccl_global const KernelLightTreeNode *kroot = &kernel_data_fetch(light_tree_nodes, root_index); + subtree_root_index = kmesh->mesh.node_id; + ccl_global const KernelLightTreeNode *kroot = &kernel_data_fetch(light_tree_nodes, + subtree_root_index); bit_trail = kroot->bit_trail; if (kroot->type == LIGHT_TREE_INSTANCE) { - root_index = kroot->instance.reference; + subtree_root_index = kroot->instance.reference; } } else { - root_index = 0; + subtree_root_index = -1; bit_trail = kemitter->bit_trail; target_emitter = index_emitter; } @@ -836,8 +826,8 @@ ccl_device float light_tree_pdf(KernelGlobals kg, for (int i = 0; i < knode->num_emitters; i++) { const int emitter = knode->leaf.first_emitter + i; float max_importance, min_importance; - light_tree_emitter_importance( - kg, P, N, 0, has_transmission, emitter, max_importance, min_importance); + light_tree_emitter_importance( + kg, P, N, dt, has_transmission, emitter, max_importance, min_importance); num_has_importance += (max_importance > 0); if (emitter == target_emitter) { target_max_importance = max_importance; @@ -856,13 +846,13 @@ ccl_device float light_tree_pdf(KernelGlobals kg, return 0.0f; } - if (root_index) { + if (subtree_root_index != -1) { /* Arrived at the mesh light. 
Continue with the subtree. */ float unused; - light_tree_to_local_space(kg, object_emitter, P, N, unused); + light_tree_to_local_space(kg, object_emitter, P, N, unused); - node_index = root_index; - root_index = 0; + node_index = subtree_root_index; + subtree_root_index = -1; target_emitter = index_emitter; bit_trail = kemitter->bit_trail; continue; @@ -877,8 +867,8 @@ ccl_device float light_tree_pdf(KernelGlobals kg, const int right_index = knode->inner.right_child; float left_prob; - if (!get_left_probability( - kg, P, N, 0, has_transmission, left_index, right_index, left_prob)) + if (!get_left_probability( + kg, P, N, dt, has_transmission, left_index, right_index, left_prob)) { return 0.0f; } @@ -896,4 +886,27 @@ ccl_device float light_tree_pdf(KernelGlobals kg, } } +/* If the function is called in volume, retrieve the previous point in volume segment, and compute + * pdf from there. Otherwise compute from the current shading point. */ +ccl_device_inline float light_tree_pdf(KernelGlobals kg, + float3 P, + float3 N, + const float dt, + const int path_flag, + const int emitter_object, + const uint emitter_id, + const int object_receiver) +{ + if (path_flag & PATH_RAY_VOLUME_SCATTER) { + const float3 D_times_t = N; + const float3 D = normalize(D_times_t); + P = P - D_times_t; + return light_tree_pdf( + kg, P, D, dt, path_flag, emitter_object, emitter_id, object_receiver); + } + + return light_tree_pdf( + kg, P, N, 0.0f, path_flag, emitter_object, emitter_id, object_receiver); +} + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/light/triangle.h b/intern/cycles/kernel/light/triangle.h index 58fc8ea1d92..8007c8d6123 100644 --- a/intern/cycles/kernel/light/triangle.h +++ b/intern/cycles/kernel/light/triangle.h @@ -269,6 +269,26 @@ ccl_device_forceinline bool triangle_light_sample(KernelGlobals kg, return (ls->pdf > 0.0f); } +/* Find the ray segment lit by the triangle light. 
*/ +ccl_device_inline bool triangle_light_valid_ray_segment(KernelGlobals kg, + const float3 P, + const float3 D, + ccl_private float2 *t_range, + const ccl_private LightSample *ls) +{ + const int shader_flag = kernel_data_fetch(shaders, ls->shader & SHADER_MASK).flags; + const int SD_MIS_BOTH = SD_MIS_BACK | SD_MIS_FRONT; + if ((shader_flag & SD_MIS_BOTH) == SD_MIS_BOTH) { + /* Both sides are sampled, the complete ray segment is visible. */ + return true; + } + + /* Only one side is sampled, intersect the ray and the triangle light plane to find the visible + * ray segment. Flip normal if Emission Sampling is set to back. */ + const float3 N = ls->Ng; + return ray_plane_intersect((shader_flag & SD_MIS_BACK) ? -N : N, P, D, t_range); +} + template ccl_device_forceinline bool triangle_light_tree_parameters( KernelGlobals kg, @@ -307,9 +327,8 @@ ccl_device_forceinline bool triangle_light_tree_parameters( } const bool front_facing = bcone.theta_o != 0.0f || dot(bcone.axis, point_to_centroid) < 0; - const bool in_volume = is_zero(N); - return (front_facing && shape_above_surface) || in_volume; + return front_facing && shape_above_surface; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h index 2ed5a790199..ce1c1da5168 100644 --- a/intern/cycles/kernel/types.h +++ b/intern/cycles/kernel/types.h @@ -45,6 +45,7 @@ CCL_NAMESPACE_BEGIN #define OBJECT_NONE (~0) #define PRIM_NONE (~0) #define LAMP_NONE (~0) +#define EMITTER_NONE (~0) #define ID_NONE (0.0f) #define PASS_UNUSED (~0) #define LIGHTGROUP_NONE (~0) @@ -1376,6 +1377,8 @@ typedef struct KernelSpotLight { int is_sphere; /* For non-uniform object scaling, the actual spread might be different. */ float cos_half_larger_spread; + /* Distance from the apex of the smallest enclosing cone of the light spread to light center. */ + float ray_segment_dp; } KernelSpotLight; /* PointLight is SpotLight with only radius and invarea being used. 
*/ diff --git a/intern/cycles/scene/light.cpp b/intern/cycles/scene/light.cpp index 78d237bcd8c..fb424a8fadf 100644 --- a/intern/cycles/scene/light.cpp +++ b/intern/cycles/scene/light.cpp @@ -1362,6 +1362,9 @@ void LightManager::device_update_lights(Device *device, DeviceScene *dscene, Sce /* Choose the angle which spans a larger cone. */ klights[light_index].spot.cos_half_larger_spread = inversesqrtf( 1.0f + tan_sq * fmaxf(len_u_sq, len_v_sq) / len_w_sq); + /* radius / sin(half_angle_small) */ + klights[light_index].spot.ray_segment_dp = + light->size * sqrtf(1.0f + len_w_sq / (tan_sq * fminf(len_u_sq, len_v_sq))); } klights[light_index].shader_id = shader_id; diff --git a/intern/cycles/util/math.h b/intern/cycles/util/math.h index 7d5cab7e30c..cdea258c916 100644 --- a/intern/cycles/util/math.h +++ b/intern/cycles/util/math.h @@ -1030,6 +1030,46 @@ ccl_device_inline uint32_t reverse_integer_bits(uint32_t x) #endif } +/* Check if intervals (first->x, first->y) and (second.x, second.y) intersect, and replace the + * first interval with their intersection. */ +ccl_device_inline bool intervals_intersect(ccl_private float2 *first, const float2 second) +{ + first->x = fmaxf(first->x, second.x); + first->y = fminf(first->y, second.y); + + return first->x < first->y; +} + +/* Solve quadratic equation a*x^2 + b*x + c = 0, adapted from Mitsuba 3 + * The solution is ordered so that x1 <= x2. + * Returns true if at least one solution is found. */ +ccl_device_inline bool solve_quadratic( + const float a, const float b, const float c, ccl_private float &x1, ccl_private float &x2) +{ + /* If the equation is linear, the solution is -c/b, but b has to be non-zero. */ + const bool valid_linear = (a == 0.0f) && (b != 0.0f); + x1 = x2 = -c / b; + + const float discriminant = sqr(b) - 4.0f * a * c; + /* Allow slightly negative discriminant in case of numerical precision issues. 
*/ + const bool valid_quadratic = (a != 0.0f) && (discriminant > -1e-5f); + + if (valid_quadratic) { + /* Numerically stable version of (-b ± sqrt(discriminant)) / (2 * a), avoiding catastrophic + * cancellation when `b` is very close to `sqrt(discriminant)`, by finding the solution of + * greater magnitude which does not suffer from loss of precision, then using the identity + * x1 * x2 = c / a. */ + const float temp = -0.5f * (b + copysignf(safe_sqrtf(discriminant), b)); + const float r1 = temp / a; + const float r2 = c / temp; + + x1 = fminf(r1, r2); + x2 = fmaxf(r1, r2); + } + + return (valid_linear || valid_quadratic); +} + CCL_NAMESPACE_END #endif /* __UTIL_MATH_H__ */ diff --git a/intern/cycles/util/math_intersect.h b/intern/cycles/util/math_intersect.h index b09cf2a4b1b..2e4b9c979f7 100644 --- a/intern/cycles/util/math_intersect.h +++ b/intern/cycles/util/math_intersect.h @@ -302,6 +302,140 @@ ccl_device bool ray_quad_intersect(float3 ray_P, return true; } +/* Find the ray segment that lies in the same side as the normal `N` of the plane. + * `P` is the vector pointing from any point on the plane to the ray origin. */ +ccl_device bool ray_plane_intersect(const float3 N, + const float3 P, + const float3 ray_D, + ccl_private float2 *t_range) +{ + const float DN = dot(ray_D, N); + + /* Distance from P to the plane. */ + const float t = -dot(P, N) / DN; + + /* Limit the range to the positive side. */ + if (DN > 0.0f) { + t_range->x = fmaxf(t_range->x, t); + } + else { + t_range->y = fminf(t_range->y, t); + } + + return t_range->x < t_range->y; +} + +/* Find the ray segment inside an axis-aligned bounding box. 
*/ +ccl_device bool ray_aabb_intersect(const float3 bbox_min, + const float3 bbox_max, + const float3 ray_P, + const float3 ray_D, + ccl_private float2 *t_range) +{ + const float3 inv_ray_D = rcp(ray_D); + + /* Absolute distances to lower and upper box coordinates; */ + const float3 t_lower = (bbox_min - ray_P) * inv_ray_D; + const float3 t_upper = (bbox_max - ray_P) * inv_ray_D; + + /* The four t-intervals (for x-/y-/z-slabs, and ray p(t)). */ + const float4 tmins = float3_to_float4(min(t_lower, t_upper), t_range->x); + const float4 tmaxes = float3_to_float4(max(t_lower, t_upper), t_range->y); + + /* Max of mins and min of maxes. */ + const float tmin = reduce_max(tmins); + const float tmax = reduce_min(tmaxes); + + *t_range = make_float2(tmin, tmax); + + return tmin < tmax; +} + +/* Find the segment of a ray defined by P + D * t that lies inside a cylinder defined by + * (x / len_u)^2 + (y / len_v)^2 = 1. */ +ccl_device_inline bool ray_infinite_cylinder_intersect(const float3 P, + const float3 D, + const float len_u, + const float len_v, + ccl_private float2 *t_range) +{ + /* Convert to a 2D problem. */ + const float2 inv_len = 1.0f / make_float2(len_u, len_v); + float2 P_proj = float3_to_float2(P) * inv_len; + const float2 D_proj = float3_to_float2(D) * inv_len; + + /* Solve quadratic equation a*t^2 + 2b*t + c = 0. */ + const float a = dot(D_proj, D_proj); + float b = dot(P_proj, D_proj); + + /* Move ray origin closer to the cylinder to prevent precision issue when the ray is far away. */ + const float t_mid = -b / a; + P_proj += D_proj * t_mid; + + /* Recompute b from the shifted origin. */ + b = dot(P_proj, D_proj); + const float c = dot(P_proj, P_proj) - 1.0f; + + float tmin, tmax; + const bool valid = solve_quadratic(a, 2.0f * b, c, tmin, tmax); + + return valid && intervals_intersect(t_range, make_float2(tmin, tmax) + t_mid); +} + +/* * + * Find the ray segment inside a single-sided cone. 
+ * + * \param axis: a unit-length direction around which the cone has a circular symmetry + * \param P: the vector pointing from the cone apex to the ray origin + * \param D: the direction of the ray, does not need to have unit-length + * \param cos_angle_sq: `sqr(cos(half_aperture_of_the_cone))` + * \param t_range: the lower and upper bounds between which the ray lies inside the cone + * \return whether the intersection exists and is in the provided range + * + * See https://www.geometrictools.com/Documentation/IntersectionLineCone.pdf for illustration + */ +ccl_device_inline bool ray_cone_intersect(const float3 axis, + const float3 P, + float3 D, + const float cos_angle_sq, + ccl_private float2 *t_range) +{ + if (cos_angle_sq < 1e-4f) { + /* The cone is nearly a plane. */ + return ray_plane_intersect(axis, P, D, t_range); + } + + const float inv_len = inversesqrtf(len_squared(D)); + D *= inv_len; + + const float AD = dot(axis, D); + const float AP = dot(axis, P); + + const float a = sqr(AD) - cos_angle_sq; + const float b = 2.0f * (AD * AP - cos_angle_sq * dot(D, P)); + const float c = sqr(AP) - cos_angle_sq * dot(P, P); + + float tmin = 0.0f, tmax = FLT_MAX; + bool valid = solve_quadratic(a, b, c, tmin, tmax); + + /* Check if the intersections are in the same hemisphere as the cone. 
*/ + const bool tmin_valid = AP + tmin * AD > 0.0f; + const bool tmax_valid = AP + tmax * AD > 0.0f; + + valid &= (tmin_valid || tmax_valid); + + if (!tmax_valid) { + tmax = tmin; + tmin = 0.0f; + } + else if (!tmin_valid) { + tmin = tmax; + tmax = FLT_MAX; + } + + return valid && intervals_intersect(t_range, make_float2(tmin, tmax) * inv_len); +} + CCL_NAMESPACE_END #endif /* __UTIL_MATH_INTERSECT_H__ */ diff --git a/intern/cycles/util/transform.h b/intern/cycles/util/transform.h index 208c68dc5a1..0263be7c841 100644 --- a/intern/cycles/util/transform.h +++ b/intern/cycles/util/transform.h @@ -161,6 +161,17 @@ ccl_device_inline Transform make_transform(float a, return t; } +ccl_device_inline Transform make_transform(const float3 x, const float3 y, const float3 z) +{ + Transform t; + + t.x = float3_to_float4(x, 0.0f); + t.y = float3_to_float4(y, 0.0f); + t.z = float3_to_float4(z, 0.0f); + + return t; +} + ccl_device_inline Transform euler_to_transform(const float3 euler) { float cx = cosf(euler.x); diff --git a/intern/ghost/intern/GHOST_WindowWayland.cc b/intern/ghost/intern/GHOST_WindowWayland.cc index 93b40b941e3..0b397732744 100644 --- a/intern/ghost/intern/GHOST_WindowWayland.cc +++ b/intern/ghost/intern/GHOST_WindowWayland.cc @@ -1886,6 +1886,30 @@ GHOST_WindowWayland::GHOST_WindowWayland(GHOST_SystemWayland *system, gwl_window_state_set(window_, state); } + /* NOTE(@ideasman42): Round trips are important before committing. + * This is needed because setting the state is likely to resize the window + * (in the case of maximized & full-screen), "normal" windows may still be resized when + * they are too large or with tiling window-managers. + * + * The additional updates allow for the actual size to be configured by the window manager + * which is read back before committing the surface. This avoids displaying the buffer + * before it's resized (avoiding flickering). 
+ * + * Without the round-trip here: + * - The window will be created and this function will return using the requested buffer size, + * instead of the window size which ends up being used (causing a visible flicker). + * This has the down side that Blender's internal window state has the outdated size + * which then gets immediately resized, causing a noticeable glitch. + * - The window decorations will be displayed at the wrong size before refreshing + * at the new size. + * - On GNOME-Shell 46 shows the previous buffer-size under some conditions, see #119871. + * - 2x updates are needed for RIVER & HYPRLAND. + */ + for (int i = 0; i < 2; i++) { + wl_display_flush(system->wl_display_get()); + wl_display_dispatch(system->wl_display_get()); + } + /* Commit after setting the buffer. * While postponing until after the buffer drawing is context is set * isn't essential, it reduces flickering. */ diff --git a/scripts/startup/bl_ui/properties_data_armature.py b/scripts/startup/bl_ui/properties_data_armature.py index 7f14f4bd75b..a8582708dd4 100644 --- a/scripts/startup/bl_ui/properties_data_armature.py +++ b/scripts/startup/bl_ui/properties_data_armature.py @@ -158,7 +158,7 @@ class ARMATURE_MT_collection_tree_context_menu(Menu): # editable or not. That means this menu has to do the disabling for it. 
sub = layout.column() sub.enabled = not active_bcoll_is_locked - sub.operator("armature.collection_add", text="Add Child Collection") + sub.operator("armature.collection_add", text="Add Bone Collection") sub.operator("armature.collection_remove") sub.operator("armature.collection_remove_unused", text="Remove Unused Collections") diff --git a/scripts/startup/bl_ui/space_view3d.py b/scripts/startup/bl_ui/space_view3d.py index ef75e104829..1fbd1c136a4 100644 --- a/scripts/startup/bl_ui/space_view3d.py +++ b/scripts/startup/bl_ui/space_view3d.py @@ -2187,6 +2187,7 @@ class VIEW3D_MT_paint_grease_pencil(Menu): layout.separator() layout.menu("VIEW3D_MT_edit_greasepencil_showhide") + layout.menu("VIEW3D_MT_edit_greasepencil_cleanup") layout.separator() @@ -5803,6 +5804,15 @@ class VIEW3D_MT_edit_greasepencil_showhide(Menu): layout.operator("grease_pencil.layer_hide", text="Hide Inactive Layers").unselected = True +class VIEW3D_MT_edit_greasepencil_cleanup(Menu): + bl_label = "Cleanup" + + def draw(self, _context): + layout = self.layout + + layout.operator("grease_pencil.clean_loose") + + class VIEW3D_MT_edit_greasepencil(Menu): bl_label = "Grease Pencil" @@ -5828,7 +5838,7 @@ class VIEW3D_MT_edit_greasepencil(Menu): layout.menu("VIEW3D_MT_edit_greasepencil_showhide") layout.operator_menu_enum("grease_pencil.separate", "mode", text="Separate") - layout.operator("grease_pencil.clean_loose") + layout.menu("VIEW3D_MT_edit_greasepencil_cleanup") layout.separator() @@ -7526,14 +7536,14 @@ class VIEW3D_PT_snapping(Panel): layout = self.layout col = layout.column() - col.label(text="Snap With") + col.label(text="Snap Base") row = col.row(align=True) row.prop(tool_settings, "snap_target", expand=True) - col.label(text="Snap To") + col.label(text="Snap Target") col.prop(tool_settings, "snap_elements_base", expand=True) - col.label(text="Snap Individual Elements To") + col.label(text="Snap Target for Individual Elements") col.prop(tool_settings, "snap_elements_individual", 
expand=True) col.separator() @@ -8988,6 +8998,7 @@ classes = ( VIEW3D_MT_edit_gpencil_delete, VIEW3D_MT_edit_gpencil_showhide, VIEW3D_MT_edit_greasepencil_showhide, + VIEW3D_MT_edit_greasepencil_cleanup, VIEW3D_MT_weight_gpencil, VIEW3D_MT_gpencil_animation, VIEW3D_MT_gpencil_simplify, diff --git a/source/blender/blenkernel/intern/attribute_access.cc b/source/blender/blenkernel/intern/attribute_access.cc index 6b947417e91..f69184175a3 100644 --- a/source/blender/blenkernel/intern/attribute_access.cc +++ b/source/blender/blenkernel/intern/attribute_access.cc @@ -253,54 +253,6 @@ static AttributeIDRef attribute_id_from_custom_data_layer(const CustomDataLayer return layer.name; } -static bool add_builtin_type_custom_data_layer_from_init(CustomData &custom_data, - const eCustomDataType data_type, - const int domain_num, - const AttributeInit &initializer) -{ - switch (initializer.type) { - case AttributeInit::Type::Construct: { - void *data = CustomData_add_layer(&custom_data, data_type, CD_CONSTRUCT, domain_num); - return data != nullptr; - } - case AttributeInit::Type::DefaultValue: { - void *data = CustomData_add_layer(&custom_data, data_type, CD_SET_DEFAULT, domain_num); - return data != nullptr; - } - case AttributeInit::Type::VArray: { - void *data = CustomData_add_layer(&custom_data, data_type, CD_CONSTRUCT, domain_num); - if (data == nullptr) { - return false; - } - const GVArray &varray = static_cast(initializer).varray; - varray.materialize_to_uninitialized(varray.index_range(), data); - return true; - } - case AttributeInit::Type::MoveArray: { - void *src_data = static_cast(initializer).data; - const void *stored_data = CustomData_add_layer_with_data( - &custom_data, data_type, src_data, domain_num, nullptr); - if (stored_data == nullptr) { - return false; - } - if (stored_data != src_data) { - MEM_freeN(src_data); - return true; - } - return true; - } - case AttributeInit::Type::Shared: { - const AttributeInitShared &init = static_cast(initializer); - const 
void *stored_data = CustomData_add_layer_with_data( - &custom_data, data_type, const_cast(init.data), domain_num, init.sharing_info); - return stored_data != nullptr; - } - } - - BLI_assert_unreachable(); - return false; -} - static void *add_generic_custom_data_layer(CustomData &custom_data, const eCustomDataType data_type, const eCDAllocType alloctype, @@ -393,10 +345,7 @@ static bool custom_data_layer_matches_attribute_id(const CustomDataLayer &layer, bool BuiltinCustomDataLayerProvider::layer_exists(const CustomData &custom_data) const { - if (stored_as_named_attribute_) { - return CustomData_get_named_layer_index(&custom_data, stored_type_, name_) != -1; - } - return CustomData_has_layer(&custom_data, stored_type_); + return CustomData_get_named_layer_index(&custom_data, data_type_, name_) != -1; } GAttributeReader BuiltinCustomDataLayerProvider::try_get_for_read(const void *owner) const @@ -416,13 +365,7 @@ GAttributeReader BuiltinCustomDataLayerProvider::try_get_for_read(const void *ow return {}; } - int index; - if (stored_as_named_attribute_) { - index = CustomData_get_named_layer_index(custom_data, stored_type_, name_); - } - else { - index = CustomData_get_layer_index(custom_data, stored_type_); - } + const int index = CustomData_get_named_layer_index(custom_data, data_type_, name_); if (index == -1) { return {}; } @@ -452,13 +395,7 @@ GAttributeWriter BuiltinCustomDataLayerProvider::try_get_for_write(void *owner) return {}; } - void *data = nullptr; - if (stored_as_named_attribute_) { - data = CustomData_get_layer_named_for_write(custom_data, stored_type_, name_, element_num); - } - else { - data = CustomData_get_layer_for_write(custom_data, stored_type_, element_num); - } + void *data = CustomData_get_layer_named_for_write(custom_data, data_type_, name_, element_num); if (data == nullptr) { return {}; } @@ -475,57 +412,42 @@ bool BuiltinCustomDataLayerProvider::try_delete(void *owner) const return {}; } - auto update = [&]() { + const int element_num = 
custom_data_access_.get_element_num(owner); + if (CustomData_free_layer_named(custom_data, name_, element_num)) { if (update_on_change_ != nullptr) { update_on_change_(owner); } - }; - - const int element_num = custom_data_access_.get_element_num(owner); - if (stored_as_named_attribute_) { - if (CustomData_free_layer_named(custom_data, name_, element_num)) { - update(); - return true; - } - return false; - } - - const int layer_index = CustomData_get_layer_index(custom_data, stored_type_); - if (CustomData_free_layer(custom_data, stored_type_, element_num, layer_index)) { - update(); return true; } - return false; } bool BuiltinCustomDataLayerProvider::try_create(void *owner, const AttributeInit &initializer) const { - if (createable_ != Creatable) { - return false; - } CustomData *custom_data = custom_data_access_.get_custom_data(owner); if (custom_data == nullptr) { return false; } const int element_num = custom_data_access_.get_element_num(owner); - if (stored_as_named_attribute_) { - if (CustomData_has_layer_named(custom_data, data_type_, name_)) { - /* Exists already. */ - return false; - } - return add_custom_data_layer_from_attribute_init( - name_, *custom_data, stored_type_, element_num, initializer); - } - - if (CustomData_get_layer(custom_data, stored_type_) != nullptr) { + if (CustomData_has_layer_named(custom_data, data_type_, name_)) { /* Exists already. */ return false; } - return add_builtin_type_custom_data_layer_from_init( - *custom_data, stored_type_, element_num, initializer); + if (add_custom_data_layer_from_attribute_init( + name_, *custom_data, data_type_, element_num, initializer)) + { + if (initializer.type != AttributeInit::Type::Construct) { + /* Avoid calling update function when values are not initialized. In that case + * values must be set elsewhere anyway, which will cause a separate update tag. 
*/ + if (update_on_change_ != nullptr) { + update_on_change_(owner); + } + } + return true; + } + return false; } bool BuiltinCustomDataLayerProvider::exists(const void *owner) const @@ -534,10 +456,7 @@ bool BuiltinCustomDataLayerProvider::exists(const void *owner) const if (custom_data == nullptr) { return false; } - if (stored_as_named_attribute_) { - return CustomData_has_layer_named(custom_data, stored_type_, name_); - } - return CustomData_get_layer(custom_data, stored_type_) != nullptr; + return CustomData_has_layer_named(custom_data, data_type_, name_); } GAttributeReader CustomDataAttributeProvider::try_get_for_read( diff --git a/source/blender/blenkernel/intern/attribute_access_intern.hh b/source/blender/blenkernel/intern/attribute_access_intern.hh index 663aab7f69c..ae372a00089 100644 --- a/source/blender/blenkernel/intern/attribute_access_intern.hh +++ b/source/blender/blenkernel/intern/attribute_access_intern.hh @@ -31,14 +31,10 @@ struct CustomDataAccessInfo { * A #BuiltinAttributeProvider is responsible for exactly one attribute on a geometry component. * The attribute is identified by its name and has a fixed domain and type. Builtin attributes do * not follow the same loose rules as other attributes, because they are mapped to internal - * "legacy" data structures. For example, some builtin attributes cannot be deleted. */ + * "legacy" data structures. For example, some builtin attributes cannot be deleted. + */ class BuiltinAttributeProvider { public: - /* Some utility enums to avoid hard to read booleans in function calls. 
*/ - enum CreatableEnum { - Creatable, - NonCreatable, - }; enum DeletableEnum { Deletable, NonDeletable, @@ -48,7 +44,6 @@ class BuiltinAttributeProvider { const std::string name_; const AttrDomain domain_; const eCustomDataType data_type_; - const CreatableEnum createable_; const DeletableEnum deletable_; const AttributeValidator validator_; @@ -56,13 +51,11 @@ class BuiltinAttributeProvider { BuiltinAttributeProvider(std::string name, const AttrDomain domain, const eCustomDataType data_type, - const CreatableEnum createable, const DeletableEnum deletable, AttributeValidator validator = {}) : name_(std::move(name)), domain_(domain), data_type_(data_type), - createable_(createable), deletable_(deletable), validator_(validator) { @@ -174,27 +167,21 @@ class CustomDataAttributeProvider final : public DynamicAttributesProvider { */ class BuiltinCustomDataLayerProvider final : public BuiltinAttributeProvider { using UpdateOnChange = void (*)(void *owner); - const eCustomDataType stored_type_; const CustomDataAccessInfo custom_data_access_; const UpdateOnChange update_on_change_; - bool stored_as_named_attribute_; public: BuiltinCustomDataLayerProvider(std::string attribute_name, const AttrDomain domain, - const eCustomDataType attribute_type, - const eCustomDataType stored_type, - const CreatableEnum creatable, + const eCustomDataType data_type, const DeletableEnum deletable, const CustomDataAccessInfo custom_data_access, - const UpdateOnChange update_on_write, + const UpdateOnChange update_on_change, const AttributeValidator validator = {}) : BuiltinAttributeProvider( - std::move(attribute_name), domain, attribute_type, creatable, deletable, validator), - stored_type_(stored_type), + std::move(attribute_name), domain, data_type, deletable, validator), custom_data_access_(custom_data_access), - update_on_change_(update_on_write), - stored_as_named_attribute_(data_type_ == stored_type_) + update_on_change_(update_on_change) { } diff --git 
a/source/blender/blenkernel/intern/bake_items_serialize.cc b/source/blender/blenkernel/intern/bake_items_serialize.cc index 57b06df5343..de1620d1e64 100644 --- a/source/blender/blenkernel/intern/bake_items_serialize.cc +++ b/source/blender/blenkernel/intern/bake_items_serialize.cc @@ -972,9 +972,14 @@ static std::shared_ptr serialize_geometry_set(const GeometrySet auto io_references = io_instances->append_array("references"); for (const InstanceReference &reference : instances.references()) { - BLI_assert(reference.type() == InstanceReference::Type::GeometrySet); - io_references->append( - serialize_geometry_set(reference.geometry_set(), blob_writer, blob_sharing)); + if (reference.type() == InstanceReference::Type::GeometrySet) { + const GeometrySet &geometry = reference.geometry_set(); + io_references->append(serialize_geometry_set(geometry, blob_writer, blob_sharing)); + } + else { + /* TODO: Support serializing object and collection references. */ + io_references->append(serialize_geometry_set({}, blob_writer, blob_sharing)); + } } auto io_attributes = serialize_attributes( diff --git a/source/blender/blenkernel/intern/curves_geometry.cc b/source/blender/blenkernel/intern/curves_geometry.cc index ccb67ec91d0..a4faa76d98e 100644 --- a/source/blender/blenkernel/intern/curves_geometry.cc +++ b/source/blender/blenkernel/intern/curves_geometry.cc @@ -58,6 +58,8 @@ CurvesGeometry::CurvesGeometry() : CurvesGeometry(0, 0) {} CurvesGeometry::CurvesGeometry(const int point_num, const int curve_num) { + this->runtime = MEM_new(__func__); + this->point_num = point_num; this->curve_num = curve_num; CustomData_reset(&this->point_data); @@ -67,8 +69,6 @@ CurvesGeometry::CurvesGeometry(const int point_num, const int curve_num) this->attributes_for_write().add( "position", AttrDomain::Point, AttributeInitConstruct()); - this->runtime = MEM_new(__func__); - if (curve_num > 0) { this->curve_offsets = static_cast( MEM_malloc_arrayN(this->curve_num + 1, sizeof(int), __func__)); 
diff --git a/source/blender/blenkernel/intern/editmesh.cc b/source/blender/blenkernel/intern/editmesh.cc index 750be28c146..cc1c8cf16ea 100644 --- a/source/blender/blenkernel/intern/editmesh.cc +++ b/source/blender/blenkernel/intern/editmesh.cc @@ -91,7 +91,7 @@ void BKE_editmesh_looptris_calc_with_partial_ex(BMEditMesh *em, const BMeshCalcTessellation_Params *params) { BLI_assert(em->looptris.size() == poly_to_tri_count(em->bm->totface, em->bm->totloop)); - BLI_assert(!em->looptris.is_empty()); + BLI_assert(!(em->bm->totface && em->looptris.is_empty())); BM_mesh_calc_tessellation_with_partial_ex(em->bm, em->looptris, bmpinfo, params); } diff --git a/source/blender/blenkernel/intern/geometry_component_curves.cc b/source/blender/blenkernel/intern/geometry_component_curves.cc index 896480d0ad7..a59e14fe566 100644 --- a/source/blender/blenkernel/intern/geometry_component_curves.cc +++ b/source/blender/blenkernel/intern/geometry_component_curves.cc @@ -476,8 +476,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve() static BuiltinCustomDataLayerProvider position("position", AttrDomain::Point, CD_PROP_FLOAT3, - CD_PROP_FLOAT3, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::NonDeletable, point_access, tag_component_positions_changed); @@ -485,8 +483,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve() static BuiltinCustomDataLayerProvider radius("radius", AttrDomain::Point, CD_PROP_FLOAT, - CD_PROP_FLOAT, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::Deletable, point_access, tag_component_radii_changed); @@ -494,8 +490,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve() static BuiltinCustomDataLayerProvider id("id", AttrDomain::Point, CD_PROP_INT32, - CD_PROP_INT32, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::Deletable, point_access, nullptr); @@ -503,8 +497,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve() 
static BuiltinCustomDataLayerProvider tilt("tilt", AttrDomain::Point, CD_PROP_FLOAT, - CD_PROP_FLOAT, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::Deletable, point_access, tag_component_normals_changed); @@ -512,8 +504,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve() static BuiltinCustomDataLayerProvider handle_right("handle_right", AttrDomain::Point, CD_PROP_FLOAT3, - CD_PROP_FLOAT3, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::Deletable, point_access, tag_component_positions_changed); @@ -521,8 +511,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve() static BuiltinCustomDataLayerProvider handle_left("handle_left", AttrDomain::Point, CD_PROP_FLOAT3, - CD_PROP_FLOAT3, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::Deletable, point_access, tag_component_positions_changed); @@ -536,8 +524,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve() static BuiltinCustomDataLayerProvider handle_type_right("handle_type_right", AttrDomain::Point, CD_PROP_INT8, - CD_PROP_INT8, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::Deletable, point_access, tag_component_topology_changed, @@ -546,8 +532,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve() static BuiltinCustomDataLayerProvider handle_type_left("handle_type_left", AttrDomain::Point, CD_PROP_INT8, - CD_PROP_INT8, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::Deletable, point_access, tag_component_topology_changed, @@ -556,21 +540,17 @@ static ComponentAttributeProviders create_attribute_providers_for_curve() static BuiltinCustomDataLayerProvider nurbs_weight("nurbs_weight", AttrDomain::Point, CD_PROP_FLOAT, - CD_PROP_FLOAT, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::Deletable, point_access, tag_component_positions_changed); static const auto nurbs_order_clamp = mf::build::SI1_SO( "NURBS Order Validate", 
- [](int8_t value) { return std::max(value, 0); }, + [](int8_t value) { return std::max(value, 1); }, mf::build::exec_presets::AllSpanOrSingle()); static BuiltinCustomDataLayerProvider nurbs_order("nurbs_order", AttrDomain::Curve, CD_PROP_INT8, - CD_PROP_INT8, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::Deletable, curve_access, tag_component_topology_changed, @@ -585,8 +565,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve() static BuiltinCustomDataLayerProvider normal_mode("normal_mode", AttrDomain::Curve, CD_PROP_INT8, - CD_PROP_INT8, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::Deletable, curve_access, tag_component_normals_changed, @@ -595,8 +573,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve() static BuiltinCustomDataLayerProvider custom_normal("custom_normal", AttrDomain::Point, CD_PROP_FLOAT3, - CD_PROP_FLOAT3, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::Deletable, point_access, tag_component_normals_changed); @@ -610,8 +586,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve() static BuiltinCustomDataLayerProvider nurbs_knots_mode("knots_mode", AttrDomain::Curve, CD_PROP_INT8, - CD_PROP_INT8, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::Deletable, curve_access, tag_component_topology_changed, @@ -626,8 +600,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve() static BuiltinCustomDataLayerProvider curve_type("curve_type", AttrDomain::Curve, CD_PROP_INT8, - CD_PROP_INT8, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::Deletable, curve_access, tag_component_curve_types_changed, @@ -640,8 +612,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve() static BuiltinCustomDataLayerProvider resolution("resolution", AttrDomain::Curve, CD_PROP_INT32, - CD_PROP_INT32, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::Deletable, 
curve_access, tag_component_topology_changed, @@ -650,8 +620,6 @@ static ComponentAttributeProviders create_attribute_providers_for_curve() static BuiltinCustomDataLayerProvider cyclic("cyclic", AttrDomain::Curve, CD_PROP_BOOL, - CD_PROP_BOOL, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::Deletable, curve_access, tag_component_topology_changed); diff --git a/source/blender/blenkernel/intern/geometry_component_instances.cc b/source/blender/blenkernel/intern/geometry_component_instances.cc index 8137526d7f8..c3a4892ad57 100644 --- a/source/blender/blenkernel/intern/geometry_component_instances.cc +++ b/source/blender/blenkernel/intern/geometry_component_instances.cc @@ -133,8 +133,6 @@ static ComponentAttributeProviders create_attribute_providers_for_instances() static BuiltinCustomDataLayerProvider id("id", AttrDomain::Instance, CD_PROP_INT32, - CD_PROP_INT32, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::Deletable, instance_custom_data_access, nullptr); @@ -142,8 +140,6 @@ static ComponentAttributeProviders create_attribute_providers_for_instances() static BuiltinCustomDataLayerProvider instance_transform("instance_transform", AttrDomain::Instance, CD_PROP_FLOAT4X4, - CD_PROP_FLOAT4X4, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::NonDeletable, instance_custom_data_access, nullptr); @@ -152,8 +148,6 @@ static ComponentAttributeProviders create_attribute_providers_for_instances() static BuiltinCustomDataLayerProvider reference_index(".reference_index", AttrDomain::Instance, CD_PROP_INT32, - CD_PROP_INT32, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::NonDeletable, instance_custom_data_access, tag_component_reference_index_changed); diff --git a/source/blender/blenkernel/intern/geometry_component_mesh.cc b/source/blender/blenkernel/intern/geometry_component_mesh.cc index 9486615ed4f..e649853123f 100644 --- a/source/blender/blenkernel/intern/geometry_component_mesh.cc +++ 
b/source/blender/blenkernel/intern/geometry_component_mesh.cc @@ -1010,8 +1010,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh() static BuiltinCustomDataLayerProvider position("position", AttrDomain::Point, CD_PROP_FLOAT3, - CD_PROP_FLOAT3, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::NonDeletable, point_access, tag_component_positions_changed); @@ -1019,8 +1017,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh() static BuiltinCustomDataLayerProvider id("id", AttrDomain::Point, CD_PROP_INT32, - CD_PROP_INT32, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::Deletable, point_access, nullptr); @@ -1035,8 +1031,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh() static BuiltinCustomDataLayerProvider material_index("material_index", AttrDomain::Face, CD_PROP_INT32, - CD_PROP_INT32, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::Deletable, face_access, nullptr, @@ -1049,8 +1043,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh() static BuiltinCustomDataLayerProvider edge_verts(".edge_verts", AttrDomain::Edge, CD_PROP_INT32_2D, - CD_PROP_INT32_2D, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::NonDeletable, edge_access, nullptr, @@ -1065,8 +1057,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh() static BuiltinCustomDataLayerProvider corner_vert(".corner_vert", AttrDomain::Corner, CD_PROP_INT32, - CD_PROP_INT32, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::NonDeletable, corner_access, nullptr, @@ -1074,8 +1064,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh() static BuiltinCustomDataLayerProvider corner_edge(".corner_edge", AttrDomain::Corner, CD_PROP_INT32, - CD_PROP_INT32, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::NonDeletable, corner_access, nullptr, @@ -1084,8 +1072,6 @@ static 
ComponentAttributeProviders create_attribute_providers_for_mesh() static BuiltinCustomDataLayerProvider sharp_face("sharp_face", AttrDomain::Face, CD_PROP_BOOL, - CD_PROP_BOOL, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::Deletable, face_access, tag_component_sharpness_changed); @@ -1093,8 +1079,6 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh() static BuiltinCustomDataLayerProvider sharp_edge("sharp_edge", AttrDomain::Edge, CD_PROP_BOOL, - CD_PROP_BOOL, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::Deletable, edge_access, tag_component_sharpness_changed); diff --git a/source/blender/blenkernel/intern/geometry_component_pointcloud.cc b/source/blender/blenkernel/intern/geometry_component_pointcloud.cc index 03a8ee2521d..4895f2d4323 100644 --- a/source/blender/blenkernel/intern/geometry_component_pointcloud.cc +++ b/source/blender/blenkernel/intern/geometry_component_pointcloud.cc @@ -147,24 +147,18 @@ static ComponentAttributeProviders create_attribute_providers_for_point_cloud() static BuiltinCustomDataLayerProvider position("position", AttrDomain::Point, CD_PROP_FLOAT3, - CD_PROP_FLOAT3, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::NonDeletable, point_access, tag_component_positions_changed); static BuiltinCustomDataLayerProvider radius("radius", AttrDomain::Point, CD_PROP_FLOAT, - CD_PROP_FLOAT, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::Deletable, point_access, tag_component_radius_changed); static BuiltinCustomDataLayerProvider id("id", AttrDomain::Point, CD_PROP_INT32, - CD_PROP_INT32, - BuiltinAttributeProvider::Creatable, BuiltinAttributeProvider::Deletable, point_access, nullptr); diff --git a/source/blender/blenkernel/intern/pointcloud.cc b/source/blender/blenkernel/intern/pointcloud.cc index aa4a3c1d58e..141ac807479 100644 --- a/source/blender/blenkernel/intern/pointcloud.cc +++ b/source/blender/blenkernel/intern/pointcloud.cc @@ -62,11 +62,11 @@ 
static void pointcloud_init_data(ID *id) MEMCPY_STRUCT_AFTER(pointcloud, DNA_struct_default_get(PointCloud), id); + pointcloud->runtime = new blender::bke::PointCloudRuntime(); + CustomData_reset(&pointcloud->pdata); pointcloud->attributes_for_write().add( "position", blender::bke::AttrDomain::Point, blender::bke::AttributeInitConstruct()); - - pointcloud->runtime = new blender::bke::PointCloudRuntime(); } static void pointcloud_copy_data(Main * /*bmain*/, diff --git a/source/blender/compositor/CMakeLists.txt b/source/blender/compositor/CMakeLists.txt index 6b7c153ddf9..38315eafe5f 100644 --- a/source/blender/compositor/CMakeLists.txt +++ b/source/blender/compositor/CMakeLists.txt @@ -584,20 +584,6 @@ if(WITH_COMPOSITOR_CPU) ${CMAKE_CURRENT_BINARY_DIR}/operations ) - set(GENSRC_DIR ${CMAKE_CURRENT_BINARY_DIR}/operations) - set(GENSRC ${GENSRC_DIR}/COM_SMAAAreaTexture.h) - add_custom_command( - OUTPUT ${GENSRC} - COMMAND ${CMAKE_COMMAND} -E make_directory ${GENSRC_DIR} - COMMAND "$" ${GENSRC} - DEPENDS smaa_areatex - ) - list(APPEND SRC - ${GENSRC} - ) - unset(GENSRC) - unset(GENSRC_DIR) - if(WITH_OPENIMAGEDENOISE) add_definitions(-DWITH_OPENIMAGEDENOISE) add_definitions(-DOIDN_STATIC_LIB) diff --git a/source/blender/compositor/nodes/COM_AntiAliasingNode.cc b/source/blender/compositor/nodes/COM_AntiAliasingNode.cc index d2c68b7d041..3b73bdc4f1b 100644 --- a/source/blender/compositor/nodes/COM_AntiAliasingNode.cc +++ b/source/blender/compositor/nodes/COM_AntiAliasingNode.cc @@ -7,37 +7,41 @@ namespace blender::compositor { +/* Blender encodes the threshold in the [0, 1] range, while the SMAA algorithm expects it in + * the [0, 0.5] range. */ +static float get_threshold(const NodeAntiAliasingData *data) +{ + return data->threshold / 2.0f; +} + +/* Blender encodes the local contrast adaptation factor in the [0, 1] range, while the SMAA + * algorithm expects it in the [0, 10] range. 
*/ +static float get_local_contrast_adaptation_factor(const NodeAntiAliasingData *data) +{ + return data->contrast_limit * 10.0f; +} + +/* Blender encodes the corner rounding factor in the float [0, 1] range, while the SMAA algorithm + * expects it in the integer [0, 100] range. */ +static int get_corner_rounding(const NodeAntiAliasingData *data) +{ + return int(data->corner_rounding * 100.0f); +} + void AntiAliasingNode::convert_to_operations(NodeConverter &converter, const CompositorContext & /*context*/) const { const bNode *node = this->get_bnode(); const NodeAntiAliasingData *data = (const NodeAntiAliasingData *)node->storage; - /* Edge Detection (First Pass) */ - SMAAEdgeDetectionOperation *operation1 = nullptr; + SMAAOperation *operation = new SMAAOperation(); + operation->set_threshold(get_threshold(data)); + operation->set_local_contrast_adaptation_factor(get_local_contrast_adaptation_factor(data)); + operation->set_corner_rounding(get_corner_rounding(data)); + converter.add_operation(operation); - operation1 = new SMAAEdgeDetectionOperation(); - operation1->set_threshold(data->threshold); - operation1->set_local_contrast_adaptation_factor(data->contrast_limit); - converter.add_operation(operation1); - - converter.map_input_socket(get_input_socket(0), operation1->get_input_socket(0)); - - /* Blending Weight Calculation Pixel Shader (Second Pass) */ - SMAABlendingWeightCalculationOperation *operation2 = - new SMAABlendingWeightCalculationOperation(); - operation2->set_corner_rounding(data->corner_rounding); - converter.add_operation(operation2); - - converter.add_link(operation1->get_output_socket(), operation2->get_input_socket(0)); - - /* Neighborhood Blending Pixel Shader (Third Pass) */ - SMAANeighborhoodBlendingOperation *operation3 = new SMAANeighborhoodBlendingOperation(); - converter.add_operation(operation3); - - converter.map_input_socket(get_input_socket(0), operation3->get_input_socket(0)); - converter.add_link(operation2->get_output_socket(), 
operation3->get_input_socket(1)); - converter.map_output_socket(get_output_socket(0), operation3->get_output_socket()); + converter.map_input_socket(get_input_socket(0), operation->get_input_socket(0)); + converter.map_output_socket(get_output_socket(0), operation->get_output_socket()); } } // namespace blender::compositor diff --git a/source/blender/compositor/nodes/COM_CornerPinNode.cc b/source/blender/compositor/nodes/COM_CornerPinNode.cc index 716f1e1bae6..66d9be04f3b 100644 --- a/source/blender/compositor/nodes/COM_CornerPinNode.cc +++ b/source/blender/compositor/nodes/COM_CornerPinNode.cc @@ -18,28 +18,13 @@ void CornerPinNode::convert_to_operations(NodeConverter &converter, PlaneCornerPinMaskOperation *plane_mask_operation = new PlaneCornerPinMaskOperation(); converter.add_operation(plane_mask_operation); - SMAAEdgeDetectionOperation *smaa_edge_detection = new SMAAEdgeDetectionOperation(); - converter.add_operation(smaa_edge_detection); + SMAAOperation *smaa_operation = new SMAAOperation(); + converter.add_operation(smaa_operation); converter.add_link(plane_mask_operation->get_output_socket(), - smaa_edge_detection->get_input_socket(0)); + smaa_operation->get_input_socket(0)); - SMAABlendingWeightCalculationOperation *smaa_blending_weights = - new SMAABlendingWeightCalculationOperation(); - converter.add_operation(smaa_blending_weights); - - converter.add_link(smaa_edge_detection->get_output_socket(), - smaa_blending_weights->get_input_socket(0)); - - SMAANeighborhoodBlendingOperation *smaa_neighborhood = new SMAANeighborhoodBlendingOperation(); - converter.add_operation(smaa_neighborhood); - - converter.add_link(plane_mask_operation->get_output_socket(), - smaa_neighborhood->get_input_socket(0)); - converter.add_link(smaa_blending_weights->get_output_socket(), - smaa_neighborhood->get_input_socket(1)); - - converter.map_output_socket(this->get_output_socket(1), smaa_neighborhood->get_output_socket()); + 
converter.map_output_socket(this->get_output_socket(1), smaa_operation->get_output_socket()); PlaneCornerPinWarpImageOperation *warp_image_operation = new PlaneCornerPinWarpImageOperation(); converter.add_operation(warp_image_operation); @@ -62,7 +47,7 @@ void CornerPinNode::convert_to_operations(NodeConverter &converter, converter.add_operation(set_alpha_operation); converter.add_link(warp_image_operation->get_output_socket(), set_alpha_operation->get_input_socket(0)); - converter.add_link(smaa_neighborhood->get_output_socket(), + converter.add_link(smaa_operation->get_output_socket(), set_alpha_operation->get_input_socket(1)); converter.map_output_socket(this->get_output_socket(0), set_alpha_operation->get_output_socket()); diff --git a/source/blender/compositor/nodes/COM_DilateErodeNode.cc b/source/blender/compositor/nodes/COM_DilateErodeNode.cc index f087b42e507..731fdf89aaa 100644 --- a/source/blender/compositor/nodes/COM_DilateErodeNode.cc +++ b/source/blender/compositor/nodes/COM_DilateErodeNode.cc @@ -37,26 +37,10 @@ void DilateErodeNode::convert_to_operations(NodeConverter &converter, converter.map_input_socket(get_input_socket(0), operation->get_input_socket(0)); if (editor_node->custom3 < 2.0f) { - SMAAEdgeDetectionOperation *smaa_edge_detection = new SMAAEdgeDetectionOperation(); - converter.add_operation(smaa_edge_detection); - - converter.add_link(operation->get_output_socket(), smaa_edge_detection->get_input_socket(0)); - - SMAABlendingWeightCalculationOperation *smaa_blending_weights = - new SMAABlendingWeightCalculationOperation(); - converter.add_operation(smaa_blending_weights); - - converter.add_link(smaa_edge_detection->get_output_socket(), - smaa_blending_weights->get_input_socket(0)); - - SMAANeighborhoodBlendingOperation *smaa_neighborhood = - new SMAANeighborhoodBlendingOperation(); - converter.add_operation(smaa_neighborhood); - - converter.add_link(operation->get_output_socket(), smaa_neighborhood->get_input_socket(0)); - 
converter.add_link(smaa_blending_weights->get_output_socket(), - smaa_neighborhood->get_input_socket(1)); - converter.map_output_socket(get_output_socket(0), smaa_neighborhood->get_output_socket()); + SMAAOperation *smaa_operation = new SMAAOperation(); + converter.add_operation(smaa_operation); + converter.add_link(operation->get_output_socket(), smaa_operation->get_input_socket(0)); + converter.map_output_socket(get_output_socket(0), smaa_operation->get_output_socket()); } else { converter.map_output_socket(get_output_socket(0), operation->get_output_socket(0)); diff --git a/source/blender/compositor/nodes/COM_IDMaskNode.cc b/source/blender/compositor/nodes/COM_IDMaskNode.cc index a0b712889fe..98a0dc638e9 100644 --- a/source/blender/compositor/nodes/COM_IDMaskNode.cc +++ b/source/blender/compositor/nodes/COM_IDMaskNode.cc @@ -27,27 +27,10 @@ void IDMaskNode::convert_to_operations(NodeConverter &converter, converter.map_output_socket(get_output_socket(0), operation->get_output_socket(0)); } else { - SMAAEdgeDetectionOperation *operation1 = nullptr; - - operation1 = new SMAAEdgeDetectionOperation(); - converter.add_operation(operation1); - - converter.add_link(operation->get_output_socket(0), operation1->get_input_socket(0)); - - /* Blending Weight Calculation Pixel Shader (Second Pass). */ - SMAABlendingWeightCalculationOperation *operation2 = - new SMAABlendingWeightCalculationOperation(); - converter.add_operation(operation2); - - converter.add_link(operation1->get_output_socket(), operation2->get_input_socket(0)); - - /* Neighborhood Blending Pixel Shader (Third Pass). 
*/ - SMAANeighborhoodBlendingOperation *operation3 = new SMAANeighborhoodBlendingOperation(); - converter.add_operation(operation3); - - converter.add_link(operation->get_output_socket(0), operation3->get_input_socket(0)); - converter.add_link(operation2->get_output_socket(), operation3->get_input_socket(1)); - converter.map_output_socket(get_output_socket(0), operation3->get_output_socket()); + SMAAOperation *smaa_operation = new SMAAOperation(); + converter.add_operation(smaa_operation); + converter.add_link(operation->get_output_socket(0), smaa_operation->get_input_socket(0)); + converter.map_output_socket(get_output_socket(0), smaa_operation->get_output_socket()); } } diff --git a/source/blender/compositor/nodes/COM_PlaneTrackDeformNode.cc b/source/blender/compositor/nodes/COM_PlaneTrackDeformNode.cc index 21f3d26202b..8a02e95a855 100644 --- a/source/blender/compositor/nodes/COM_PlaneTrackDeformNode.cc +++ b/source/blender/compositor/nodes/COM_PlaneTrackDeformNode.cc @@ -35,28 +35,13 @@ void PlaneTrackDeformNode::convert_to_operations(NodeConverter &converter, } converter.add_operation(plane_mask_operation); - SMAAEdgeDetectionOperation *smaa_edge_detection = new SMAAEdgeDetectionOperation(); - converter.add_operation(smaa_edge_detection); + SMAAOperation *smaa_operation = new SMAAOperation(); + converter.add_operation(smaa_operation); converter.add_link(plane_mask_operation->get_output_socket(), - smaa_edge_detection->get_input_socket(0)); + smaa_operation->get_input_socket(0)); - SMAABlendingWeightCalculationOperation *smaa_blending_weights = - new SMAABlendingWeightCalculationOperation(); - converter.add_operation(smaa_blending_weights); - - converter.add_link(smaa_edge_detection->get_output_socket(), - smaa_blending_weights->get_input_socket(0)); - - SMAANeighborhoodBlendingOperation *smaa_neighborhood = new SMAANeighborhoodBlendingOperation(); - converter.add_operation(smaa_neighborhood); - - converter.add_link(plane_mask_operation->get_output_socket(), - 
smaa_neighborhood->get_input_socket(0)); - converter.add_link(smaa_blending_weights->get_output_socket(), - smaa_neighborhood->get_input_socket(1)); - - converter.map_output_socket(this->get_output_socket(1), smaa_neighborhood->get_output_socket()); + converter.map_output_socket(this->get_output_socket(1), smaa_operation->get_output_socket()); PlaneTrackWarpImageOperation *warp_image_operation = new PlaneTrackWarpImageOperation(); warp_image_operation->set_movie_clip(clip); @@ -75,7 +60,7 @@ void PlaneTrackDeformNode::convert_to_operations(NodeConverter &converter, converter.add_operation(set_alpha_operation); converter.add_link(warp_image_operation->get_output_socket(), set_alpha_operation->get_input_socket(0)); - converter.add_link(smaa_neighborhood->get_output_socket(), + converter.add_link(smaa_operation->get_output_socket(), set_alpha_operation->get_input_socket(1)); converter.map_output_socket(this->get_output_socket(0), set_alpha_operation->get_output_socket()); diff --git a/source/blender/compositor/nodes/COM_ZCombineNode.cc b/source/blender/compositor/nodes/COM_ZCombineNode.cc index f8dd36d1db3..ed79727b7cd 100644 --- a/source/blender/compositor/nodes/COM_ZCombineNode.cc +++ b/source/blender/compositor/nodes/COM_ZCombineNode.cc @@ -54,25 +54,10 @@ void ZCombineNode::convert_to_operations(NodeConverter &converter, converter.map_input_socket(get_input_socket(3), maskoperation->get_input_socket(1)); /* Step 2 anti alias mask bit of an expensive operation, but does the trick. 
*/ - SMAAEdgeDetectionOperation *smaa_edge_detection = new SMAAEdgeDetectionOperation(); - converter.add_operation(smaa_edge_detection); + SMAAOperation *smaa_operation = new SMAAOperation(); + converter.add_operation(smaa_operation); - converter.add_link(maskoperation->get_output_socket(), - smaa_edge_detection->get_input_socket(0)); - - SMAABlendingWeightCalculationOperation *smaa_blending_weights = - new SMAABlendingWeightCalculationOperation(); - converter.add_operation(smaa_blending_weights); - - converter.add_link(smaa_edge_detection->get_output_socket(), - smaa_blending_weights->get_input_socket(0)); - - SMAANeighborhoodBlendingOperation *smaa_neighborhood = new SMAANeighborhoodBlendingOperation(); - converter.add_operation(smaa_neighborhood); - - converter.add_link(maskoperation->get_output_socket(), smaa_neighborhood->get_input_socket(0)); - converter.add_link(smaa_blending_weights->get_output_socket(), - smaa_neighborhood->get_input_socket(1)); + converter.add_link(maskoperation->get_output_socket(), smaa_operation->get_input_socket(0)); /* use mask to blend between the input colors. */ ZCombineMaskOperation *zcombineoperation = this->get_bnode()->custom1 ? 
@@ -80,7 +65,7 @@ void ZCombineNode::convert_to_operations(NodeConverter &converter, new ZCombineMaskOperation(); converter.add_operation(zcombineoperation); - converter.add_link(smaa_neighborhood->get_output_socket(), + converter.add_link(smaa_operation->get_output_socket(), zcombineoperation->get_input_socket(0)); converter.map_input_socket(get_input_socket(0), zcombineoperation->get_input_socket(1)); converter.map_input_socket(get_input_socket(2), zcombineoperation->get_input_socket(2)); diff --git a/source/blender/compositor/operations/COM_SMAAOperation.cc b/source/blender/compositor/operations/COM_SMAAOperation.cc index f49a069a81e..bb96c20c94d 100644 --- a/source/blender/compositor/operations/COM_SMAAOperation.cc +++ b/source/blender/compositor/operations/COM_SMAAOperation.cc @@ -1,805 +1,1514 @@ -/* SPDX-FileCopyrightText: 2024 Blender Authors +/* SPDX-FileCopyrightText: 2013 Jorge Jimenez + * SPDX-FileCopyrightText: 2013 Jose I. Echevarria + * SPDX-FileCopyrightText: 2013 Belen Masia + * SPDX-FileCopyrightText: 2013 Fernando Navarro + * SPDX-FileCopyrightText: 2013 Diego Gutierrez + * SPDX-FileCopyrightText: 2019-2023 Blender Authors * - * SPDX-License-Identifier: GPL-2.0-or-later */ + * SPDX-License-Identifier: MIT AND GPL-2.0-or-later */ -#include "COM_SMAAOperation.h" -#include "BKE_node.hh" -#include "COM_SMAAAreaTexture.h" +#include "BLI_math_vector.h" +#include "BLI_math_vector.hh" +#include "BLI_smaa_textures.h" +#include "BLI_span.hh" +#include "BLI_task.hh" #include "IMB_colormanagement.hh" +#include "COM_MemoryBuffer.h" +#include "COM_SMAAOperation.h" + +/** + * _______ ___ ___ ___ ___ + * / || \/ | / \ / \ + * | (---- | \ / | / ^ \ / ^ \ + * \ \ | |\/| | / /_\ \ / /_\ \ + * ----) | | | | | / _____ \ / _____ \ + * |_______/ |__| |__| /__/ \__\ /__/ \__\ + * + * E N H A N C E D + * S U B P I X E L M O R P H O L O G I C A L A N T I A L I A S I N G + * + * http://www.iryoku.com/smaa/ + * + * Hi, welcome aboard! 
+ * + * Here you'll find instructions to get the shader up and running as fast as + * possible. + * + * IMPORTANT NOTICE: when updating, remember to update both this file and the + * precomputed textures! They may change from version to version. + * + * The shader has three passes, chained together as follows: + * + * |input|------------------+ + * v | + * [ SMAA*EdgeDetection ] | + * v | + * |edgesTex| | + * v | + * [ SMAABlendingWeightCalculation ] | + * v | + * |blendTex| | + * v | + * [ SMAANeighborhoodBlending ] <------+ + * v + * |output| + * + * Note that each [pass] has its own vertex and pixel shader. Remember to use + * oversized triangles instead of quads to avoid overshading along the + * diagonal. + * + * You've three edge detection methods to choose from: luma, color or depth. + * They represent different quality/performance and anti-aliasing/sharpness + * tradeoffs, so our recommendation is for you to choose the one that best + * suits your particular scenario: + * + * - Depth edge detection is usually the fastest but it may miss some edges. + * + * - Luma edge detection is usually more expensive than depth edge detection, + * but catches visible edges that depth edge detection can miss. + * + * - Color edge detection is usually the most expensive one but catches + * chroma-only edges. + * + * For quickstarters: just use luma edge detection. + * + * The general advice is to not rush the integration process and ensure each + * step is done correctly (don't try to integrate SMAA T2x with predicated edge + * detection from the start!). Ok then, let's go! + * + * 1. The first step is to create two RGBA temporal render targets for holding + * |edgesTex| and |blendTex|. + * + * In DX10 or DX11, you can use a RG render target for the edges texture. + * In the case of NVIDIA GPUs, using RG render targets seems to actually be + * slower. 
+ * + * On the Xbox 360, you can use the same render target for resolving both + * |edgesTex| and |blendTex|, as they aren't needed simultaneously. + * + * 2. Both temporal render targets |edgesTex| and |blendTex| must be cleared + * each frame. Do not forget to clear the alpha channel! + * + * 3. The next step is loading the two supporting precalculated textures, + * 'areaTex' and 'searchTex'. You'll find them in the 'Textures' folder as + * C++ headers, and also as regular DDS files. They'll be needed for the + * 'SMAABlendingWeightCalculation' pass. + * + * If you use the C++ headers, be sure to load them in the format specified + * inside of them. + * + * You can also compress 'areaTex' and 'searchTex' using BC5 and BC4 + * respectively, if you have that option in your content processor pipeline. + * When compressing then, you get a non-perceptible quality decrease, and a + * marginal performance increase. + * + * 4. All samplers must be set to linear filtering and clamp. + * + * After you get the technique working, remember that 64-bit inputs have + * half-rate linear filtering on GCN. + * + * If SMAA is applied to 64-bit color buffers, switching to point filtering + * when accessing them will increase the performance. Search for + * 'SMAASamplePoint' to see which textures may benefit from point + * filtering, and where (which is basically the color input in the edge + * detection and resolve passes). + * + * 5. All texture reads and buffer writes must be non-sRGB, with the exception + * of the input read and the output write in + * 'SMAANeighborhoodBlending' (and only in this pass!). If sRGB reads in + * this last pass are not possible, the technique will work anyway, but + * will perform antialiasing in gamma space. + * + * IMPORTANT: for best results the input read for the color/luma edge + * detection should *NOT* be sRGB. + * + * 6. 
Before including SMAA.h you'll have to setup the render target metrics, + * the target and any optional configuration defines. Optionally you can + * use a preset. + * + * You have the following targets available: + * SMAA_HLSL_3 + * SMAA_HLSL_4 + * SMAA_HLSL_4_1 + * SMAA_GLSL_3 * + * SMAA_GLSL_4 * + * + * * (See SMAA_INCLUDE_VS and SMAA_INCLUDE_PS below). + * + * And four presets: + * SMAA_PRESET_LOW (%60 of the quality) + * SMAA_PRESET_MEDIUM (%80 of the quality) + * SMAA_PRESET_HIGH (%95 of the quality) + * SMAA_PRESET_ULTRA (%99 of the quality) + * + * For example: + * #define SMAA_RT_METRICS float4(1.0 / 1280.0, 1.0 / 720.0, 1280.0, 720.0) + * #define SMAA_HLSL_4 + * #define SMAA_PRESET_HIGH + * #include "SMAA.h" + * + * Note that SMAA_RT_METRICS doesn't need to be a macro, it can be a + * uniform variable. The code is designed to minimize the impact of not + * using a constant value, but it is still better to hardcode it. + * + * Depending on how you encoded 'areaTex' and 'searchTex', you may have to + * add (and customize) the following defines before including SMAA.h: + * #define SMAA_AREATEX_SELECT(sample) sample.rg + * #define SMAA_SEARCHTEX_SELECT(sample) sample.r + * + * If your engine is already using porting macros, you can define + * SMAA_CUSTOM_SL, and define the porting functions by yourself. + * + * 7. Then, you'll have to setup the passes as indicated in the scheme above. + * You can take a look into SMAA.fx, to see how we did it for our demo. + * Checkout the function wrappers, you may want to copy-paste them! + * + * 8. It's recommended to validate the produced |edgesTex| and |blendTex|. + * You can use a screenshot from your engine to compare the |edgesTex| + * and |blendTex| produced inside of the engine with the results obtained + * with the reference demo. + * + * 9. After you get the last pass to work, it's time to optimize. 
You'll have + * to initialize a stencil buffer in the first pass (discard is already in + * the code), then mask execution by using it in the second pass. The last + * pass should be executed in all pixels. + * + * + * After this point you can choose to enable predicated thresholding, + * temporal supersampling and motion blur integration: + * + * a) If you want to use predicated thresholding, take a look into + * SMAA_PREDICATION; you'll need to pass an extra texture in the edge + * detection pass. + * + * b) If you want to enable temporal supersampling (SMAA T2x): + * + * 1. The first step is to render using subpixel jitters. I won't go into + * detail, but it's as simple as moving each vertex position in the + * vertex shader, you can check how we do it in our DX10 demo. + * + * 2. Then, you must setup the temporal resolve. You may want to take a look + * into SMAAResolve for resolving 2x modes. After you get it working, you'll + * probably see ghosting everywhere. But fear not, you can enable the + * CryENGINE temporal reprojection by setting the SMAA_REPROJECTION macro. + * Check out SMAA_DECODE_VELOCITY if your velocity buffer is encoded. + * + * 3. The next step is to apply SMAA to each subpixel jittered frame, just as + * done for 1x. + * + * 4. At this point you should already have something usable, but for best + * results the proper area textures must be set depending on current jitter. + * For this, the parameter 'subsampleIndices' of + * 'SMAABlendingWeightCalculationPS' must be set as follows, for our T2x + * mode: + * + * @SUBSAMPLE_INDICES + * + * | S# | Camera Jitter | subsampleIndices | + * +----+------------------+---------------------+ + * | 0 | ( 0.25, -0.25) | float4(1, 1, 1, 0) | + * | 1 | (-0.25, 0.25) | float4(2, 2, 2, 0) | + * + * These jitter positions assume a bottom-to-top y axis. S# stands for the + * sample number. 
+ * + * More information about temporal supersampling here: + * http://iryoku.com/aacourse/downloads/13-Anti-Aliasing-Methods-in-CryENGINE-3.pdf + * + * c) If you want to enable spatial multisampling (SMAA S2x): + * + * 1. The scene must be rendered using MSAA 2x. The MSAA 2x buffer must be + * created with: + * - DX10: see below (*) + * - DX10.1: D3D10_STANDARD_MULTISAMPLE_PATTERN or + * - DX11: D3D11_STANDARD_MULTISAMPLE_PATTERN + * + * This allows to ensure that the subsample order matches the table in + * @SUBSAMPLE_INDICES. + * + * (*) In the case of DX10, we refer the reader to: + * - SMAA::detectMSAAOrder and + * - SMAA::msaaReorder + * + * These functions allow matching the standard multisample patterns by + * detecting the subsample order for a specific GPU, and reordering + * them appropriately. + * + * 2. A shader must be run to output each subsample into a separate buffer + * (DX10 is required). You can use SMAASeparate for this purpose, or just do + * it in an existing pass (for example, in the tone mapping pass, which has + * the advantage of feeding tone mapped subsamples to SMAA, which will yield + * better results). + * + * 3. The full SMAA 1x pipeline must be run for each separated buffer, storing + * the results in the final buffer. The second run should alpha blend with + * the existing final buffer using a blending factor of 0.5. + * 'subsampleIndices' must be adjusted as in the SMAA T2x case (see point + * b). + * + * d) If you want to enable temporal supersampling on top of SMAA S2x + * (which actually is SMAA 4x): + * + * 1. SMAA 4x consists on temporally jittering SMAA S2x, so the first step is + * to calculate SMAA S2x for current frame. 
In this case, 'subsampleIndices' + * must be set as follows: + * + * | F# | S# | Camera Jitter | Net Jitter | subsampleIndices | + * +----+----+--------------------+-------------------+----------------------+ + * | 0 | 0 | ( 0.125, 0.125) | ( 0.375, -0.125) | float4(5, 3, 1, 3) | + * | 0 | 1 | ( 0.125, 0.125) | (-0.125, 0.375) | float4(4, 6, 2, 3) | + * +----+----+--------------------+-------------------+----------------------+ + * | 1 | 2 | (-0.125, -0.125) | ( 0.125, -0.375) | float4(3, 5, 1, 4) | + * | 1 | 3 | (-0.125, -0.125) | (-0.375, 0.125) | float4(6, 4, 2, 4) | + * + * These jitter positions assume a bottom-to-top y axis. F# stands for the + * frame number. S# stands for the sample number. + * + * 2. After calculating SMAA S2x for current frame (with the new subsample + * indices), previous frame must be reprojected as in SMAA T2x mode (see + * point b). + * + * e) If motion blur is used, you may want to do the edge detection pass + * together with motion blur. This has two advantages: + * + * 1. Pixels under heavy motion can be omitted from the edge detection process. + * For these pixels we can just store "no edge", as motion blur will take + * care of them. + * 2. The center pixel tap is reused. + * + * Note that in this case depth testing should be used instead of stenciling, + * as we have to write all the pixels in the motion blur pass. + * + * That's it! 
+ */ + +/* ---------------------------------------------------------------------------- + * Blender's Defines */ + +#define SMAA_CUSTOM_SL +#define SMAA_AREATEX_SELECT(sample) sample.xy() +#define SMAA_SEARCHTEX_SELECT(sample) sample.x +#define SMAATexture2D(tex) const MemoryBuffer *tex +#define SMAATexturePass2D(tex) tex +#define SMAASampleLevelZero(tex, coord) tex->texture_bilinear_extend(coord) +#define SMAASampleLevelZeroPoint(tex, coord) tex->texture_bilinear_extend(coord) +#define SMAASampleLevelZeroOffset(tex, coord, offset, size) \ + tex->texture_bilinear_extend(coord + float2(offset) / float2(size)) +#define SMAASample(tex, coord) tex->texture_bilinear_extend(coord) +#define SMAASamplePoint(tex, coord) tex->texture_nearest_extend(coord) +#define SMAASamplePointOffset(tex, coord, offset, size) \ + tex->texture_nearest_extend(coord + float2(offset) / float2(size)) +#define SMAASampleOffset(tex, coord, offset, size) \ + tex->texture_bilinear_extend(coord + float2(offset) / float2(size)) +#define SMAA_FLATTEN +#define SMAA_BRANCH +#define lerp(a, b, t) math::interpolate(a, b, t) +#define saturate(a) math::clamp(a, 0.0f, 1.0f) +#define mad(a, b, c) (a * b + c) + +/* ---------------------------------------------------------------------------- + * SMAA Presets */ + +/** + * Note that if you use one of these presets, the following configuration + * macros will be ignored if set in the "Configurable Defines" section. 
+ */ + +#if defined(SMAA_PRESET_LOW) +# define SMAA_THRESHOLD 0.15f +# define SMAA_MAX_SEARCH_STEPS 4 +# define SMAA_DISABLE_DIAG_DETECTION +# define SMAA_DISABLE_CORNER_DETECTION +#elif defined(SMAA_PRESET_MEDIUM) +# define SMAA_THRESHOLD 0.1f +# define SMAA_MAX_SEARCH_STEPS 8 +# define SMAA_DISABLE_DIAG_DETECTION +# define SMAA_DISABLE_CORNER_DETECTION +#elif defined(SMAA_PRESET_HIGH) +# define SMAA_THRESHOLD 0.1f +# define SMAA_MAX_SEARCH_STEPS 16 +# define SMAA_MAX_SEARCH_STEPS_DIAG 8 +# define SMAA_CORNER_ROUNDING 25 +#elif defined(SMAA_PRESET_ULTRA) +# define SMAA_THRESHOLD 0.05f +# define SMAA_MAX_SEARCH_STEPS 32 +# define SMAA_MAX_SEARCH_STEPS_DIAG 16 +# define SMAA_CORNER_ROUNDING 25 +#endif + +/* ---------------------------------------------------------------------------- + * Configurable Defines */ + +/** + * SMAA_THRESHOLD specifies the threshold or sensitivity to edges. + * Lowering this value you will be able to detect more edges at the expense of + * performance. + * + * Range: [0, 0.5] + * 0.1 is a reasonable value, and allows to catch most visible edges. + * 0.05 is a rather overkill value, that allows to catch 'em all. + * + * If temporal supersampling is used, 0.2 could be a reasonable value, as low + * contrast edges are properly filtered by just 2x. + */ +#ifndef SMAA_THRESHOLD +# define SMAA_THRESHOLD 0.1f +#endif + +/** + * SMAA_DEPTH_THRESHOLD specifies the threshold for depth edge detection. + * + * Range: depends on the depth range of the scene. + */ +#ifndef SMAA_DEPTH_THRESHOLD +# define SMAA_DEPTH_THRESHOLD (0.1f * SMAA_THRESHOLD) +#endif + +/** + * SMAA_MAX_SEARCH_STEPS specifies the maximum steps performed in the + * horizontal/vertical pattern searches, at each side of the pixel. + * + * In number of pixels, it's actually the double. So the maximum line length + * perfectly handled by, for example 16, is 64 (by perfectly, we meant that + * longer lines won't look as good, but still antialiased). 
+ * + * Range: [0, 112] + */ +#ifndef SMAA_MAX_SEARCH_STEPS +# define SMAA_MAX_SEARCH_STEPS 16 +#endif + +/** + * SMAA_MAX_SEARCH_STEPS_DIAG specifies the maximum steps performed in the + * diagonal pattern searches, at each side of the pixel. In this case we jump + * one pixel at a time, instead of two. + * + * Range: [0, 20] + * + * On high-end machines it is cheap (between 0.8x and 0.9x slower for 16 + * steps), but it can have a significant impact on older machines. + * + * Define SMAA_DISABLE_DIAG_DETECTION to disable diagonal processing. + */ +#ifndef SMAA_MAX_SEARCH_STEPS_DIAG +# define SMAA_MAX_SEARCH_STEPS_DIAG 8 +#endif + +/** + * SMAA_CORNER_ROUNDING specifies how much sharp corners will be rounded. + * + * Range: [0, 100] + * + * Define SMAA_DISABLE_CORNER_DETECTION to disable corner processing. + */ +#ifndef SMAA_CORNER_ROUNDING +# define SMAA_CORNER_ROUNDING 25 +#endif + +/** + * If there is a neighbor edge that has SMAA_LOCAL_CONTRAST_ADAPTATION_FACTOR times + * bigger contrast than the current edge, the current edge will be discarded. + * + * This allows to eliminate spurious crossing edges, and is based on the fact + * that, if there is too much contrast in a direction, that will perceptually + * hide contrast in the other neighbors. + */ +#ifndef SMAA_LOCAL_CONTRAST_ADAPTATION_FACTOR +# define SMAA_LOCAL_CONTRAST_ADAPTATION_FACTOR 2.0f +#endif + +/** + * Predicated thresholding allows to better preserve texture details and to + * improve performance, by decreasing the number of detected edges using an + * additional buffer like the light accumulation buffer, object ids or even the + * depth buffer (the depth buffer usage may be limited to indoor or short range + * scenes). + * + * It locally decreases the luma or color threshold if an edge is found in an + * additional buffer (so the global threshold can be higher). + * + * This method was developed by Playstation EDGE MLAA team, and used in + * Killzone 3, by using the light accumulation buffer. 
More information here: + * http://iryoku.com/aacourse/downloads/06-MLAA-on-PS3.pptx + */ +#ifndef SMAA_PREDICATION +# define SMAA_PREDICATION 0 +#endif + +/** + * Threshold to be used in the additional predication buffer. + * + * Range: depends on the input, so you'll have to find the magic number that + * works for you. + */ +#ifndef SMAA_PREDICATION_THRESHOLD +# define SMAA_PREDICATION_THRESHOLD 0.01f +#endif + +/** + * How much to scale the global threshold used for luma or color edge + * detection when using predication. + * + * Range: [1, 5] + */ +#ifndef SMAA_PREDICATION_SCALE +# define SMAA_PREDICATION_SCALE 2.0f +#endif + +/** + * How much to locally decrease the threshold. + * + * Range: [0, 1] + */ +#ifndef SMAA_PREDICATION_STRENGTH +# define SMAA_PREDICATION_STRENGTH 0.4f +#endif + +/** + * Temporal reprojection allows to remove ghosting artifacts when using + * temporal supersampling. We use the CryEngine 3 method which also introduces + * velocity weighting. This feature is of extreme importance for totally + * removing ghosting. More information here: + * http://iryoku.com/aacourse/downloads/13-Anti-Aliasing-Methods-in-CryENGINE-3.pdf + * + * Note that you'll need to setup a velocity buffer for enabling reprojection. + * For static geometry, saving the previous depth buffer is a viable + * alternative. + */ +#ifndef SMAA_REPROJECTION +# define SMAA_REPROJECTION 0 +#endif + +/** + * SMAA_REPROJECTION_WEIGHT_SCALE controls the velocity weighting. It allows to + * remove ghosting trails behind the moving object, which are not removed by + * just using reprojection. Using low values will exhibit ghosting, while using + * high values will disable temporal supersampling under motion. + * + * Behind the scenes, velocity weighting removes temporal supersampling when + * the velocity of the subsamples differs (meaning they are different objects). 
+ * + * Range: [0, 80] + */ +#ifndef SMAA_REPROJECTION_WEIGHT_SCALE +# define SMAA_REPROJECTION_WEIGHT_SCALE 30.0f +#endif + +/** + * On some compilers, discard cannot be used in vertex shaders. Thus, they need + * to be compiled separately. + */ +#ifndef SMAA_INCLUDE_VS +# define SMAA_INCLUDE_VS 1 +#endif +#ifndef SMAA_INCLUDE_PS +# define SMAA_INCLUDE_PS 1 +#endif + +/* ---------------------------------------------------------------------------- + * Texture Access Defines */ + +#ifndef SMAA_AREATEX_SELECT +# if defined(SMAA_HLSL_3) +# define SMAA_AREATEX_SELECT(sample) sample.ra +# else +# define SMAA_AREATEX_SELECT(sample) sample.rg +# endif +#endif + +#ifndef SMAA_SEARCHTEX_SELECT +# define SMAA_SEARCHTEX_SELECT(sample) sample.r +#endif + +#ifndef SMAA_DECODE_VELOCITY +# define SMAA_DECODE_VELOCITY(sample) sample.rg +#endif + +/* ---------------------------------------------------------------------------- + * Non-Configurable Defines */ + +#define SMAA_AREATEX_MAX_DISTANCE 16 +#define SMAA_AREATEX_MAX_DISTANCE_DIAG 20 +#define SMAA_AREATEX_PIXEL_SIZE (1.0f / float2(160.0f, 560.0f)) +#define SMAA_AREATEX_SUBTEX_SIZE (1.0f / 7.0f) +#define SMAA_SEARCHTEX_SIZE float2(66.0f, 33.0f) +#define SMAA_SEARCHTEX_PACKED_SIZE float2(64.0f, 16.0f) +#define SMAA_CORNER_ROUNDING_NORM (float(SMAA_CORNER_ROUNDING) / 100.0f) + +/* ---------------------------------------------------------------------------- + * Porting Functions */ + +#if defined(SMAA_HLSL_3) +# define SMAATexture2D(tex) sampler2D tex +# define SMAATexturePass2D(tex) tex +# define SMAASampleLevelZero(tex, coord) tex2Dlod(tex, float4(coord, 0.0, 0.0)) +# define SMAASampleLevelZeroPoint(tex, coord) tex2Dlod(tex, float4(coord, 0.0, 0.0)) +/* clang-format off */ +# define SMAASampleLevelZeroOffset(tex, coord, offset) tex2Dlod(tex, float4(coord + offset * SMAA_RT_METRICS.xy, 0.0, 0.0)) +/* clang-format on */ +# define SMAASample(tex, coord) tex2D(tex, coord) +# define SMAASamplePoint(tex, coord) tex2D(tex, coord) +# 
define SMAASampleOffset(tex, coord, offset) tex2D(tex, coord + offset * SMAA_RT_METRICS.xy) +# define SMAA_FLATTEN [flatten] +# define SMAA_BRANCH [branch] +#endif +#if defined(SMAA_HLSL_4) || defined(SMAA_HLSL_4_1) +SamplerState LinearSampler +{ + Filter = MIN_MAG_LINEAR_MIP_POINT; + AddressU = Clamp; + AddressV = Clamp; +}; +SamplerState PointSampler +{ + Filter = MIN_MAG_MIP_POINT; + AddressU = Clamp; + AddressV = Clamp; +}; +# define SMAATexture2D(tex) Texture2D tex +# define SMAATexturePass2D(tex) tex +# define SMAASampleLevelZero(tex, coord) tex.SampleLevel(LinearSampler, coord, 0) +# define SMAASampleLevelZeroPoint(tex, coord) tex.SampleLevel(PointSampler, coord, 0) +/* clang-format off */ +# define SMAASampleLevelZeroOffset(tex, coord, offset) tex.SampleLevel(LinearSampler, coord, 0, offset) +/* clang-format on */ +# define SMAASample(tex, coord) tex.Sample(LinearSampler, coord) +# define SMAASamplePoint(tex, coord) tex.Sample(PointSampler, coord) +# define SMAASampleOffset(tex, coord, offset) tex.Sample(LinearSampler, coord, offset) +# define SMAA_FLATTEN [flatten] +# define SMAA_BRANCH [branch] +# define SMAATexture2DMS2(tex) Texture2DMS tex +# define SMAALoad(tex, pos, sample) tex.Load(pos, sample) +# if defined(SMAA_HLSL_4_1) +# define SMAAGather(tex, coord) tex.Gather(LinearSampler, coord, 0) +# endif +#endif +#if defined(SMAA_GLSL_3) || defined(SMAA_GLSL_4) || defined(GPU_METAL) || defined(GPU_VULKAN) +# define SMAATexture2D(tex) sampler2D tex +# define SMAATexturePass2D(tex) tex +# define SMAASampleLevelZero(tex, coord) textureLod(tex, coord, 0.0) +# define SMAASampleLevelZeroPoint(tex, coord) textureLod(tex, coord, 0.0) +# define SMAASampleLevelZeroOffset(tex, coord, offset) textureLodOffset(tex, coord, 0.0, offset) +# define SMAASample(tex, coord) texture(tex, coord) +# define SMAASamplePoint(tex, coord) texture(tex, coord) +# define SMAASampleOffset(tex, coord, offset) texture(tex, coord, offset) +# define SMAA_FLATTEN +# define SMAA_BRANCH +# 
define lerp(a, b, t) mix(a, b, t) +# define saturate(a) clamp(a, 0.0, 1.0) +# if defined(SMAA_GLSL_4) +# define SMAAGather(tex, coord) textureGather(tex, coord) +# endif +# if defined(SMAA_GLSL_4) +# define mad(a, b, c) fma(a, b, c) +# elif defined(GPU_VULKAN) +/* NOTE(Vulkan) mad macro doesn't work, define each override as work-around. */ +vec4 mad(vec4 a, vec4 b, vec4 c) +{ + return fma(a, b, c); +} +vec3 mad(vec3 a, vec3 b, vec3 c) +{ + return fma(a, b, c); +} +vec2 mad(vec2 a, vec2 b, vec2 c) +{ + return fma(a, b, c); +} +float mad(float a, float b, float c) +{ + return fma(a, b, c); +} +# else +# define mad(a, b, c) (a * b + c) +# endif +/* NOTE(Metal): Types already natively declared in MSL. */ +# ifndef GPU_METAL +# define float2 vec2 +# define float3 vec3 +# define float4 vec4 +# define int2 ivec2 +# define int3 ivec3 +# define int4 ivec4 +# define bool2 bvec2 +# define bool3 bvec3 +# define bool4 bvec4 +# endif +#endif + +/* clang-format off */ +#if !defined(SMAA_HLSL_3) && !defined(SMAA_HLSL_4) && !defined(SMAA_HLSL_4_1) && !defined(SMAA_GLSL_3) && !defined(SMAA_GLSL_4) && !defined(SMAA_CUSTOM_SL) +# error you must define the shading language: SMAA_HLSL_*, SMAA_GLSL_* or SMAA_CUSTOM_SL +#endif +/* clang-format on */ + namespace blender::compositor { -/* - * An implementation of Enhanced Sub-pixel Morphological Anti-aliasing (SMAA) - * - * The algorithm was proposed by: - * Jorge Jimenez, Jose I. Echevarria, Tiago Sousa, Diego Gutierrez - * - * http://www.iryoku.com/smaa/ - * - * This file is based on SMAA-CPP: - * - * https://github.com/i_ri-E/smaa-cpp - * - * Currently only SMAA 1x mode is provided, so the operation will be done - * with no spatial multi-sampling nor temporal super-sampling. - * - * NOTE: This program assumes the screen coordinates are DirectX style, so - * the vertical direction is upside-down. "top" and "bottom" actually mean - * bottom and top, respectively. 
+/* ---------------------------------------------------------------------------- + * Misc functions */ + +/** + * Conditional move: */ - -/*-----------------------------------------------------------------------------*/ -/* Non-Configurable Defines */ - -#define SMAA_AREATEX_SIZE 80 -#define SMAA_AREATEX_MAX_DISTANCE 20 -#define SMAA_AREATEX_MAX_DISTANCE_DIAG 20 -#define SMAA_MAX_SEARCH_STEPS 362 /* 362 - 1 = 19^2 */ -#define SMAA_MAX_SEARCH_STEPS_DIAG 19 - -/*-----------------------------------------------------------------------------*/ -/* Internal Functions to Sample Pixel Color from Image */ - -static inline void sample(MemoryBuffer *reader, int x, int y, float color[4]) +static void SMAAMovc(float2 cond, float2 &variable, float2 value) { - reader->read_elem_checked(x, y, color); + /* Use select function (select(genType A, genType B, genBType cond)). */ + variable = math::interpolate(variable, value, cond); } -template -static void sample_bilinear_vertical(T *reader, int x, int y, float yoffset, float color[4]) +static void SMAAMovc(float4 cond, float4 &variable, float4 value) { - float iy = floorf(yoffset); - float fy = yoffset - iy; - y += int(iy); - - float color00[4], color01[4]; - - sample(reader, x + 0, y + 0, color00); - sample(reader, x + 0, y + 1, color01); - - color[0] = interpf(color01[0], color00[0], fy); - color[1] = interpf(color01[1], color00[1], fy); - color[2] = interpf(color01[2], color00[2], fy); - color[3] = interpf(color01[3], color00[3], fy); + /* Use select function (select(genType A, genType B, genBType cond)). 
*/ + variable = math::interpolate(variable, value, cond); } -template -static void sample_bilinear_horizontal(T *reader, int x, int y, float xoffset, float color[4]) +#if SMAA_INCLUDE_VS +/* ---------------------------------------------------------------------------- + * Vertex Shaders */ + +/** + * Edge Detection Vertex Shader + */ +static void SMAAEdgeDetectionVS(float2 texcoord, int2 size, float4 offset[3]) { - float ix = floorf(xoffset); - float fx = xoffset - ix; - x += int(ix); - - float color00[4], color10[4]; - - sample(reader, x + 0, y + 0, color00); - sample(reader, x + 1, y + 0, color10); - - color[0] = interpf(color10[0], color00[0], fx); - color[1] = interpf(color10[1], color00[1], fx); - color[2] = interpf(color10[2], color00[2], fx); - color[3] = interpf(color10[3], color00[3], fx); -} - -/*-----------------------------------------------------------------------------*/ -/* Internal Functions to Sample Blending Weights from AreaTex */ - -static inline const float *areatex_sample_internal(const float *areatex, int x, int y) -{ - return &areatex[(std::clamp(x, 0, SMAA_AREATEX_SIZE - 1) + - std::clamp(y, 0, SMAA_AREATEX_SIZE - 1) * SMAA_AREATEX_SIZE) * - 2]; + offset[0] = float4(texcoord.xy(), texcoord.xy()) + + float4(-1.0f, 0.0f, 0.0f, -1.0f) / float4(size, size); + offset[1] = float4(texcoord.xy(), texcoord.xy()) + + float4(1.0f, 0.0f, 0.0f, 1.0f) / float4(size, size); + offset[2] = float4(texcoord.xy(), texcoord.xy()) + + float4(-2.0f, 0.0f, 0.0f, -2.0f) / float4(size, size); } /** - * We have the distance and both crossing edges. So, what are the areas - * at each side of current edge? 
+ * Blend Weight Calculation Vertex Shader */ -static void area(int d1, int d2, int e1, int e2, float weights[2]) +static void SMAABlendingWeightCalculationVS(float2 texcoord, + int2 size, + float2 &pixcoord, + float4 offset[3]) { - /* The areas texture is compressed quadratically: */ - float x = float(SMAA_AREATEX_MAX_DISTANCE * e1) + sqrtf(float(d1)); - float y = float(SMAA_AREATEX_MAX_DISTANCE * e2) + sqrtf(float(d2)); + pixcoord = texcoord * float2(size); - float ix = floorf(x), iy = floorf(y); - float fx = x - ix, fy = y - iy; - int X = int(ix), Y = int(iy); + // We will use these offsets for the searches later on (see @PSEUDO_GATHER4): + offset[0] = float4(texcoord.xy(), texcoord.xy()) + + float4(-0.25f, -0.125f, 1.25f, -0.125f) / float4(size, size); + offset[1] = float4(texcoord.xy(), texcoord.xy()) + + float4(-0.125f, -0.25f, -0.125f, 1.25f) / float4(size, size); - const float *weights00 = areatex_sample_internal(areatex, X + 0, Y + 0); - const float *weights10 = areatex_sample_internal(areatex, X + 1, Y + 0); - const float *weights01 = areatex_sample_internal(areatex, X + 0, Y + 1); - const float *weights11 = areatex_sample_internal(areatex, X + 1, Y + 1); - - weights[0] = interpf( - interpf(weights11[0], weights01[0], fx), interpf(weights10[0], weights00[0], fx), fy); - weights[1] = interpf( - interpf(weights11[1], weights01[1], fx), interpf(weights10[1], weights00[1], fx), fy); + // And these for the searches, they indicate the ends of the loops: + offset[2] = float4(offset[0].x, offset[0].z, offset[1].y, offset[1].w) + + (float4(-2.0f, 2.0f, -2.0f, 2.0f) * float(SMAA_MAX_SEARCH_STEPS)) / + float4(float2(size.x), float2(size.y)); } /** - * Similar to area(), this calculates the area corresponding to a certain + * Neighborhood Blending Vertex Shader + */ +static void SMAANeighborhoodBlendingVS(float2 texcoord, int2 size, float4 &offset) +{ + offset = float4(texcoord, texcoord) + float4(1.0f, 0.0f, 0.0f, 1.0f) / float4(size, size); +} +#endif // 
SMAA_INCLUDE_VS + +/** + * Luma Edge Detection + * + * IMPORTANT NOTICE: luma edge detection requires gamma-corrected colors, and + * thus 'colorTex' should be a non-sRGB texture. + */ +static float2 SMAALumaEdgeDetectionPS(float2 texcoord, + float4 offset[3], + SMAATexture2D(colorTex), +#if SMAA_PREDICATION + SMAATexture2D(predicationTex), +#endif + float edge_threshold, + float3 luminance_coefficients, + float local_contrast_adaptation_factor) +{ +#if SMAA_PREDICATION + float2 threshold = SMAACalculatePredicatedThreshold( + texcoord, offset, SMAATexturePass2D(predicationTex)); +#else + // Calculate the threshold: + float2 threshold = float2(edge_threshold, edge_threshold); +#endif + + // Calculate lumas: + // float4 weights = float4(0.2126, 0.7152, 0.0722, 0.0); + float4 weights = float4(luminance_coefficients, 0.0f); + float L = math::dot(SMAASamplePoint(colorTex, texcoord), weights); + + float Lleft = math::dot(SMAASamplePoint(colorTex, offset[0].xy()), weights); + float Ltop = math::dot(SMAASamplePoint(colorTex, offset[0].zw()), weights); + + // We do the usual threshold: + float4 delta; + float2 delta_left_top = math::abs(L - float2(Lleft, Ltop)); + delta.x = delta_left_top.x; + delta.y = delta_left_top.y; + float2 edges = math::step(threshold, delta.xy()); + + // Then return early if there is no edge: + if (math::dot(edges, float2(1.0f, 1.0f)) == 0.0f) { + return float2(0.0f); + } + + // Calculate right and bottom deltas: + float Lright = math::dot(SMAASamplePoint(colorTex, offset[1].xy()), weights); + float Lbottom = math::dot(SMAASamplePoint(colorTex, offset[1].zw()), weights); + float2 delta_right_bottom = math::abs(L - float2(Lright, Lbottom)); + delta.z = delta_right_bottom.x; + delta.w = delta_right_bottom.y; + + // Calculate the maximum delta in the direct neighborhood: + float2 maxDelta = math::max(delta.xy(), delta.zw()); + + // Calculate left-left and top-top deltas: + float Lleftleft = math::dot(SMAASamplePoint(colorTex, offset[2].xy()), weights); 
+ float Ltoptop = math::dot(SMAASamplePoint(colorTex, offset[2].zw()), weights); + float2 delta_left_left_top_top = math::abs(float2(Lleft, Ltop) - float2(Lleftleft, Ltoptop)); + delta.z = delta_left_left_top_top.x; + delta.w = delta_left_left_top_top.y; + + // Calculate the final maximum delta: + maxDelta = math::max(maxDelta.xy(), delta.zw()); + float finalDelta = math::max(maxDelta.x, maxDelta.y); + + // Local contrast adaptation: + edges *= math::step(finalDelta, local_contrast_adaptation_factor * delta.xy()); + + return edges; +} + +/* ---------------------------------------------------------------------------- + * Diagonal Search Functions */ + +#if !defined(SMAA_DISABLE_DIAG_DETECTION) + +/** + * Decodes two binary values from a bilinear-filtered access. + */ +static float2 SMAADecodeDiagBilinearAccess(float2 e) +{ + // Bilinear access for fetching 'e' has a 0.25 offset, and we are + // interested in the R and G edges: + // + // +---G---+-------+ + // | x o R x | + // +-------+-------+ + // + // Then, if one of these edges is enabled: + // Red: (0.75 * X + 0.25 * 1) => 0.25 or 1.0 + // Green: (0.75 * 1 + 0.25 * X) => 0.75 or 1.0 + // + // This function will unpack the values (mad + mul + round): + // wolframalpha.com: round(x * abs(5 * x - 5 * 0.75)) plot 0 to 1 + e.x = e.x * math::abs(5.0f * e.x - 5.0f * 0.75f); + return math::round(e); +} + +static float4 SMAADecodeDiagBilinearAccess(float4 e) +{ + e.x = e.x * math::abs(5.0f * e.x - 5.0f * 0.75f); + e.z = e.z * math::abs(5.0f * e.z - 5.0f * 0.75f); + return math::round(e); +} + +/** + * These functions perform diagonal pattern searches. 
+ */ +static float2 SMAASearchDiag1( + SMAATexture2D(edgesTex), float2 texcoord, float2 dir, int2 size, float2 &e) +{ + float4 coord = float4(texcoord, -1.0f, 1.0f); + float3 t = float3(1.0f / float2(size), 1.0f); + while (coord.z < float(SMAA_MAX_SEARCH_STEPS_DIAG - 1) && coord.w > 0.9f) { + float3 increment = mad(t, float3(dir, 1.0f), coord.xyz()); + coord.x = increment.x; + coord.y = increment.y; + coord.z = increment.z; + e = SMAASamplePoint(edgesTex, coord.xy()).xy(); + coord.w = math::dot(e, float2(0.5f, 0.5f)); + } + return coord.zw(); +} + +static float2 SMAASearchDiag2( + SMAATexture2D(edgesTex), float2 texcoord, float2 dir, int2 size, float2 &e) +{ + float4 coord = float4(texcoord, -1.0f, 1.0f); + coord.x += 0.25f / size.x; // See @SearchDiag2Optimization + float3 t = float3(1.0f / float2(size), 1.0f); + while (coord.z < float(SMAA_MAX_SEARCH_STEPS_DIAG - 1) && coord.w > 0.9f) { + float3 increment = mad(t, float3(dir, 1.0f), coord.xyz()); + coord.x = increment.x; + coord.y = increment.y; + coord.z = increment.z; + + // @SearchDiag2Optimization + // Fetch both edges at once using bilinear filtering: + e = SMAASampleLevelZero(edgesTex, coord.xy()).xy(); + e = SMAADecodeDiagBilinearAccess(e); + + // Non-optimized version: + // e.g = SMAASampleLevelZero(edgesTex, coord.xy).g; + // e.r = SMAASampleLevelZeroOffset(edgesTex, coord.xy, int2(1, 0), size).r; + + coord.w = math::dot(e, float2(0.5f, 0.5f)); + } + return coord.zw(); +} + +/** + * Similar to SMAAArea, this calculates the area corresponding to a certain * diagonal distance and crossing edges 'e'. 
*/ -static void area_diag(int d1, int d2, int e1, int e2, float weights[2]) +static float2 SMAAAreaDiag(SMAATexture2D(areaTex), float2 dist, float2 e, float offset) { - int x = SMAA_AREATEX_MAX_DISTANCE_DIAG * e1 + d1; - int y = SMAA_AREATEX_MAX_DISTANCE_DIAG * e2 + d2; + float2 texcoord = mad( + float2(SMAA_AREATEX_MAX_DISTANCE_DIAG, SMAA_AREATEX_MAX_DISTANCE_DIAG), e, dist); - const float *w = areatex_sample_internal(areatex_diag, x, y); - copy_v2_v2(weights, w); + // We do a scale and bias for mapping to texel space: + texcoord = mad(SMAA_AREATEX_PIXEL_SIZE, texcoord, 0.5f * SMAA_AREATEX_PIXEL_SIZE); + + // Diagonal areas are on the second half of the texture: + texcoord.x += 0.5f; + + // Move to proper place, according to the subpixel offset: + texcoord.y += SMAA_AREATEX_SUBTEX_SIZE * offset; + + // Do it! + return SMAA_AREATEX_SELECT(SMAASampleLevelZero(areaTex, texcoord)); } -/*-----------------------------------------------------------------------------*/ -/* Edge Detection (First Pass) */ -/*-----------------------------------------------------------------------------*/ - -SMAAEdgeDetectionOperation::SMAAEdgeDetectionOperation() +/** + * This searches for diagonal patterns and returns the corresponding weights. + */ +static float2 SMAACalculateDiagWeights(SMAATexture2D(edgesTex), + SMAATexture2D(areaTex), + float2 texcoord, + float2 e, + float4 subsampleIndices, + int2 size) { - this->add_input_socket(DataType::Color); /* image */ - this->add_input_socket(DataType::Value); /* Depth, material ID, etc. TODO: currently unused. 
*/ - this->add_output_socket(DataType::Color); - flags_.can_be_constant = true; - this->set_threshold(CMP_DEFAULT_SMAA_THRESHOLD); - this->set_local_contrast_adaptation_factor(CMP_DEFAULT_SMAA_CONTRAST_LIMIT); + float2 weights = float2(0.0f, 0.0f); + + // Search for the line ends: + float4 d; + float2 end; + if (e.x > 0.0f) { + float2 negative_diagonal = SMAASearchDiag1( + SMAATexturePass2D(edgesTex), texcoord, float2(-1.0f, 1.0f), size, end); + d.x = negative_diagonal.x; + d.z = negative_diagonal.y; + d.x += float(end.y > 0.9f); + } + else { + d.x = 0.0f; + d.z = 0.0f; + } + float2 positive_diagonal = SMAASearchDiag1( + SMAATexturePass2D(edgesTex), texcoord, float2(1.0, -1.0), size, end); + d.y = positive_diagonal.x; + d.w = positive_diagonal.y; + + SMAA_BRANCH + if (d.x + d.y > 2.0f) { // d.x + d.y + 1 > 3 + // Fetch the crossing edges: + float4 coords = float4(texcoord, texcoord) + + float4(-d.x + 0.25f, d.x, d.y, -d.y - 0.25f) / float4(size, size); + float4 c; + float2 left_edge = SMAASampleLevelZeroOffset(edgesTex, coords.xy(), int2(-1, 0), size).xy(); + float2 right_edge = SMAASampleLevelZeroOffset(edgesTex, coords.zw(), int2(1, 0), size).xy(); + c.x = left_edge.x; + c.y = left_edge.y; + c.z = right_edge.x; + c.w = right_edge.y; + float4 decoded_access = SMAADecodeDiagBilinearAccess(c); + c.y = decoded_access.x; + c.x = decoded_access.y; + c.w = decoded_access.z; + c.z = decoded_access.w; + + // Non-optimized version: + // float4 coords = mad(float4(-d.x, d.x, d.y, -d.y), SMAA_RT_METRICS.xyxy, texcoord.xyxy); + // float4 c; + // c.x = SMAASampleLevelZeroOffset(edgesTex, coords.xy, int2(-1, 0), size).g; + // c.y = SMAASampleLevelZeroOffset(edgesTex, coords.xy, int2( 0, 0), size).r; + // c.z = SMAASampleLevelZeroOffset(edgesTex, coords.zw, int2( 1, 0), size).g; + // c.w = SMAASampleLevelZeroOffset(edgesTex, coords.zw, int2( 1, -1), size).r; + + // Merge crossing edges at each side into a single value: + float2 cc = mad(float2(2.0f, 2.0f), float2(c.x, c.z), 
float2(c.y, c.w)); + + // Remove the crossing edge if we didn't found the end of the line: + SMAAMovc(math::step(0.9f, d.zw()), cc, float2(0.0f, 0.0f)); + + // Fetch the areas for this line: + weights += SMAAAreaDiag(SMAATexturePass2D(areaTex), d.xy(), cc, subsampleIndices.z); + } + + // Search for the line ends: + float2 negative_diagonal = SMAASearchDiag2( + SMAATexturePass2D(edgesTex), texcoord, float2(-1.0f, -1.0f), size, end); + d.x = negative_diagonal.x; + d.z = negative_diagonal.y; + if (SMAASamplePointOffset(edgesTex, texcoord, int2(1, 0), size).x > 0.0f) { + float2 positive_diagonal = SMAASearchDiag2( + SMAATexturePass2D(edgesTex), texcoord, float2(1.0f, 1.0f), size, end); + d.y = positive_diagonal.x; + d.w = positive_diagonal.y; + d.y += float(end.y > 0.9f); + } + else { + d.y = 0.0f; + d.w = 0.0f; + } + + SMAA_BRANCH + if (d.x + d.y > 2.0f) { // d.x + d.y + 1 > 3 + // Fetch the crossing edges: + float4 coords = float4(texcoord, texcoord) + float4(-d.x, -d.x, d.y, d.y) / float4(size, size); + float4 c; + c.x = SMAASampleLevelZeroOffset(edgesTex, coords.xy(), int2(-1, 0), size).y; + c.y = SMAASampleLevelZeroOffset(edgesTex, coords.xy(), int2(0, -1), size).x; + float2 left_edge = SMAASampleLevelZeroOffset(edgesTex, coords.zw(), int2(1, 0), size).xy(); + c.z = left_edge.y; + c.w = left_edge.x; + float2 cc = mad(float2(2.0f, 2.0f), float2(c.x, c.z), float2(c.y, c.w)); + + // Remove the crossing edge if we didn't found the end of the line: + SMAAMovc(math::step(0.9f, d.zw()), cc, float2(0.0f, 0.0f)); + + // Fetch the areas for this line: + float2 area = SMAAAreaDiag(SMAATexturePass2D(areaTex), d.xy(), cc, subsampleIndices.w).xy(); + weights.x += area.y; + weights.y += area.x; + } + + return weights; +} +#endif + +/* ---------------------------------------------------------------------------- + * Horizontal/Vertical Search Functions */ + +/** + * This allows to determine how much length should we add in the last step + * of the searches. 
It takes the bilinearly interpolated edge (see + * @PSEUDO_GATHER4), and adds 0, 1 or 2, depending on which edges and + * crossing edges are active. + */ +static float SMAASearchLength(SMAATexture2D(searchTex), float2 e, float offset) +{ + // The texture is flipped vertically, with left and right cases taking half + // of the space horizontally: + float2 scale = SMAA_SEARCHTEX_SIZE * float2(0.5f, -1.0f); + float2 bias = SMAA_SEARCHTEX_SIZE * float2(offset, 1.0f); + + // Scale and bias to access texel centers: + scale += float2(-1.0f, 1.0f); + bias += float2(0.5f, -0.5f); + + // Convert from pixel coordinates to texcoords: + // (We use SMAA_SEARCHTEX_PACKED_SIZE because the texture is cropped) + scale *= 1.0f / SMAA_SEARCHTEX_PACKED_SIZE; + bias *= 1.0f / SMAA_SEARCHTEX_PACKED_SIZE; + + // Lookup the search texture: + return SMAA_SEARCHTEX_SELECT(SMAASampleLevelZero(searchTex, mad(scale, e, bias))); } -void SMAAEdgeDetectionOperation::set_threshold(float threshold) +/** + * Horizontal/vertical search functions for the 2nd pass. + */ +static float SMAASearchXLeft( + SMAATexture2D(edgesTex), SMAATexture2D(searchTex), float2 texcoord, float end, int2 size) { - /* UI values are between 0 and 1 for simplicity but algorithm expects values between 0 and 0.5 */ - threshold_ = scalenorm(0, 0.5, threshold); + /** + * @PSEUDO_GATHER4 + * This texcoord has been offset by (-0.25, -0.125) in the vertex shader to + * sample between edge, thus fetching four edges in a row. + * Sampling with different offsets in each direction allows to disambiguate + * which edges are active from the four fetched ones. + */ + float2 e = float2(0.0f, 1.0f); + while (texcoord.x > end && e.y > 0.8281f && // Is there some edge not activated? + e.x == 0.0f) // Or is there a crossing edge that breaks the line? 
+ { + e = SMAASampleLevelZero(edgesTex, texcoord).xy(); + texcoord = texcoord - float2(2.0f, 0.0f) / float2(size); + } + + float offset = mad( + -(255.0f / 127.0f), SMAASearchLength(SMAATexturePass2D(searchTex), e, 0.0f), 3.25f); + return texcoord.x + offset / size.x; + + // Non-optimized version: + // We correct the previous (-0.25, -0.125) offset we applied: + // texcoord.x += 0.25 * SMAA_RT_METRICS.x; + + // The searches are biased by 1, so adjust the coords accordingly: + // texcoord.x += SMAA_RT_METRICS.x; + + // Disambiguate the length added by the last step: + // texcoord.x += 2.0 * SMAA_RT_METRICS.x; // Undo last step + // texcoord.x -= SMAA_RT_METRICS.x * (255.0 / 127.0) * + // SMAASearchLength(SMAATexturePass2D(searchTex), e, 0.0); + // return mad(SMAA_RT_METRICS.x, offset, texcoord.x); } -void SMAAEdgeDetectionOperation::set_local_contrast_adaptation_factor(float factor) +static float SMAASearchXRight( + SMAATexture2D(edgesTex), SMAATexture2D(searchTex), float2 texcoord, float end, int2 size) { - /* UI values are between 0 and 1 for simplicity but algorithm expects values between 1 and 10 */ - contrast_limit_ = scalenorm(1, 10, factor); + float2 e = float2(0.0f, 1.0f); + while (texcoord.x < end && e.y > 0.8281f && // Is there some edge not activated? + e.x == 0.0f) // Or is there a crossing edge that breaks the line? 
+ { + e = SMAASampleLevelZero(edgesTex, texcoord).xy(); + texcoord = texcoord + float2(2.0f, 0.0f) / float2(size); + } + float offset = mad( + -(255.0f / 127.0f), SMAASearchLength(SMAATexturePass2D(searchTex), e, 0.5f), 3.25f); + return texcoord.x - offset / size.x; } -void SMAAEdgeDetectionOperation::get_area_of_interest(const int /*input_idx*/, - const rcti &output_area, - rcti &r_input_area) +static float SMAASearchYUp( + SMAATexture2D(edgesTex), SMAATexture2D(searchTex), float2 texcoord, float end, int2 size) { - r_input_area.xmax = output_area.xmax + 1; - r_input_area.xmin = output_area.xmin - 2; - r_input_area.ymax = output_area.ymax + 1; - r_input_area.ymin = output_area.ymin - 2; + float2 e = float2(1.0f, 0.0f); + while (texcoord.y > end && e.x > 0.8281f && // Is there some edge not activated? + e.y == 0.0f) // Or is there a crossing edge that breaks the line? + { + e = SMAASampleLevelZero(edgesTex, texcoord).xy(); + texcoord = texcoord - float2(0.0f, 2.0f) / float2(size); + } + float2 flipped_edge = float2(e.y, e.x); + float offset = mad(-(255.0f / 127.0f), + SMAASearchLength(SMAATexturePass2D(searchTex), flipped_edge, 0.0f), + 3.25f); + return texcoord.y + offset / size.y; } -void SMAAEdgeDetectionOperation::update_memory_buffer_partial(MemoryBuffer *output, - const rcti &area, - Span inputs) +static float SMAASearchYDown( + SMAATexture2D(edgesTex), SMAATexture2D(searchTex), float2 texcoord, float end, int2 size) { - const MemoryBuffer *image = inputs[0]; - for (BuffersIterator it = output->iterate_with({}, area); !it.is_end(); ++it) { - float color[4]; - const int x = it.x; - const int y = it.y; + float2 e = float2(1.0f, 0.0f); + while (texcoord.y < end && e.x > 0.8281f && // Is there some edge not activated? + e.y == 0.0f) // Or is there a crossing edge that breaks the line? 
+ { + e = SMAASampleLevelZero(edgesTex, texcoord).xy(); + texcoord = texcoord + float2(0.0f, 2.0f) / float2(size); + } + float2 flipped_edge = float2(e.y, e.x); + float offset = mad(-(255.0f / 127.0f), + SMAASearchLength(SMAATexturePass2D(searchTex), flipped_edge, 0.5f), + 3.25f); + return texcoord.y - offset / size.y; +} - /* Calculate luma deltas: */ - image->read_elem_checked(x, y, color); - const float L = IMB_colormanagement_get_luminance(color); - image->read_elem_checked(x - 1, y, color); - const float Lleft = IMB_colormanagement_get_luminance(color); - image->read_elem_checked(x, y - 1, color); - const float Ltop = IMB_colormanagement_get_luminance(color); - const float Dleft = fabsf(L - Lleft); - const float Dtop = fabsf(L - Ltop); +/** + * Ok, we have the distance and both crossing edges. So, what are the areas + * at each side of current edge? + */ +static float2 SMAAArea(SMAATexture2D(areaTex), float2 dist, float e1, float e2, float offset) +{ + // Rounding prevents precision errors of bilinear filtering: + float2 texcoord = mad(float2(SMAA_AREATEX_MAX_DISTANCE, SMAA_AREATEX_MAX_DISTANCE), + math::round(4.0f * float2(e1, e2)), + dist); - /* We do the usual threshold: */ - it.out[0] = (x > 0 && Dleft >= threshold_) ? 1.0f : 0.0f; - it.out[1] = (y > 0 && Dtop >= threshold_) ? 1.0f : 0.0f; - it.out[2] = 0.0f; - it.out[3] = 1.0f; + // We do a scale and bias for mapping to texel space: + texcoord = mad(SMAA_AREATEX_PIXEL_SIZE, texcoord, 0.5f * SMAA_AREATEX_PIXEL_SIZE); - /* Then discard if there is no edge: */ - if (is_zero_v2(it.out)) { - continue; + // Move to proper place, according to the subpixel offset: + texcoord.y = mad(SMAA_AREATEX_SUBTEX_SIZE, offset, texcoord.y); + + // Do it! 
+ return SMAA_AREATEX_SELECT(SMAASampleLevelZero(areaTex, texcoord)); +} + +/* ---------------------------------------------------------------------------- + * Corner Detection Functions */ + +static void SMAADetectHorizontalCornerPattern(SMAATexture2D(edgesTex), + float2 &weights, + float4 texcoord, + float2 d, + int2 size, + int corner_rounding) +{ +#if !defined(SMAA_DISABLE_CORNER_DETECTION) + float2 leftRight = math::step(d, float2(d.y, d.x)); + float2 rounding = (1.0f - corner_rounding / 100.0f) * leftRight; + + rounding /= leftRight.x + leftRight.y; // Reduce blending for pixels in the center of a line. + + float2 factor = float2(1.0f, 1.0f); + factor.x -= rounding.x * SMAASampleLevelZeroOffset(edgesTex, texcoord.xy(), int2(0, 1), size).x; + factor.x -= rounding.y * SMAASampleLevelZeroOffset(edgesTex, texcoord.zw(), int2(1, 1), size).x; + factor.y -= rounding.x * SMAASampleLevelZeroOffset(edgesTex, texcoord.xy(), int2(0, -2), size).x; + factor.y -= rounding.y * SMAASampleLevelZeroOffset(edgesTex, texcoord.zw(), int2(1, -2), size).x; + + weights *= saturate(factor); +#endif +} + +static void SMAADetectVerticalCornerPattern(SMAATexture2D(edgesTex), + float2 &weights, + float4 texcoord, + float2 d, + int2 size, + int corner_rounding) +{ +#if !defined(SMAA_DISABLE_CORNER_DETECTION) + float2 leftRight = math::step(d, float2(d.y, d.x)); + float2 rounding = (1.0f - corner_rounding / 100.0f) * leftRight; + + rounding /= leftRight.x + leftRight.y; + + float2 factor = float2(1.0f, 1.0f); + factor.x -= rounding.x * SMAASampleLevelZeroOffset(edgesTex, texcoord.xy(), int2(1, 0), size).y; + factor.x -= rounding.y * SMAASampleLevelZeroOffset(edgesTex, texcoord.zw(), int2(1, 1), size).y; + factor.y -= rounding.x * SMAASampleLevelZeroOffset(edgesTex, texcoord.xy(), int2(-2, 0), size).y; + factor.y -= rounding.y * SMAASampleLevelZeroOffset(edgesTex, texcoord.zw(), int2(-2, 1), size).y; + + weights *= saturate(factor); +#endif +} + +/* 
---------------------------------------------------------------------------- + * Blending Weight Calculation Pixel Shader (Second Pass) */ + +static float4 SMAABlendingWeightCalculationPS(float2 texcoord, + float2 pixcoord, + float4 offset[3], + MemoryBuffer *edgesTex, + MemoryBuffer *areaTex, + MemoryBuffer *searchTex, + float4 subsampleIndices, + int2 size, + int corner_rounding) +{ // Just pass zero for SMAA 1x, see @SUBSAMPLE_INDICES. + float4 weights = float4(0.0f, 0.0f, 0.0f, 0.0f); + + float2 e = SMAASamplePoint(edgesTex, texcoord).xy(); + + SMAA_BRANCH + if (e.y > 0.0f) { // Edge at north +#if !defined(SMAA_DISABLE_DIAG_DETECTION) + // Diagonals have both north and west edges, so searching for them in + // one of the boundaries is enough. + float2 diagonal_weights = SMAACalculateDiagWeights(SMAATexturePass2D(edgesTex), + SMAATexturePass2D(areaTex), + texcoord, + e, + subsampleIndices, + size); + + weights.x = diagonal_weights.x; + weights.y = diagonal_weights.y; + + // We give priority to diagonals, so if we find a diagonal we skip + // horizontal/vertical processing. + SMAA_BRANCH + if (weights.x == -weights.y) { // weights.x + weights.y == 0.0 +#endif + + float2 d; + + // Find the distance to the left: + float3 coords; + coords.x = SMAASearchXLeft(SMAATexturePass2D(edgesTex), + SMAATexturePass2D(searchTex), + offset[0].xy(), + offset[2].x, + size); + coords.y = + offset[1].y; // offset[1].y = texcoord.y - 0.25 * SMAA_RT_METRICS.y (@CROSSING_OFFSET) + d.x = coords.x; + + // Now fetch the left crossing edges, two at a time using bilinear + // filtering. 
Sampling at -0.25 (see @CROSSING_OFFSET) enables to + // discern what value each edge has: + float e1 = SMAASampleLevelZero(edgesTex, coords.xy()).x; + + // Find the distance to the right: + coords.z = SMAASearchXRight(SMAATexturePass2D(edgesTex), + SMAATexturePass2D(searchTex), + offset[0].zw(), + offset[2].y, + size); + d.y = coords.z; + + // We want the distances to be in pixel units (doing this here allows + // better interleaving of arithmetic and memory accesses): + d = math::abs(math::round(mad(float2(size.x), d, -float2(pixcoord.x)))); + + // SMAAArea below needs a sqrt, as the areas texture is compressed + // quadratically: + float2 sqrt_d = math::sqrt(d); + + // Fetch the right crossing edges: + float e2 = + SMAASampleLevelZeroOffset(edgesTex, float2(coords.z, coords.y), int2(1, 0), size).x; + + // Ok, we know how this pattern looks like, now it is time for getting + // the actual area: + float2 area = SMAAArea(SMAATexturePass2D(areaTex), sqrt_d, e1, e2, subsampleIndices.y); + weights.x = area.x; + weights.y = area.y; + + // Fix corners: + coords.y = texcoord.y; + + float2 corner_weight = weights.xy(); + SMAADetectHorizontalCornerPattern(SMAATexturePass2D(edgesTex), + corner_weight, + float4(coords.xy(), coords.z, coords.y), + d, + size, + corner_rounding); + weights.x = corner_weight.x; + weights.y = corner_weight.y; + +#if !defined(SMAA_DISABLE_DIAG_DETECTION) } + else + e.x = 0.0f; // Skip vertical processing. 
+#endif + } - /* Calculate right and bottom deltas: */ - image->read_elem_checked(x + 1, y, color); - const float Lright = IMB_colormanagement_get_luminance(color); - image->read_elem_checked(x, y + 1, color); - const float Lbottom = IMB_colormanagement_get_luminance(color); - const float Dright = fabsf(L - Lright); - const float Dbottom = fabsf(L - Lbottom); + SMAA_BRANCH + if (e.x > 0.0f) { // Edge at west + float2 d; - /* Calculate the maximum delta in the direct neighborhood: */ - float max_delta = fmaxf(fmaxf(Dleft, Dright), fmaxf(Dtop, Dbottom)); + // Find the distance to the top: + float3 coords; + coords.y = SMAASearchYUp(SMAATexturePass2D(edgesTex), + SMAATexturePass2D(searchTex), + offset[1].xy(), + offset[2].z, + size); + coords.x = offset[0].x; // offset[1].x = texcoord.x - 0.25 * SMAA_RT_METRICS.x; + d.x = coords.y; - /* Calculate luma used for both left and top edges: */ - image->read_elem_checked(x - 1, y - 1, color); - const float Llefttop = IMB_colormanagement_get_luminance(color); + // Fetch the top crossing edges: + float e1 = SMAASampleLevelZero(edgesTex, coords.xy()).y; - /* Left edge */ - if (it.out[0] != 0.0f) { - /* Calculate deltas around the left pixel: */ - image->read_elem_checked(x - 2, y, color); - const float Lleftleft = IMB_colormanagement_get_luminance(color); - image->read_elem_checked(x - 1, y + 1, color); - const float Lleftbottom = IMB_colormanagement_get_luminance(color); - const float Dleftleft = fabsf(Lleft - Lleftleft); - const float Dlefttop = fabsf(Lleft - Llefttop); - const float Dleftbottom = fabsf(Lleft - Lleftbottom); + // Find the distance to the bottom: + coords.z = SMAASearchYDown(SMAATexturePass2D(edgesTex), + SMAATexturePass2D(searchTex), + offset[1].zw(), + offset[2].w, + size); + d.y = coords.z; - /* Calculate the final maximum delta: */ - max_delta = fmaxf(max_delta, fmaxf(Dleftleft, fmaxf(Dlefttop, Dleftbottom))); + // We want the distances to be in pixel units: + d = math::abs(math::round(mad(float2(size.y), 
d, -float2(pixcoord.y)))); - /* Local contrast adaptation: */ - if (max_delta > contrast_limit_ * Dleft) { - it.out[0] = 0.0f; - } - } + // SMAAArea below needs a sqrt, as the areas texture is compressed + // quadratically: + float2 sqrt_d = math::sqrt(d); - /* Top edge */ - if (it.out[1] != 0.0f) { - /* Calculate top-top delta: */ - image->read_elem_checked(x, y - 2, color); - const float Ltoptop = IMB_colormanagement_get_luminance(color); - image->read_elem_checked(x + 1, y - 1, color); - const float Ltopright = IMB_colormanagement_get_luminance(color); - const float Dtoptop = fabsf(Ltop - Ltoptop); - const float Dtopleft = fabsf(Ltop - Llefttop); - const float Dtopright = fabsf(Ltop - Ltopright); + // Fetch the bottom crossing edges: + float e2 = SMAASampleLevelZeroOffset(edgesTex, float2(coords.x, coords.z), int2(0, 1), size).y; - /* Calculate the final maximum delta: */ - max_delta = fmaxf(max_delta, fmaxf(Dtoptop, fmaxf(Dtopleft, Dtopright))); + // Get the area for this direction: + float2 area = SMAAArea(SMAATexturePass2D(areaTex), sqrt_d, e1, e2, subsampleIndices.x); + weights.z = area.x; + weights.w = area.y; - /* Local contrast adaptation: */ - if (max_delta > contrast_limit_ * Dtop) { - it.out[1] = 0.0f; - } - } + // Fix corners: + coords.x = texcoord.x; + + float2 corner_weight = weights.zw(); + SMAADetectVerticalCornerPattern(SMAATexturePass2D(edgesTex), + corner_weight, + float4(coords.xy(), coords.x, coords.z), + d, + size, + corner_rounding); + weights.z = corner_weight.x; + weights.w = corner_weight.y; + } + + return weights; +} + +/* ---------------------------------------------------------------------------- + * Neighborhood Blending Pixel Shader (Third Pass) */ + +static float4 SMAANeighborhoodBlendingPS(float2 texcoord, + float4 offset, + SMAATexture2D(colorTex), + SMAATexture2D(blendTex), +#if SMAA_REPROJECTION + SMAATexture2D(velocityTex), +#endif + int2 size) +{ + // Fetch the blending weights for current pixel: + float4 a; + a.x = 
SMAASample(blendTex, offset.xy()).w; // Right + a.y = SMAASample(blendTex, offset.zw()).y; // Top + a.z = SMAASample(blendTex, texcoord).z; // Left + a.w = SMAASample(blendTex, texcoord).x; // Bottom + + // Is there any blending weight with a value greater than 0.0? + SMAA_BRANCH + if (math::dot(a, float4(1.0f, 1.0f, 1.0f, 1.0f)) < 1e-5f) { + float4 color = SMAASampleLevelZero(colorTex, texcoord); + +#if SMAA_REPROJECTION + float2 velocity = SMAA_DECODE_VELOCITY(SMAASampleLevelZero(velocityTex, texcoord)); + + // Pack velocity into the alpha channel: + color.a = math::sqrt(5.0f * math::length(velocity)); +#endif + + return color; + } + else { + bool h = math::max(a.x, a.z) > math::max(a.y, a.w); // max(horizontal) > max(vertical) + + // Calculate the blending offsets: + float4 blendingOffset = float4(0.0f, a.y, 0.0f, a.w); + float2 blendingWeight = float2(a.y, a.w); + SMAAMovc(float4(h), blendingOffset, float4(a.x, 0.0f, a.z, 0.0f)); + SMAAMovc(float2(h), blendingWeight, float2(a.x, a.z)); + blendingWeight /= math::dot(blendingWeight, float2(1.0f, 1.0f)); + + // Calculate the texture coordinates: + float4 blendingCoord = float4(texcoord, texcoord) + blendingOffset / float4(size, -size); + + // We exploit bilinear filtering to mix current pixel with the chosen + // neighbor: + float4 color = blendingWeight.x * SMAASampleLevelZero(colorTex, blendingCoord.xy()); + color += blendingWeight.y * SMAASampleLevelZero(colorTex, blendingCoord.zw()); + +#if SMAA_REPROJECTION + // Antialias velocity for proper reprojection in a later stage: + float2 velocity = blendingWeight.x * + SMAA_DECODE_VELOCITY(SMAASampleLevelZero(velocityTex, blendingCoord.xy())); + velocity += blendingWeight.y * + SMAA_DECODE_VELOCITY(SMAASampleLevelZero(velocityTex, blendingCoord.zw())); + + // Pack velocity into the alpha channel: + color.a = math::sqrt(5.0f * math::length(velocity)); +#endif + + return color; } } -/*-----------------------------------------------------------------------------*/ -/* 
Blending Weight Calculation (Second Pass) */ -/*-----------------------------------------------------------------------------*/ - -SMAABlendingWeightCalculationOperation::SMAABlendingWeightCalculationOperation() +SMAAOperation::SMAAOperation() { - this->add_input_socket(DataType::Color); /* edges */ + this->add_input_socket(DataType::Color); this->add_output_socket(DataType::Color); flags_.can_be_constant = true; - this->set_corner_rounding(CMP_DEFAULT_SMAA_CORNER_ROUNDING); } -void SMAABlendingWeightCalculationOperation::set_corner_rounding(float rounding) -{ - /* UI values are between 0 and 1 for simplicity but algorithm expects values between 0 and 100 */ - corner_rounding_ = int(scalenorm(0, 100, rounding)); -} - -void SMAABlendingWeightCalculationOperation::update_memory_buffer_started( - MemoryBuffer * /*output*/, const rcti & /*out_area*/, Span inputs) -{ - const MemoryBuffer *image = inputs[0]; - sample_image_fn_ = [=](int x, int y, float *out) { image->read_elem_checked(x, y, out); }; -} - -void SMAABlendingWeightCalculationOperation::update_memory_buffer_partial( - MemoryBuffer *output, const rcti &out_area, Span /*inputs*/) -{ - for (BuffersIterator it = output->iterate_with({}, out_area); !it.is_end(); ++it) { - const int x = it.x; - const int y = it.y; - zero_v4(it.out); - - float edges[4]; - sample_image_fn_(x, y, edges); - - /* Edge at north */ - float c[4]; - if (edges[1] > 0.0f) { - /* Diagonals have both north and west edges, so calculating weights for them */ - /* in one of the boundaries is enough. */ - calculate_diag_weights(x, y, edges, it.out); - - /* We give priority to diagonals, so if we find a diagonal we skip. */ - /* horizontal/vertical processing. 
*/ - if (!is_zero_v2(it.out)) { - continue; - } - - /* Find the distance to the left and the right: */ - int left = search_xleft(x, y); - int right = search_xright(x, y); - int d1 = x - left, d2 = right - x; - - /* Fetch the left and right crossing edges: */ - int e1 = 0, e2 = 0; - sample_image_fn_(left, y - 1, c); - if (c[0] > 0.0) { - e1 += 1; - } - sample_image_fn_(left, y, c); - if (c[0] > 0.0) { - e1 += 2; - } - sample_image_fn_(right + 1, y - 1, c); - if (c[0] > 0.0) { - e2 += 1; - } - sample_image_fn_(right + 1, y, c); - if (c[0] > 0.0) { - e2 += 2; - } - - /* Ok, we know how this pattern looks like, now it is time for getting */ - /* the actual area: */ - area(d1, d2, e1, e2, it.out); /* R, G */ - - /* Fix corners: */ - if (corner_rounding_) { - detect_horizontal_corner_pattern(it.out, left, right, y, d1, d2); - } - } - - /* Edge at west */ - if (edges[0] > 0.0f) { - /* Did we already do diagonal search for this west edge from the left neighboring pixel? */ - if (is_vertical_search_unneeded(x, y)) { - continue; - } - - /* Find the distance to the top and the bottom: */ - int top = search_yup(x, y); - int bottom = search_ydown(x, y); - int d1 = y - top, d2 = bottom - y; - - /* Fetch the top and bottom crossing edges: */ - int e1 = 0, e2 = 0; - sample_image_fn_(x - 1, top, c); - if (c[1] > 0.0) { - e1 += 1; - } - sample_image_fn_(x, top, c); - if (c[1] > 0.0) { - e1 += 2; - } - sample_image_fn_(x - 1, bottom + 1, c); - if (c[1] > 0.0) { - e2 += 1; - } - sample_image_fn_(x, bottom + 1, c); - if (c[1] > 0.0) { - e2 += 2; - } - - /* Get the area for this direction: */ - area(d1, d2, e1, e2, it.out + 2); /* B, A */ - - /* Fix corners: */ - if (corner_rounding_) { - detect_vertical_corner_pattern(it.out + 2, x, top, bottom, d1, d2); - } - } - } -} - -void SMAABlendingWeightCalculationOperation::get_area_of_interest(const int /*input_idx*/, - const rcti &output_area, - rcti &r_input_area) +void SMAAOperation::get_area_of_interest(const int /*input_idx*/, + const 
rcti &output_area, + rcti &r_input_area) { r_input_area.xmax = output_area.xmax + - fmax(SMAA_MAX_SEARCH_STEPS, SMAA_MAX_SEARCH_STEPS_DIAG + 1); - r_input_area.xmin = output_area.xmin - - fmax(fmax(SMAA_MAX_SEARCH_STEPS - 1, 1), SMAA_MAX_SEARCH_STEPS_DIAG + 1); - r_input_area.ymax = output_area.ymax + fmax(SMAA_MAX_SEARCH_STEPS, SMAA_MAX_SEARCH_STEPS_DIAG); - r_input_area.ymin = output_area.ymin - - fmax(fmax(SMAA_MAX_SEARCH_STEPS - 1, 1), SMAA_MAX_SEARCH_STEPS_DIAG); + math::max(SMAA_MAX_SEARCH_STEPS, SMAA_MAX_SEARCH_STEPS_DIAG + 1); + r_input_area.xmin = output_area.xmin - math::max(math::max(SMAA_MAX_SEARCH_STEPS - 1, 1), + SMAA_MAX_SEARCH_STEPS_DIAG + 1); + r_input_area.ymax = output_area.ymax + + math::max(SMAA_MAX_SEARCH_STEPS, SMAA_MAX_SEARCH_STEPS_DIAG); + r_input_area.ymin = output_area.ymin - math::max(math::max(SMAA_MAX_SEARCH_STEPS - 1, 1), + SMAA_MAX_SEARCH_STEPS_DIAG); } -/*-----------------------------------------------------------------------------*/ -/* Diagonal Search Functions */ - -int SMAABlendingWeightCalculationOperation::search_diag1(int x, int y, int dir, bool *r_found) +void SMAAOperation::update_memory_buffer(MemoryBuffer *output, + const rcti & /*area*/, + Span inputs) { - float e[4]; - int end = x + SMAA_MAX_SEARCH_STEPS_DIAG * dir; - *r_found = false; - - while (x != end) { - x += dir; - y -= dir; - sample_image_fn_(x, y, e); - if (e[1] == 0.0f) { - *r_found = true; - break; - } - if (e[0] == 0.0f) { - *r_found = true; - return (dir < 0) ? x : x - dir; - } - } - - return x - dir; -} - -int SMAABlendingWeightCalculationOperation::search_diag2(int x, int y, int dir, bool *r_found) -{ - float e[4]; - int end = x + SMAA_MAX_SEARCH_STEPS_DIAG * dir; - *r_found = false; - - while (x != end) { - x += dir; - y += dir; - sample_image_fn_(x, y, e); - if (e[1] == 0.0f) { - *r_found = true; - break; - } - sample_image_fn_(x + 1, y, e); - if (e[0] == 0.0f) { - *r_found = true; - return (dir > 0) ? 
x : x - dir; - } - } - - return x - dir; -} - -void SMAABlendingWeightCalculationOperation::calculate_diag_weights(int x, - int y, - const float edges[2], - float weights[2]) -{ - int d1, d2; - bool d1_found, d2_found; - float e[4], c[4]; - - zero_v2(weights); - - if (SMAA_MAX_SEARCH_STEPS_DIAG <= 0) { + const MemoryBuffer *image = inputs[0]; + if (image->is_a_single_elem()) { + copy_v4_v4(output->get_elem(0, 0), image->get_elem(0, 0)); return; } - /* Search for the line ends: */ - if (edges[0] > 0.0f) { - d1 = x - search_diag1(x, y, -1, &d1_found); - } - else { - d1 = 0; - d1_found = true; - } - d2 = search_diag1(x, y, 1, &d2_found) - x; + const int2 size = int2(image->get_width(), image->get_height()); + MemoryBuffer edges(DataType::Float2, size.x, size.y); - if (d1 + d2 > 2) { /* d1 + d2 + 1 > 3 */ - int e1 = 0, e2 = 0; + float3 luminance_coefficients; + IMB_colormanagement_get_luminance_coefficients(luminance_coefficients); - if (d1_found) { - /* Fetch the crossing edges: */ - int left = x - d1, bottom = y + d1; + threading::parallel_for(IndexRange(size.y), 1, [&](const IndexRange sub_y_range) { + for (const int64_t y : sub_y_range) { + for (const int64_t x : IndexRange(size.x)) { + int2 texel = int2(x, y); + float2 coordinates = (float2(texel) + float2(0.5f)) / float2(size); - sample_image_fn_(left - 1, bottom, c); - if (c[1] > 0.0) { - e1 += 2; - } - sample_image_fn_(left, bottom, c); - if (c[0] > 0.0) { - e1 += 1; + float4 offset[3]; + SMAAEdgeDetectionVS(coordinates, size, offset); + + float2 edge = SMAALumaEdgeDetectionPS(coordinates, + offset, + image, + threshold_, + luminance_coefficients, + local_contrast_adaptation_factor_); + copy_v2_v2(edges.get_elem(texel.x, texel.y), edge); } } + }); - if (d2_found) { - /* Fetch the crossing edges: */ - int right = x + d2, top = y - d2; + MemoryBuffer blending_weights(DataType::Color, size.x, size.y); - sample_image_fn_(right + 1, top, c); - if (c[1] > 0.0) { - e2 += 2; - } - sample_image_fn_(right + 1, top - 1, 
c); - if (c[0] > 0.0) { - e2 += 1; + MemoryBuffer area_texture(DataType::Float2, AREATEX_WIDTH, AREATEX_HEIGHT); + area_texture.copy_from(areaTexBytes, area_texture.get_rect()); + + MemoryBuffer search_texture(DataType::Value, SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT); + search_texture.copy_from(searchTexBytes, search_texture.get_rect()); + + threading::parallel_for(IndexRange(size.y), 1, [&](const IndexRange sub_y_range) { + for (const int64_t y : sub_y_range) { + for (const int64_t x : IndexRange(size.x)) { + int2 texel = int2(x, y); + float2 coordinates = (float2(texel) + float2(0.5f)) / float2(size); + + float4 offset[3]; + float2 pixel_coordinates; + SMAABlendingWeightCalculationVS(coordinates, size, pixel_coordinates, offset); + + float4 weights = SMAABlendingWeightCalculationPS(coordinates, + pixel_coordinates, + offset, + &edges, + &area_texture, + &search_texture, + float4(0.0f), + size, + corner_rounding_); + copy_v4_v4(blending_weights.get_elem(texel.x, texel.y), weights); } } + }); - /* Fetch the areas for this line: */ - area_diag(d1, d2, e1, e2, weights); - } + threading::parallel_for(IndexRange(size.y), 1, [&](const IndexRange sub_y_range) { + for (const int64_t y : sub_y_range) { + for (const int64_t x : IndexRange(size.x)) { + int2 texel = int2(x, y); + float2 coordinates = (float2(texel) + float2(0.5f)) / float2(size); - /* Search for the line ends: */ - d1 = x - search_diag2(x, y, -1, &d1_found); - sample_image_fn_(x + 1, y, e); - if (e[0] > 0.0f) { - d2 = search_diag2(x, y, 1, &d2_found) - x; - } - else { - d2 = 0; - d2_found = true; - } + float4 offset; + SMAANeighborhoodBlendingVS(coordinates, size, offset); - if (d1 + d2 > 2) { /* d1 + d2 + 1 > 3 */ - int e1 = 0, e2 = 0; - - if (d1_found) { - /* Fetch the crossing edges: */ - int left = x - d1, top = y - d1; - - sample_image_fn_(left - 1, top, c); - if (c[1] > 0.0) { - e1 += 2; - } - sample_image_fn_(left, top - 1, c); - if (c[0] > 0.0) { - e1 += 1; + float4 result = SMAANeighborhoodBlendingPS( + 
coordinates, offset, image, &blending_weights, size); + copy_v4_v4(output->get_elem(texel.x, texel.y), result); } } - - if (d2_found) { - /* Fetch the crossing edges: */ - int right = x + d2, bottom = y + d2; - - sample_image_fn_(right + 1, bottom, c); - if (c[1] > 0.0) { - e2 += 2; - } - if (c[0] > 0.0) { - e2 += 1; - } - } - - /* Fetch the areas for this line: */ - float w[2]; - area_diag(d1, d2, e1, e2, w); - weights[0] += w[1]; - weights[1] += w[0]; - } -} - -bool SMAABlendingWeightCalculationOperation::is_vertical_search_unneeded(int x, int y) -{ - int d1, d2; - bool found; - float e[4]; - - if (SMAA_MAX_SEARCH_STEPS_DIAG <= 0) { - return false; - } - - /* Search for the line ends: */ - sample_image_fn_(x - 1, y, e); - if (e[1] > 0.0f) { - d1 = x - search_diag2(x - 1, y, -1, &found); - } - else { - d1 = 0; - } - d2 = search_diag2(x - 1, y, 1, &found) - x; - - return (d1 + d2 > 2); /* d1 + d2 + 1 > 3 */ -} - -/*-----------------------------------------------------------------------------*/ -/* Horizontal/Vertical Search Functions */ - -int SMAABlendingWeightCalculationOperation::search_xleft(int x, int y) -{ - int end = x - SMAA_MAX_SEARCH_STEPS; - float e[4]; - - while (x > end) { - sample_image_fn_(x, y, e); - if (e[1] == 0.0f) { /* Is the edge not activated? */ - break; - } - if (e[0] != 0.0f) { /* Or is there a crossing edge that breaks the line? */ - return x; - } - sample_image_fn_(x, y - 1, e); - if (e[0] != 0.0f) { /* Or is there a crossing edge that breaks the line? */ - return x; - } - x--; - } - - return x + 1; -} - -int SMAABlendingWeightCalculationOperation::search_xright(int x, int y) -{ - int end = x + SMAA_MAX_SEARCH_STEPS; - float e[4]; - - while (x < end) { - x++; - sample_image_fn_(x, y, e); - if (e[1] == 0.0f || /* Is the edge not activated? */ - e[0] != 0.0f) /* Or is there a crossing edge that breaks the line? */ - { - break; - } - sample_image_fn_(x, y - 1, e); - if (e[0] != 0.0f) { /* Or is there a crossing edge that breaks the line? 
*/ - break; - } - } - - return x - 1; -} - -int SMAABlendingWeightCalculationOperation::search_yup(int x, int y) -{ - int end = y - SMAA_MAX_SEARCH_STEPS; - float e[4]; - - while (y > end) { - sample_image_fn_(x, y, e); - if (e[0] == 0.0f) { /* Is the edge not activated? */ - break; - } - if (e[1] != 0.0f) { /* Or is there a crossing edge that breaks the line? */ - return y; - } - sample_image_fn_(x - 1, y, e); - if (e[1] != 0.0f) { /* Or is there a crossing edge that breaks the line? */ - return y; - } - y--; - } - - return y + 1; -} - -int SMAABlendingWeightCalculationOperation::search_ydown(int x, int y) -{ - int end = y + SMAA_MAX_SEARCH_STEPS; - float e[4]; - - while (y < end) { - y++; - sample_image_fn_(x, y, e); - if (e[0] == 0.0f || /* Is the edge not activated? */ - e[1] != 0.0f) /* Or is there a crossing edge that breaks the line? */ - { - break; - } - sample_image_fn_(x - 1, y, e); - if (e[1] != 0.0f) { /* Or is there a crossing edge that breaks the line? */ - break; - } - } - - return y - 1; -} - -/*-----------------------------------------------------------------------------*/ -/* Corner Detection Functions */ - -void SMAABlendingWeightCalculationOperation::detect_horizontal_corner_pattern( - float weights[2], int left, int right, int y, int d1, int d2) -{ - float factor[2] = {1.0f, 1.0f}; - float rounding = corner_rounding_ / 100.0f; - float e[4]; - - /* Reduce blending for pixels in the center of a line. */ - rounding *= (d1 == d2) ? 
0.5f : 1.0f; - - /* Near the left corner */ - if (d1 <= d2) { - sample_image_fn_(left, y + 1, e); - factor[0] -= rounding * e[0]; - sample_image_fn_(left, y - 2, e); - factor[1] -= rounding * e[0]; - } - /* Near the right corner */ - if (d1 >= d2) { - sample_image_fn_(right + 1, y + 1, e); - factor[0] -= rounding * e[0]; - sample_image_fn_(right + 1, y - 2, e); - factor[1] -= rounding * e[0]; - } - - weights[0] *= std::clamp(factor[0], 0.0f, 1.0f); - weights[1] *= std::clamp(factor[1], 0.0f, 1.0f); -} - -void SMAABlendingWeightCalculationOperation::detect_vertical_corner_pattern( - float weights[2], int x, int top, int bottom, int d1, int d2) -{ - float factor[2] = {1.0f, 1.0f}; - float rounding = corner_rounding_ / 100.0f; - float e[4]; - - /* Reduce blending for pixels in the center of a line. */ - rounding *= (d1 == d2) ? 0.5f : 1.0f; - - /* Near the top corner */ - if (d1 <= d2) { - sample_image_fn_(x + 1, top, e); - factor[0] -= rounding * e[1]; - sample_image_fn_(x - 2, top, e); - factor[1] -= rounding * e[1]; - } - /* Near the bottom corner */ - if (d1 >= d2) { - sample_image_fn_(x + 1, bottom + 1, e); - factor[0] -= rounding * e[1]; - sample_image_fn_(x - 2, bottom + 1, e); - factor[1] -= rounding * e[1]; - } - - weights[0] *= std::clamp(factor[0], 0.0f, 1.0f); - weights[1] *= std::clamp(factor[1], 0.0f, 1.0f); -} - -/*-----------------------------------------------------------------------------*/ -/* Neighborhood Blending (Third Pass) */ -/*-----------------------------------------------------------------------------*/ - -SMAANeighborhoodBlendingOperation::SMAANeighborhoodBlendingOperation() -{ - this->add_input_socket(DataType::Color); /* image */ - this->add_input_socket(DataType::Color); /* blend */ - this->add_output_socket(DataType::Color); - flags_.can_be_constant = true; -} - -void SMAANeighborhoodBlendingOperation::update_memory_buffer_partial(MemoryBuffer *output, - const rcti &out_area, - Span inputs) -{ - MemoryBuffer *image1 = inputs[0]; - 
MemoryBuffer *image2 = inputs[1]; - for (BuffersIterator it = output->iterate_with({}, out_area); !it.is_end(); ++it) { - const float x = it.x; - const float y = it.y; - float w[4]; - - /* Fetch the blending weights for current pixel: */ - image2->read_elem_checked(x, y, w); - const float left = w[2], top = w[0]; - image2->read_elem_checked(x + 1, y, w); - const float right = w[3]; - image2->read_elem_checked(x, y + 1, w); - const float bottom = w[1]; - - /* Is there any blending weight with a value greater than 0.0? */ - if (right + bottom + left + top < 1e-5f) { - image1->read_elem_checked(x, y, it.out); - continue; - } - - /* Calculate the blending offsets: */ - void (*sample_fn)(MemoryBuffer *reader, int x, int y, float xoffset, float color[4]); - float offset1, offset2, weight1, weight2, color1[4], color2[4]; - - if (fmaxf(right, left) > fmaxf(bottom, top)) { /* `max(horizontal) > max(vertical)` */ - sample_fn = sample_bilinear_horizontal; - offset1 = right; - offset2 = -left; - weight1 = right / (right + left); - weight2 = left / (right + left); - } - else { - sample_fn = sample_bilinear_vertical; - offset1 = bottom; - offset2 = -top; - weight1 = bottom / (bottom + top); - weight2 = top / (bottom + top); - } - - /* We exploit bilinear filtering to mix current pixel with the chosen neighbor: */ - sample_fn(image1, x, y, offset1, color1); - sample_fn(image1, x, y, offset2, color2); - - mul_v4_v4fl(it.out, color1, weight1); - madd_v4_v4fl(it.out, color2, weight2); - } -} - -void SMAANeighborhoodBlendingOperation::get_area_of_interest(const int /*input_idx*/, - const rcti &output_area, - rcti &r_input_area) -{ - r_input_area = output_area; - expand_area_for_sampler(r_input_area, PixelSampler::Bilinear); + }); } } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_SMAAOperation.h b/source/blender/compositor/operations/COM_SMAAOperation.h index 845611705e2..3f379e30716 100644 --- 
a/source/blender/compositor/operations/COM_SMAAOperation.h +++ b/source/blender/compositor/operations/COM_SMAAOperation.h @@ -4,89 +4,38 @@ #pragma once -#include "COM_MultiThreadedOperation.h" +#include "COM_NodeOperation.h" namespace blender::compositor { -/*-----------------------------------------------------------------------------*/ -/* Edge Detection (First Pass) */ - -class SMAAEdgeDetectionOperation : public MultiThreadedOperation { +class SMAAOperation : public NodeOperation { protected: - float threshold_; - float contrast_limit_; + float threshold_ = 0.1f; + float local_contrast_adaptation_factor_ = 2.0f; + int corner_rounding_ = 25; public: - SMAAEdgeDetectionOperation(); + SMAAOperation(); - void set_threshold(float threshold); + void set_threshold(float threshold) + { + threshold_ = threshold; + } - void set_local_contrast_adaptation_factor(float factor); + void set_local_contrast_adaptation_factor(float factor) + { + local_contrast_adaptation_factor_ = factor; + } + + void set_corner_rounding(int corner_rounding) + { + corner_rounding_ = corner_rounding; + } void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override; - void update_memory_buffer_partial(MemoryBuffer *output, - const rcti &area, - Span inputs) override; -}; - -/*-----------------------------------------------------------------------------*/ -/* Blending Weight Calculation (Second Pass) */ - -class SMAABlendingWeightCalculationOperation : public MultiThreadedOperation { - private: - std::function sample_image_fn_; - int corner_rounding_; - - public: - SMAABlendingWeightCalculationOperation(); - - void set_corner_rounding(float rounding); - - void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override; - void update_memory_buffer_started(MemoryBuffer *output, - const rcti &area, - Span inputs) override; - void update_memory_buffer_partial(MemoryBuffer *output, - const rcti &area, - Span inputs) override; - - private: 
- /* Diagonal Search Functions */ - /** - * These functions allows to perform diagonal pattern searches. - */ - int search_diag1(int x, int y, int dir, bool *r_found); - int search_diag2(int x, int y, int dir, bool *r_found); - /** - * This searches for diagonal patterns and returns the corresponding weights. - */ - void calculate_diag_weights(int x, int y, const float edges[2], float weights[2]); - bool is_vertical_search_unneeded(int x, int y); - - /* Horizontal/Vertical Search Functions */ - int search_xleft(int x, int y); - int search_xright(int x, int y); - int search_yup(int x, int y); - int search_ydown(int x, int y); - - /* Corner Detection Functions */ - void detect_horizontal_corner_pattern( - float weights[2], int left, int right, int y, int d1, int d2); - void detect_vertical_corner_pattern( - float weights[2], int x, int top, int bottom, int d1, int d2); -}; - -/*-----------------------------------------------------------------------------*/ -/* Neighborhood Blending (Third Pass) */ - -class SMAANeighborhoodBlendingOperation : public MultiThreadedOperation { - public: - SMAANeighborhoodBlendingOperation(); - - void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override; - void update_memory_buffer_partial(MemoryBuffer *output, - const rcti &area, - Span inputs) override; + void update_memory_buffer(MemoryBuffer *output, + const rcti &area, + Span inputs) override; }; } // namespace blender::compositor diff --git a/source/blender/compositor/operations/COM_TranslateOperation.h b/source/blender/compositor/operations/COM_TranslateOperation.h index 0179cc62c95..5beaa234093 100644 --- a/source/blender/compositor/operations/COM_TranslateOperation.h +++ b/source/blender/compositor/operations/COM_TranslateOperation.h @@ -7,6 +7,8 @@ #include "COM_ConstantOperation.h" #include "COM_MultiThreadedOperation.h" +#include + namespace blender::compositor { class TranslateOperation : public MultiThreadedOperation { @@ -21,6 +23,8 @@ 
class TranslateOperation : public MultiThreadedOperation { bool is_delta_set_; bool is_relative_; + std::mutex mutex_; + protected: MemoryBufferExtend x_extend_mode_; MemoryBufferExtend y_extend_mode_; @@ -50,6 +54,11 @@ class TranslateOperation : public MultiThreadedOperation { inline void ensure_delta() { if (!is_delta_set_) { + std::unique_lock lock(mutex_); + if (is_delta_set_) { + return; + } + delta_x_ = get_input_operation(X_INPUT_INDEX)->get_constant_value_default(0.0f); delta_y_ = get_input_operation(Y_INPUT_INDEX)->get_constant_value_default(0.0f); if (get_is_relative()) { diff --git a/source/blender/compositor/realtime_compositor/cached_resources/intern/cached_image.cc b/source/blender/compositor/realtime_compositor/cached_resources/intern/cached_image.cc index 2121f13a1e6..29ed5b0909d 100644 --- a/source/blender/compositor/realtime_compositor/cached_resources/intern/cached_image.cc +++ b/source/blender/compositor/realtime_compositor/cached_resources/intern/cached_image.cc @@ -15,6 +15,7 @@ #include "GPU_shader.hh" #include "GPU_texture.hh" +#include "IMB_colormanagement.hh" #include "IMB_imbuf.hh" #include "IMB_imbuf_types.hh" @@ -56,74 +57,6 @@ bool operator==(const CachedImageKey &a, const CachedImageKey &b) * Cached Image. */ -/* Returns a new texture of the given format and precision preprocessed using the given shader. The - * input texture is freed. 
*/ -static GPUTexture *preprocess_texture(Context &context, - GPUTexture *input_texture, - eGPUTextureFormat target_format, - ResultPrecision precision, - const char *shader_name) -{ - const int2 size = int2(GPU_texture_width(input_texture), GPU_texture_height(input_texture)); - - GPUTexture *preprocessed_texture = GPU_texture_create_2d( - "Cached Image", size.x, size.y, 1, target_format, GPU_TEXTURE_USAGE_GENERAL, nullptr); - - GPUShader *shader = context.get_shader(shader_name, precision); - GPU_shader_bind(shader); - - const int input_unit = GPU_shader_get_sampler_binding(shader, "input_tx"); - GPU_texture_bind(input_texture, input_unit); - - const int image_unit = GPU_shader_get_sampler_binding(shader, "output_img"); - GPU_texture_image_bind(preprocessed_texture, image_unit); - - compute_dispatch_threads_at_least(shader, size); - - GPU_shader_unbind(); - GPU_texture_unbind(input_texture); - GPU_texture_image_unbind(preprocessed_texture); - GPU_texture_free(input_texture); - - return preprocessed_texture; -} - -/* Compositor images are expected to be always pre-multiplied, so identify if the GPU texture - * returned by the IMB module is straight and needs to be pre-multiplied. An exception is when - * the image has an alpha mode of channel packed or alpha ignore, in which case, we always ignore - * pre-multiplication. */ -static bool should_premultiply_alpha(Image *image, ImBuf *image_buffer) -{ - if (ELEM(image->alpha_mode, IMA_ALPHA_CHANNEL_PACKED, IMA_ALPHA_IGNORE)) { - return false; - } - - return !BKE_image_has_gpu_texture_premultiplied_alpha(image, image_buffer); -} - -/* Get a suitable texture format supported by the compositor given the format of the texture - * returned by the IMB module. See imb_gpu_get_format for the formats that needs to be handled. 
*/ -static eGPUTextureFormat get_compatible_texture_format(eGPUTextureFormat original_format) -{ - switch (original_format) { - case GPU_R16F: - case GPU_R32F: - case GPU_RGBA16F: - case GPU_RGBA32F: - return original_format; - case GPU_R8: - return GPU_R16F; - case GPU_RGBA8: - case GPU_SRGB8_A8: - return GPU_RGBA16F; - default: - break; - } - - BLI_assert_unreachable(); - return original_format; -} - /* Get the selected render layer selected assuming the image is a multilayer image. */ static RenderLayer *get_render_layer(Image *image, ImageUser &image_user) { @@ -205,6 +138,56 @@ static ImageUser compute_image_user_for_pass(Context &context, return image_user_for_pass; } +/* The image buffer might be stored as an sRGB 8-bit image, while the compositor expects linear + * float images, so compute a linear float buffer for the image buffer. This will also do linear + * space conversion and alpha pre-multiplication as needed. We could store those images in sRGB GPU + * textures and let the GPU do the linear space conversion, but the issue is that we don't control + * how the GPU does the conversion and so we get tiny differences across CPU and GPU compositing, + * and potentially even across GPUs/Drivers. Further, if alpha pre-multiplication is needed, we + * would need to do it ourselves, which means alpha pre-multiplication will happen before linear + * space conversion, which would produce yet another difference. So we just do everything on the + * CPU, since this is already a cached resource. + * + * To avoid conflicts with other threads, create a new image buffer and assign all the necessary + * information to it, with IB_DO_NOT_TAKE_OWNERSHIP for buffers since a deep copy is not needed. + * + * The caller should free the returned image buffer. 
*/ +static ImBuf *compute_linear_buffer(ImBuf *image_buffer) +{ + /* Do not pass the flags to the allocation function to avoid buffer allocation, but assign them + * after to retain important information like precision and alpha mode. */ + ImBuf *linear_image_buffer = IMB_allocImBuf( + image_buffer->x, image_buffer->y, image_buffer->planes, 0); + linear_image_buffer->flags = image_buffer->flags; + + /* Assign the float buffer if it exists, as well as its number of channels. */ + IMB_assign_float_buffer( + linear_image_buffer, image_buffer->float_buffer, IB_DO_NOT_TAKE_OWNERSHIP); + linear_image_buffer->channels = image_buffer->channels; + + /* If no float buffer exists, assign it then compute a float buffer from it. This is the main + * call of this function. */ + if (!linear_image_buffer->float_buffer.data) { + IMB_assign_byte_buffer( + linear_image_buffer, image_buffer->byte_buffer, IB_DO_NOT_TAKE_OWNERSHIP); + IMB_float_from_rect(linear_image_buffer); + } + + /* If the image buffer contained compressed data, assign them as well, but only if the color + * space of the buffer is linear or data, since we need linear data and can't preprocess the + * compressed buffer. If not, we fallback to the float buffer already assigned, which is + * guaranteed to exist as a fallback for compressed textures. 
*/ + const bool is_suitable_compressed_color_space = + IMB_colormanagement_space_is_data(image_buffer->byte_buffer.colorspace) || + IMB_colormanagement_space_is_scene_linear(image_buffer->byte_buffer.colorspace); + if (image_buffer->ftype == IMB_FTYPE_DDS && is_suitable_compressed_color_space) { + linear_image_buffer->ftype = IMB_FTYPE_DDS; + IMB_assign_dds_data(linear_image_buffer, image_buffer->dds_data, IB_DO_NOT_TAKE_OWNERSHIP); + } + + return linear_image_buffer; +} + CachedImage::CachedImage(Context &context, Image *image, ImageUser *image_user, @@ -227,34 +210,12 @@ CachedImage::CachedImage(Context &context, context, image, image_user, pass_name); ImBuf *image_buffer = BKE_image_acquire_ibuf(image, &image_user_for_pass, nullptr); - const bool is_premultiplied = BKE_image_has_gpu_texture_premultiplied_alpha(image, image_buffer); - texture_ = IMB_create_gpu_texture("Image Texture", image_buffer, true, is_premultiplied); + ImBuf *linear_image_buffer = compute_linear_buffer(image_buffer); + + texture_ = IMB_create_gpu_texture("Image Texture", linear_image_buffer, true, true); GPU_texture_update_mipmap_chain(texture_); - const eGPUTextureFormat original_format = GPU_texture_format(texture_); - const eGPUTextureFormat target_format = get_compatible_texture_format(original_format); - const ResultType result_type = Result::type(target_format); - const ResultPrecision precision = Result::precision(target_format); - - /* The GPU image returned by the IMB module can be in a format not supported by the compositor, - * or it might need pre-multiplication, so preprocess them first. */ - if (result_type == ResultType::Color && should_premultiply_alpha(image, image_buffer)) { - texture_ = preprocess_texture( - context, texture_, target_format, precision, "compositor_premultiply_alpha"); - } - else if (original_format != target_format) { - const char *conversion_shader_name = result_type == ResultType::Float ? 
- "compositor_convert_float_to_float" : - "compositor_convert_color_to_color"; - texture_ = preprocess_texture( - context, texture_, target_format, precision, conversion_shader_name); - } - - /* Set the alpha to 1 using swizzling if alpha is ignored. */ - if (result_type == ResultType::Color && image->alpha_mode == IMA_ALPHA_IGNORE) { - GPU_texture_swizzle_set(texture_, "rgb1"); - } - + IMB_freeImBuf(linear_image_buffer); BKE_image_release_ibuf(image, image_buffer, nullptr); } diff --git a/source/blender/editors/animation/anim_filter.cc b/source/blender/editors/animation/anim_filter.cc index 9c8d4699c7d..8f4820f4542 100644 --- a/source/blender/editors/animation/anim_filter.cc +++ b/source/blender/editors/animation/anim_filter.cc @@ -3674,138 +3674,136 @@ size_t ANIM_animdata_filter(bAnimContext *ac, void *data, eAnimCont_Types datatype) { + if (!data || !anim_data) { + return 0; + } + size_t items = 0; + switch (datatype) { + /* Action-Editing Modes */ + case ANIMCONT_ACTION: /* 'Action Editor' */ + { + Object *obact = ac->obact; + SpaceAction *saction = (SpaceAction *)ac->sl; + bDopeSheet *ads = (saction) ? &saction->ads : nullptr; - /* only filter data if there's somewhere to put it */ - if (data && anim_data) { - /* firstly filter the data */ - switch (datatype) { - /* Action-Editing Modes */ - case ANIMCONT_ACTION: /* 'Action Editor' */ - { - Object *obact = ac->obact; - SpaceAction *saction = (SpaceAction *)ac->sl; - bDopeSheet *ads = (saction) ? &saction->ads : nullptr; - - /* specially check for AnimData filter, see #36687. */ - if (UNLIKELY(filter_mode & ANIMFILTER_ANIMDATA)) { - /* all channels here are within the same AnimData block, hence this special case */ - if (LIKELY(obact->adt)) { - ANIMCHANNEL_NEW_CHANNEL(obact->adt, ANIMTYPE_ANIMDATA, (ID *)obact, nullptr); - } + /* specially check for AnimData filter, see #36687. 
*/ + if (UNLIKELY(filter_mode & ANIMFILTER_ANIMDATA)) { + /* all channels here are within the same AnimData block, hence this special case */ + if (LIKELY(obact->adt)) { + ANIMCHANNEL_NEW_CHANNEL(obact->adt, ANIMTYPE_ANIMDATA, (ID *)obact, nullptr); } - else { - /* The check for the DopeSheet summary is included here - * since the summary works here too. */ - if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) { - items += animfilter_action( - ac, anim_data, ads, static_cast(data), filter_mode, (ID *)obact); - } - } - - break; } - case ANIMCONT_SHAPEKEY: /* 'ShapeKey Editor' */ - { - Key *key = (Key *)data; - - /* specially check for AnimData filter, see #36687. */ - if (UNLIKELY(filter_mode & ANIMFILTER_ANIMDATA)) { - /* all channels here are within the same AnimData block, hence this special case */ - if (LIKELY(key->adt)) { - ANIMCHANNEL_NEW_CHANNEL(key->adt, ANIMTYPE_ANIMDATA, (ID *)key, nullptr); - } - } - else { - /* The check for the DopeSheet summary is included here - * since the summary works here too. */ - if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) { - items = animdata_filter_shapekey(ac, anim_data, key, filter_mode); - } - } - - break; - } - - /* Modes for Specialty Data Types (i.e. not keyframes) */ - case ANIMCONT_GPENCIL: { + else { + /* The check for the DopeSheet summary is included here + * since the summary works here too. */ if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) { - if (U.experimental.use_grease_pencil_version3) { - items = animdata_filter_grease_pencil(ac, anim_data, filter_mode); - } - else { - items = animdata_filter_gpencil_legacy(ac, anim_data, data, filter_mode); - } + items += animfilter_action( + ac, anim_data, ads, static_cast(data), filter_mode, (ID *)obact); } - break; } - case ANIMCONT_MASK: { + + break; + } + case ANIMCONT_SHAPEKEY: /* 'ShapeKey Editor' */ + { + Key *key = (Key *)data; + + /* specially check for AnimData filter, see #36687. 
*/ + if (UNLIKELY(filter_mode & ANIMFILTER_ANIMDATA)) { + /* all channels here are within the same AnimData block, hence this special case */ + if (LIKELY(key->adt)) { + ANIMCHANNEL_NEW_CHANNEL(key->adt, ANIMTYPE_ANIMDATA, (ID *)key, nullptr); + } + } + else { + /* The check for the DopeSheet summary is included here + * since the summary works here too. */ if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) { - items = animdata_filter_mask(ac->bmain, anim_data, data, filter_mode); + items = animdata_filter_shapekey(ac, anim_data, key, filter_mode); } - break; } - /* DopeSheet Based Modes */ - case ANIMCONT_DOPESHEET: /* 'DopeSheet Editor' */ - { - /* the DopeSheet editor is the primary place where the DopeSheet summaries are useful */ - if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) { - items += animdata_filter_dopesheet( - ac, anim_data, static_cast(data), filter_mode); - } - break; - } - case ANIMCONT_FCURVES: /* Graph Editor -> F-Curves/Animation Editing */ - case ANIMCONT_DRIVERS: /* Graph Editor -> Drivers Editing */ - case ANIMCONT_NLA: /* NLA Editor */ - { - /* all of these editors use the basic DopeSheet data for filtering options, - * but don't have all the same features */ - items = animdata_filter_dopesheet( - ac, anim_data, static_cast(data), filter_mode); - break; - } - - /* Timeline Mode - Basically the same as dopesheet, - * except we only have the summary for now */ - case ANIMCONT_TIMELINE: { - /* the DopeSheet editor is the primary place where the DopeSheet summaries are useful */ - if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) { - items += animdata_filter_dopesheet( - ac, anim_data, static_cast(data), filter_mode); - } - break; - } - - /* Special/Internal Use */ - case ANIMCONT_CHANNEL: /* animation channel */ - { - bDopeSheet *ads = ac->ads; - - /* based on the channel type, filter relevant data for this */ - items = animdata_filter_animchan( - ac, anim_data, ads, 
static_cast(data), filter_mode); - break; - } - - /* unhandled */ - default: { - printf("ANIM_animdata_filter() - Invalid datatype argument %i\n", datatype); - break; - } + break; } - /* remove any 'weedy' entries */ - items = animdata_filter_remove_invalid(anim_data); + /* Modes for Specialty Data Types (i.e. not keyframes) */ + case ANIMCONT_GPENCIL: { + if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) { + if (U.experimental.use_grease_pencil_version3) { + items = animdata_filter_grease_pencil(ac, anim_data, filter_mode); + } + else { + items = animdata_filter_gpencil_legacy(ac, anim_data, data, filter_mode); + } + } + break; + } + case ANIMCONT_MASK: { + if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) { + items = animdata_filter_mask(ac->bmain, anim_data, data, filter_mode); + } + break; + } - /* remove duplicates (if required) */ - if (filter_mode & ANIMFILTER_NODUPLIS) { - items = animdata_filter_remove_duplis(anim_data); + /* DopeSheet Based Modes */ + case ANIMCONT_DOPESHEET: /* 'DopeSheet Editor' */ + { + /* the DopeSheet editor is the primary place where the DopeSheet summaries are useful */ + if (animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) { + items += animdata_filter_dopesheet( + ac, anim_data, static_cast(data), filter_mode); + } + break; + } + case ANIMCONT_FCURVES: /* Graph Editor -> F-Curves/Animation Editing */ + case ANIMCONT_DRIVERS: /* Graph Editor -> Drivers Editing */ + case ANIMCONT_NLA: /* NLA Editor */ + { + /* all of these editors use the basic DopeSheet data for filtering options, + * but don't have all the same features */ + items = animdata_filter_dopesheet( + ac, anim_data, static_cast(data), filter_mode); + break; + } + + /* Timeline Mode - Basically the same as dopesheet, + * except we only have the summary for now */ + case ANIMCONT_TIMELINE: { + /* the DopeSheet editor is the primary place where the DopeSheet summaries are useful */ + if 
(animdata_filter_dopesheet_summary(ac, anim_data, filter_mode, &items)) { + items += animdata_filter_dopesheet( + ac, anim_data, static_cast(data), filter_mode); + } + break; + } + + /* Special/Internal Use */ + case ANIMCONT_CHANNEL: /* animation channel */ + { + bDopeSheet *ads = ac->ads; + + /* based on the channel type, filter relevant data for this */ + items = animdata_filter_animchan( + ac, anim_data, ads, static_cast(data), filter_mode); + break; + } + + /* unhandled */ + default: { + printf("ANIM_animdata_filter() - Invalid datatype argument %i\n", datatype); + break; } } - /* return the number of items in the list */ + /* remove any 'weedy' entries */ + items = animdata_filter_remove_invalid(anim_data); + + /* remove duplicates (if required) */ + if (filter_mode & ANIMFILTER_NODUPLIS) { + items = animdata_filter_remove_duplis(anim_data); + } + return items; } diff --git a/source/blender/editors/asset/intern/asset_ops.cc b/source/blender/editors/asset/intern/asset_ops.cc index 99150ad06ef..5d9d1c740db 100644 --- a/source/blender/editors/asset/intern/asset_ops.cc +++ b/source/blender/editors/asset/intern/asset_ops.cc @@ -582,7 +582,7 @@ static void ASSET_OT_catalog_delete(wmOperatorType *ot) static asset_system::AssetCatalogService *get_catalog_service(bContext *C) { const SpaceFile *sfile = CTX_wm_space_file(C); - if (!sfile) { + if (!sfile || ED_fileselect_is_file_browser(sfile)) { return nullptr; } diff --git a/source/blender/editors/grease_pencil/intern/grease_pencil_edit.cc b/source/blender/editors/grease_pencil/intern/grease_pencil_edit.cc index 57f1e1696d8..3524274ef81 100644 --- a/source/blender/editors/grease_pencil/intern/grease_pencil_edit.cc +++ b/source/blender/editors/grease_pencil/intern/grease_pencil_edit.cc @@ -1406,7 +1406,7 @@ static void GREASE_PENCIL_OT_clean_loose(wmOperatorType *ot) ot->invoke = WM_operator_props_popup_confirm; ot->exec = grease_pencil_clean_loose_exec; - ot->poll = editable_grease_pencil_poll; + ot->poll = 
active_grease_pencil_layer_poll; ot->flag = OPTYPE_REGISTER | OPTYPE_UNDO; diff --git a/source/blender/editors/io/io_cache.cc b/source/blender/editors/io/io_cache.cc index 49ac8f29383..4884318f952 100644 --- a/source/blender/editors/io/io_cache.cc +++ b/source/blender/editors/io/io_cache.cc @@ -53,8 +53,8 @@ static int cachefile_open_invoke(bContext *C, wmOperator *op, const wmEvent * /* char filepath[FILE_MAX]; Main *bmain = CTX_data_main(C); - STRNCPY(filepath, BKE_main_blendfile_path(bmain)); - BLI_path_extension_replace(filepath, sizeof(filepath), ".abc"); + /* Default to the same directory as the blend file. */ + BLI_path_split_dir_part(BKE_main_blendfile_path(bmain), filepath, sizeof(filepath)); RNA_string_set(op->ptr, "filepath", filepath); } @@ -119,7 +119,7 @@ void CACHEFILE_OT_open(wmOperatorType *ot) ot->cancel = open_cancel; WM_operator_properties_filesel(ot, - FILE_TYPE_ALEMBIC | FILE_TYPE_FOLDER, + FILE_TYPE_ALEMBIC | FILE_TYPE_USD | FILE_TYPE_FOLDER, FILE_BLENDER, FILE_OPENFILE, WM_FILESEL_FILEPATH | WM_FILESEL_RELPATH, @@ -163,8 +163,8 @@ static int cachefile_layer_open_invoke(bContext *C, wmOperator *op, const wmEven char filepath[FILE_MAX]; Main *bmain = CTX_data_main(C); - STRNCPY(filepath, BKE_main_blendfile_path(bmain)); - BLI_path_extension_replace(filepath, sizeof(filepath), ".abc"); + /* Default to the same directory as the blend file. 
*/ + BLI_path_split_dir_part(BKE_main_blendfile_path(bmain), filepath, sizeof(filepath)); RNA_string_set(op->ptr, "filepath", filepath); } @@ -215,7 +215,7 @@ void CACHEFILE_OT_layer_add(wmOperatorType *ot) ot->exec = cachefile_layer_add_exec; WM_operator_properties_filesel(ot, - FILE_TYPE_ALEMBIC | FILE_TYPE_FOLDER, + FILE_TYPE_ALEMBIC | FILE_TYPE_USD | FILE_TYPE_FOLDER, FILE_BLENDER, FILE_OPENFILE, WM_FILESEL_FILEPATH | WM_FILESEL_RELPATH, diff --git a/source/blender/editors/mesh/editmesh_tools.cc b/source/blender/editors/mesh/editmesh_tools.cc index 37c7c5c9628..673c69d1cd1 100644 --- a/source/blender/editors/mesh/editmesh_tools.cc +++ b/source/blender/editors/mesh/editmesh_tools.cc @@ -3584,7 +3584,10 @@ static int edbm_remove_doubles_exec(bContext *C, wmOperator *op) } } - BKE_reportf(op->reports, RPT_INFO, "Removed %d vertice(s)", count_multi); + BKE_reportf(op->reports, + RPT_INFO, + count_multi == 1 ? "Removed %d vertex" : "Removed %d vertices", + count_multi); return OPERATOR_FINISHED; } diff --git a/source/blender/editors/object/object_edit.cc b/source/blender/editors/object/object_edit.cc index 93fbd07f300..9805b9fc286 100644 --- a/source/blender/editors/object/object_edit.cc +++ b/source/blender/editors/object/object_edit.cc @@ -1613,7 +1613,6 @@ static int shade_smooth_exec(bContext *C, wmOperator *op) const float angle = RNA_float_get(op->ptr, "angle"); bke::mesh_sharp_edges_set_from_angle(mesh, angle, keep_sharp_edges); } - mesh.tag_sharpness_changed(); BKE_mesh_batch_cache_dirty_tag(static_cast(ob->data), BKE_MESH_BATCH_DIRTY_ALL); changed = true; } diff --git a/source/blender/editors/space_view3d/view3d_gizmo_preselect_type.cc b/source/blender/editors/space_view3d/view3d_gizmo_preselect_type.cc index 01f1ed1e320..f5738fd6340 100644 --- a/source/blender/editors/space_view3d/view3d_gizmo_preselect_type.cc +++ b/source/blender/editors/space_view3d/view3d_gizmo_preselect_type.cc @@ -506,7 +506,7 @@ void ED_view3d_gizmo_mesh_preselect_get_active(const 
bContext *C, /* weak, allocate an array just to access the index. */ Base *base = nullptr; Object *obedit = nullptr; - { + if (object_index != -1) { Vector bases = BKE_view_layer_array_from_bases_in_edit_mode( scene, view_layer, CTX_wm_view3d(C)); if (object_index < bases.size()) { diff --git a/source/blender/editors/transform/transform_ops.cc b/source/blender/editors/transform/transform_ops.cc index c43f6c5621f..6e1345d8e3d 100644 --- a/source/blender/editors/transform/transform_ops.cc +++ b/source/blender/editors/transform/transform_ops.cc @@ -705,11 +705,15 @@ void Transform_Properties(wmOperatorType *ot, int flags) RNA_def_boolean(ot->srna, "use_snap_project", false, "Project Individual Elements", ""); - /* TODO(@gfxcoder): Rename `snap_target` to `snap_source` to avoid previous ambiguity of - * "target" (now, "source" is geometry to be moved and "target" is geometry to which moved - * geometry is snapped). Use "Source snap point" and "Point on source that will snap to - * target" for name and description, respectively. */ - prop = RNA_def_enum(ot->srna, "snap_target", rna_enum_snap_source_items, 0, "Snap With", ""); + /* TODO(@gfxcoder): Rename `snap_target` to `snap_base` to avoid previous ambiguity of + * "target" (now, "base" or "source" is geometry to be moved and "target" is geometry to + * which moved geometry is snapped). */ + prop = RNA_def_enum(ot->srna, + "snap_target", + rna_enum_snap_source_items, + 0, + "Snap Base", + "Point on source that will snap to target"); RNA_def_property_flag(prop, PROP_HIDDEN); /* Target selection. 
*/ diff --git a/source/blender/geometry/GEO_join_geometries.hh b/source/blender/geometry/GEO_join_geometries.hh index 8003cf5028b..0bdb179c5db 100644 --- a/source/blender/geometry/GEO_join_geometries.hh +++ b/source/blender/geometry/GEO_join_geometries.hh @@ -13,6 +13,6 @@ bke::GeometrySet join_geometries(Span geometries, const bke::AnonymousAttributePropagationInfo &propagation_info); void join_attributes(Span src_components, - bke::GeometryComponent &r_result, - Span ignored_attributes = {}); + Span ignored_attributes, + bke::GeometryComponent &r_result); } // namespace blender::geometry diff --git a/source/blender/geometry/GEO_realize_instances.hh b/source/blender/geometry/GEO_realize_instances.hh index 08a0d8a6da1..fe69ecddb5a 100644 --- a/source/blender/geometry/GEO_realize_instances.hh +++ b/source/blender/geometry/GEO_realize_instances.hh @@ -62,16 +62,8 @@ bke::GeometrySet realize_instances(bke::GeometrySet geometry_set, const RealizeInstancesOptions &options); /** - * Join all instances into a single geometry component for each geometry type. For example, all - * mesh instances (including the already realized mesh) are joined into a single mesh. The output - * geometry set does not contain any instances. If the input did not contain any instances, it is - * returned directly. - * - * The `id` attribute has special handling. If there is an id attribute on any component, the - * output will contain an `id` attribute as well. The output id is generated by mixing/hashing ids - * of instances and of the instanced geometry data. - * - * Will realize only the instances chosen by varied_depth_option to there chosen depth. + * Same as #realize_instances but will realize only the instances chosen by + * varied_depth_option to their chosen depth. 
*/ bke::GeometrySet realize_instances(bke::GeometrySet geometry_set, const RealizeInstancesOptions &options, diff --git a/source/blender/geometry/intern/join_geometries.cc b/source/blender/geometry/intern/join_geometries.cc index 92912e9fa1a..1b1ac578dc8 100644 --- a/source/blender/geometry/intern/join_geometries.cc +++ b/source/blender/geometry/intern/join_geometries.cc @@ -74,8 +74,8 @@ static void fill_new_attribute(const Span src_compone } void join_attributes(const Span src_components, - GeometryComponent &r_result, - const Span ignored_attributes) + const Span ignored_attributes, + GeometryComponent &r_result) { const Map info = get_final_attribute_info(src_components, ignored_attributes); @@ -129,7 +129,7 @@ static void join_instances(const Span src_components, r_result.replace_instances(dst_instances.release()); auto &dst_component = r_result.get_component_for_write(); - join_attributes(src_components, dst_component, {".reference_index"}); + join_attributes(src_components, {".reference_index"}, dst_component); } static void join_volumes(const Span /*src_components*/, diff --git a/source/blender/geometry/intern/realize_instances.cc b/source/blender/geometry/intern/realize_instances.cc index 45b6d3c1285..27a735a5516 100644 --- a/source/blender/geometry/intern/realize_instances.cc +++ b/source/blender/geometry/intern/realize_instances.cc @@ -298,7 +298,7 @@ struct InstanceContext { curves(gather_info.curves.attributes.size()), instances(gather_info.instances_attriubutes.size()) { - //empty + // empty } }; @@ -753,7 +753,7 @@ static void gather_realize_tasks_recursive(GatherTasksInfo &gather_info, * is an instance, the condition is true only when the depth is exactly 0. Additionally, the * function extends its operation to instances if any of their nested children meet the first * condition. 
- * + * * Based on bke::GeometrySet::attribute_foreach */ static bool attribute_foreach(const bke::GeometrySet &geometry_set, @@ -823,12 +823,11 @@ static bool attribute_foreach(const bke::GeometrySet &geometry_set, return is_relevant; } - /** * Based on bke::GeometrySet::gather_attributes_for_propagation. * Specialized for Specialized attribute_foreach to get: * current_depth, depth_target, instance_depth and selection. -*/ + */ void static gather_attributes_for_propagation( bke::GeometrySet re_geometry_set, const Span component_types, @@ -869,7 +868,8 @@ void static gather_attributes_for_propagation( AttrDomain domain = meta_data.domain; if (dst_component_type != bke::GeometryComponent::Type::Instance && - domain == AttrDomain::Instance) { + domain == AttrDomain::Instance) + { domain = AttrDomain::Point; } @@ -983,7 +983,7 @@ static void execute_instances_tasks( continue; } - const void *attribute_ptr; + const void *attribute_ptr; if (attribute_fallback_array[attribute_index] != nullptr) { attribute_ptr = attribute_fallback_array[attribute_index]; } @@ -1014,7 +1014,7 @@ static void execute_instances_tasks( } join_attributes( - for_join_attributes, dst_component, {"position", ".reference_index", "instance_transform"}); + for_join_attributes, {"position", ".reference_index", "instance_transform"}, dst_component); } /** \} */ @@ -1990,9 +1990,7 @@ bke::GeometrySet realize_instances(bke::GeometrySet geometry_set, VariedDepthOption all_instances; all_instances.depths = VArray::ForSingle(VariedDepthOption::MAX_DEPTH, geometry_set.get_instances()->instances_num()); - IndexMaskMemory memory; - all_instances.selection = IndexMask::from_bools( - VArray::ForSingle(true, geometry_set.get_instances()->instances_num()), memory); + all_instances.selection = IndexMask(geometry_set.get_instances()->instances_num()); return realize_instances(geometry_set, options, all_instances); } @@ -2049,12 +2047,8 @@ bke::GeometrySet realize_instances(bke::GeometrySet geometry_set, const 
float4x4 transform = float4x4::identity(); InstanceContext attribute_fallbacks(gather_info); - gather_realize_tasks_recursive(gather_info, - 0, - VariedDepthOption::MAX_DEPTH, - geometry_set, - transform, - attribute_fallbacks); + gather_realize_tasks_recursive( + gather_info, 0, VariedDepthOption::MAX_DEPTH, geometry_set, transform, attribute_fallbacks); bke::GeometrySet new_geometry_set; execute_instances_tasks(gather_info.instances.instances_components_to_merge, @@ -2062,28 +2056,28 @@ bke::GeometrySet realize_instances(bke::GeometrySet geometry_set, all_instance_attributes, gather_info.instances.attribute_fallback, new_geometry_set); - + const int64_t total_points_num = get_final_points_num(gather_info.r_tasks); /* This doesn't have to be exact at all, it's just a rough estimate ot make decisions about * multi-threading (overhead). */ const int64_t approximate_used_bytes_num = total_points_num * 32; threading::memory_bandwidth_bound_task(approximate_used_bytes_num, [&]() { - execute_realize_pointcloud_tasks(options.keep_original_ids, - all_pointclouds_info, - gather_info.r_tasks.pointcloud_tasks, - all_pointclouds_info.attributes, - new_geometry_set); - execute_realize_mesh_tasks(options.keep_original_ids, - all_meshes_info, - gather_info.r_tasks.mesh_tasks, - all_meshes_info.attributes, - all_meshes_info.materials, - new_geometry_set); - execute_realize_curve_tasks(options.keep_original_ids, - all_curves_info, - gather_info.r_tasks.curve_tasks, - all_curves_info.attributes, - new_geometry_set); + execute_realize_pointcloud_tasks(options.keep_original_ids, + all_pointclouds_info, + gather_info.r_tasks.pointcloud_tasks, + all_pointclouds_info.attributes, + new_geometry_set); + execute_realize_mesh_tasks(options.keep_original_ids, + all_meshes_info, + gather_info.r_tasks.mesh_tasks, + all_meshes_info.attributes, + all_meshes_info.materials, + new_geometry_set); + execute_realize_curve_tasks(options.keep_original_ids, + all_curves_info, + 
gather_info.r_tasks.curve_tasks, + all_curves_info.attributes, + new_geometry_set); }); if (gather_info.r_tasks.first_volume) { new_geometry_set.add(*gather_info.r_tasks.first_volume); diff --git a/source/blender/gpu/metal/mtl_texture.mm b/source/blender/gpu/metal/mtl_texture.mm index ca73c206e38..f9d46627ada 100644 --- a/source/blender/gpu/metal/mtl_texture.mm +++ b/source/blender/gpu/metal/mtl_texture.mm @@ -527,6 +527,8 @@ void gpu::MTLTexture::update_sub( } } + const bool is_compressed = (format_flag_ & GPU_FORMAT_COMPRESSED); + @autoreleasepool { /* Determine totalsize of INPUT Data. */ int num_channels = to_component_len(format_); @@ -593,10 +595,12 @@ void gpu::MTLTexture::update_sub( false /* Not a clear. */ }; - /* Determine whether we can do direct BLIT or not. */ + /* Determine whether we can do direct BLIT or not. For compressed textures, + * always assume a direct blit (input data pretends to be float, but it is + * not). */ bool can_use_direct_blit = true; - if (expected_dst_bytes_per_pixel != input_bytes_per_pixel || - num_channels != destination_num_channels) + if (!is_compressed && (expected_dst_bytes_per_pixel != input_bytes_per_pixel || + num_channels != destination_num_channels)) { can_use_direct_blit = false; } @@ -620,7 +624,7 @@ void gpu::MTLTexture::update_sub( /* Safety Checks. */ if (type == GPU_DATA_UINT_24_8 || type == GPU_DATA_10_11_11_REV || - type == GPU_DATA_2_10_10_10_REV) + type == GPU_DATA_2_10_10_10_REV || is_compressed) { BLI_assert(can_use_direct_blit && "Special input data type must be a 1-1 mapping with destination texture as it " @@ -755,6 +759,12 @@ void gpu::MTLTexture::update_sub( extent[0] : ctx->pipeline_state.unpack_row_length); size_t bytes_per_image = bytes_per_row; + if (is_compressed) { + size_t block_size = to_block_size(format_); + size_t blocks_x = divide_ceil_u(extent[0], 4); + bytes_per_row = blocks_x * block_size; + bytes_per_image = bytes_per_row; + } int max_array_index = ((type_ == GPU_TEXTURE_1D_ARRAY) ? 
extent[1] : 1); for (int array_index = 0; array_index < max_array_index; array_index++) { @@ -827,6 +837,13 @@ void gpu::MTLTexture::update_sub( extent[0] : ctx->pipeline_state.unpack_row_length); size_t bytes_per_image = bytes_per_row * extent[1]; + if (is_compressed) { + size_t block_size = to_block_size(format_); + size_t blocks_x = divide_ceil_u(extent[0], 4); + size_t blocks_y = divide_ceil_u(extent[1], 4); + bytes_per_row = blocks_x * block_size; + bytes_per_image = bytes_per_row * blocks_y; + } size_t texture_array_relative_offset = 0; int base_slice = (type_ == GPU_TEXTURE_2D_ARRAY) ? offset[2] : 0; @@ -1218,6 +1235,12 @@ void gpu::MTLTexture::ensure_mipmaps(int miplvl) void gpu::MTLTexture::generate_mipmap() { + /* Compressed textures allow users to provide their own custom mipmaps. And + * we can't generate them at runtime anyway. */ + if (format_flag_ & GPU_FORMAT_COMPRESSED) { + return; + } + /* Fetch Active Context. */ MTLContext *ctx = static_cast(unwrap(GPU_context_active_get())); BLI_assert(ctx); diff --git a/source/blender/gpu/metal/mtl_texture_util.mm b/source/blender/gpu/metal/mtl_texture_util.mm index 86773f048b7..8b29a582833 100644 --- a/source/blender/gpu/metal/mtl_texture_util.mm +++ b/source/blender/gpu/metal/mtl_texture_util.mm @@ -160,13 +160,17 @@ MTLPixelFormat gpu_texture_format_to_metal(eGPUTextureFormat tex_format) return MTLPixelFormatR8Snorm; /* Special formats, texture only. */ case GPU_SRGB8_A8_DXT1: + return MTLPixelFormatBC1_RGBA_sRGB; case GPU_SRGB8_A8_DXT3: + return MTLPixelFormatBC2_RGBA_sRGB; case GPU_SRGB8_A8_DXT5: + return MTLPixelFormatBC3_RGBA_sRGB; case GPU_RGBA8_DXT1: + return MTLPixelFormatBC1_RGBA; case GPU_RGBA8_DXT3: + return MTLPixelFormatBC2_RGBA; case GPU_RGBA8_DXT5: - BLI_assert_msg(false, "Compressed texture not implemented yet!\n"); - return MTLPixelFormatRGBA8Unorm; + return MTLPixelFormatBC3_RGBA; case GPU_SRGB8: /* 24-Bit pixel format are not supported. Emulate using a padded type with alpha. 
*/ return MTLPixelFormatRGBA8Unorm_sRGB; @@ -247,6 +251,14 @@ size_t get_mtl_format_bytesize(MTLPixelFormat tex_format) return 4; case MTLPixelFormatDepth16Unorm: return 2; + case MTLPixelFormatBC1_RGBA: + case MTLPixelFormatBC1_RGBA_sRGB: + return 1; /* Note: not quite correct (BC1 is 0.5 bpp). */ + case MTLPixelFormatBC2_RGBA: + case MTLPixelFormatBC2_RGBA_sRGB: + case MTLPixelFormatBC3_RGBA: + case MTLPixelFormatBC3_RGBA_sRGB: + return 1; default: BLI_assert_msg(false, "Unrecognised GPU pixel format!\n"); @@ -272,6 +284,12 @@ int get_mtl_format_num_components(MTLPixelFormat tex_format) case MTLPixelFormatRGBA8Unorm_sRGB: case MTLPixelFormatRGB10A2Uint: case MTLPixelFormatRGB10A2Unorm: + case MTLPixelFormatBC1_RGBA_sRGB: + case MTLPixelFormatBC2_RGBA_sRGB: + case MTLPixelFormatBC3_RGBA_sRGB: + case MTLPixelFormatBC1_RGBA: + case MTLPixelFormatBC2_RGBA: + case MTLPixelFormatBC3_RGBA: return 4; case MTLPixelFormatRG11B10Float: diff --git a/source/blender/imbuf/IMB_imbuf.hh b/source/blender/imbuf/IMB_imbuf.hh index 16377b37edd..a945811b406 100644 --- a/source/blender/imbuf/IMB_imbuf.hh +++ b/source/blender/imbuf/IMB_imbuf.hh @@ -118,6 +118,21 @@ ImBuf *IMB_allocFromBuffer(const uint8_t *byte_buffer, void IMB_assign_byte_buffer(ImBuf *ibuf, uint8_t *buffer_data, ImBufOwnership ownership); void IMB_assign_float_buffer(ImBuf *ibuf, float *buffer_data, ImBufOwnership ownership); +/** + * Assign the content and the color space of the corresponding buffer the data from the given + * buffer. + * + * \note Does not modify the topology (width, height, number of channels) + * or the mipmaps in any way. + * + * \note The ownership of the data in the source buffer is ignored. 
+ */ +void IMB_assign_byte_buffer(ImBuf *ibuf, const ImBufByteBuffer &buffer, ImBufOwnership ownership); +void IMB_assign_float_buffer(ImBuf *ibuf, + const ImBufFloatBuffer &buffer, + ImBufOwnership ownership); +void IMB_assign_dds_data(ImBuf *ibuf, const DDSData &data, ImBufOwnership ownership); + /** * Make corresponding buffers available for modification. * Is achieved by ensuring that the given ImBuf is the only owner of the underlying buffer data. diff --git a/source/blender/imbuf/IMB_imbuf_types.hh b/source/blender/imbuf/IMB_imbuf_types.hh index e40f109c8a0..629bf94fb1e 100644 --- a/source/blender/imbuf/IMB_imbuf_types.hh +++ b/source/blender/imbuf/IMB_imbuf_types.hh @@ -34,17 +34,6 @@ struct IDProperty; #define IMB_MIPMAP_LEVELS 20 #define IMB_FILEPATH_SIZE 1024 -struct DDSData { - /** DDS fourcc info */ - unsigned int fourcc; - /** The number of mipmaps in the dds file */ - unsigned int nummipmaps; - /** The compressed image data */ - unsigned char *data; - /** The size of the compressed data */ - unsigned int size; -}; - /** * \ingroup imbuf * This is the abstraction of an image. ImBuf is the basic type used for all imbuf operations. @@ -143,6 +132,19 @@ enum ImBufOwnership { IB_TAKE_OWNERSHIP = 1, }; +struct DDSData { + /** DDS fourcc info */ + unsigned int fourcc; + /** The number of mipmaps in the dds file */ + unsigned int nummipmaps; + /** The compressed image data */ + unsigned char *data; + /** The size of the compressed data */ + unsigned int size; + /** Who owns the data buffer. */ + ImBufOwnership ownership; +}; + /* Different storage specialization. 
* * NOTE: Avoid direct assignments and allocations, use the buffer utilities from the IMB_imbuf.hh diff --git a/source/blender/imbuf/intern/allocimbuf.cc b/source/blender/imbuf/intern/allocimbuf.cc index 5cfea171dea..40b2e3aac97 100644 --- a/source/blender/imbuf/intern/allocimbuf.cc +++ b/source/blender/imbuf/intern/allocimbuf.cc @@ -84,6 +84,27 @@ template static void imb_free_buffer(BufferType &buffer) buffer.ownership = IB_DO_NOT_TAKE_OWNERSHIP; } +/* Free the specified DDS buffer storage, freeing memory when needed and restoring the state of the + * buffer to its defaults. */ +static void imb_free_dds_buffer(DDSData &dds_data) +{ + if (dds_data.data) { + switch (dds_data.ownership) { + case IB_DO_NOT_TAKE_OWNERSHIP: + break; + + case IB_TAKE_OWNERSHIP: + /* dds_data.data is allocated by DirectDrawSurface::readData(), so don't use MEM_freeN! */ + free(dds_data.data); + break; + } + } + + /* Reset buffer to defaults. */ + dds_data.data = nullptr; + dds_data.ownership = IB_DO_NOT_TAKE_OWNERSHIP; +} + /* Allocate pixel storage of the given buffer. The buffer owns the allocated memory. * Returns true of allocation succeeded, false otherwise. */ template @@ -249,11 +270,7 @@ void IMB_freeImBuf(ImBuf *ibuf) IMB_free_gpu_textures(ibuf); IMB_metadata_free(ibuf->metadata); colormanage_cache_free(ibuf); - - if (ibuf->dds_data.data != nullptr) { - /* dds_data.data is allocated by DirectDrawSurface::readData(), so don't use MEM_freeN! 
*/ - free(ibuf->dds_data.data); - } + imb_free_dds_buffer(ibuf->dds_data); MEM_freeN(ibuf); } } @@ -472,6 +489,32 @@ void IMB_assign_float_buffer(ImBuf *ibuf, float *buffer_data, const ImBufOwnersh } } +void IMB_assign_byte_buffer(ImBuf *ibuf, + const ImBufByteBuffer &buffer, + const ImBufOwnership ownership) +{ + IMB_assign_byte_buffer(ibuf, buffer.data, ownership); + ibuf->byte_buffer.colorspace = buffer.colorspace; +} + +void IMB_assign_float_buffer(ImBuf *ibuf, + const ImBufFloatBuffer &buffer, + const ImBufOwnership ownership) +{ + IMB_assign_float_buffer(ibuf, buffer.data, ownership); + ibuf->float_buffer.colorspace = buffer.colorspace; +} + +void IMB_assign_dds_data(ImBuf *ibuf, const DDSData &data, const ImBufOwnership ownership) +{ + BLI_assert(ibuf->ftype == IMB_FTYPE_DDS); + + imb_free_dds_buffer(ibuf->dds_data); + + ibuf->dds_data = data; + ibuf->dds_data.ownership = ownership; +} + ImBuf *IMB_allocFromBufferOwn( uint8_t *byte_buffer, float *float_buffer, uint w, uint h, uint channels) { diff --git a/source/blender/imbuf/intern/format_dds.cc b/source/blender/imbuf/intern/format_dds.cc index b5b26a10ca0..0b6a88dbcb6 100644 --- a/source/blender/imbuf/intern/format_dds.cc +++ b/source/blender/imbuf/intern/format_dds.cc @@ -330,6 +330,7 @@ static void LoadDXTCImage(ImBuf *ibuf, Filesystem::IOMemReader &mem_reader) ibuf->dds_data.size = mem_reader.size() - dds_header_size; ibuf->dds_data.data = (uchar *)malloc(ibuf->dds_data.size); mem_reader.pread(ibuf->dds_data.data, ibuf->dds_data.size, dds_header_size); + ibuf->dds_data.ownership = IB_TAKE_OWNERSHIP; /* Flip compressed image data to match OpenGL convention. 
*/ FlipDXTCImage(ibuf); diff --git a/source/blender/makesrna/intern/rna_modifier.cc b/source/blender/makesrna/intern/rna_modifier.cc index 862ede4a6d4..59cd6da07eb 100644 --- a/source/blender/makesrna/intern/rna_modifier.cc +++ b/source/blender/makesrna/intern/rna_modifier.cc @@ -7703,6 +7703,8 @@ static void rna_def_modifier_nodes_bake(BlenderRNA *brna) StructRNA *srna; PropertyRNA *prop; + RNA_define_lib_overridable(true); + srna = RNA_def_struct(brna, "NodesModifierBake", nullptr); RNA_def_struct_ui_text(srna, "Nodes Modifier Bake", ""); @@ -7758,6 +7760,8 @@ static void rna_def_modifier_nodes_bake(BlenderRNA *brna) RNA_def_property_struct_type(prop, "NodesModifierDataBlock"); RNA_def_property_collection_sdna(prop, nullptr, "data_blocks", "data_blocks_num"); RNA_def_property_srna(prop, "NodesModifierBakeDataBlocks"); + + RNA_define_lib_overridable(false); } static void rna_def_modifier_nodes_bakes(BlenderRNA *brna) diff --git a/source/blender/makesrna/intern/rna_scene.cc b/source/blender/makesrna/intern/rna_scene.cc index 1cece2ee975..a4bbd1de0ff 100644 --- a/source/blender/makesrna/intern/rna_scene.cc +++ b/source/blender/makesrna/intern/rna_scene.cc @@ -3574,7 +3574,7 @@ static void rna_def_tool_settings(BlenderRNA *brna) prop, "rna_ToolSettings_snap_mode_get", "rna_ToolSettings_snap_mode_set", nullptr); RNA_def_property_flag(prop, PROP_ENUM_FLAG); RNA_def_property_ui_text( - prop, "Snap Element", "Type of element for the \"Snap With\" to snap to"); + prop, "Snap Element", "Type of element for the \"Snap Base\" to snap to"); RNA_def_property_update(prop, NC_SCENE | ND_TOOLSETTINGS, nullptr); /* header redraw */ prop = RNA_def_property(srna, "snap_elements_individual", PROP_ENUM, PROP_NONE); diff --git a/source/blender/nodes/geometry/nodes/node_geo_realize_instances.cc b/source/blender/nodes/geometry/nodes/node_geo_realize_instances.cc index 2fd19b7b9df..3f9f64e6a47 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_realize_instances.cc +++ 
b/source/blender/nodes/geometry/nodes/node_geo_realize_instances.cc @@ -18,13 +18,13 @@ static void node_declare(NodeDeclarationBuilder &b) b.add_input("Selection") .default_value(true) .hide_value() - .supports_field() + .field_on_all() .description("Which top-level instances to realize"); b.add_input("Realize All") .default_value(true) - .supports_field() + .field_on_all() .description("Determine wether to realize nested instances completly"); - b.add_input("Depth").default_value(0).min(0).supports_field().description( + b.add_input("Depth").default_value(0).min(0).field_on_all().description( "Number of levels of nested instances to realize for each top-level instance"); b.add_output("Geometry").propagate_all(); } @@ -38,24 +38,29 @@ static void node_geo_exec(GeoNodeExecParams params) } GeometryComponentEditData::remember_deformed_positions_if_necessary(geometry_set); - Field selection_field = params.extract_input>("Selection"); - Field realize_all_filed = params.extract_input>("Realize All"); + + Field realize_all_field = params.extract_input>("Realize All"); Field depth_field = params.extract_input>("Depth"); static auto depth_override = mf::build::SI2_SO( "depth_override", - [](int value, bool realize) { return realize ? -1 : std::max(value, 0); }, + [](int depth, bool realize_all_field) { + return realize_all_field ? geometry::VariedDepthOption::MAX_DEPTH : std::max(depth, 0); + }, mf::build::exec_presets::AllSpanOrSingle()); + Field depth_field_overrided(FieldOperation::Create( + depth_override, {std::move(depth_field), std::move(realize_all_field)})); + + Field selection_field = params.extract_input>("Selection"); + static auto selection_override = mf::build::SI2_SO( "selection_override", - [](int value, bool selection) { return value == 0 ? false : selection; }, + [](int depth_override, bool selection) { return depth_override == 0 ? 
false : selection; }, mf::build::exec_presets::AllSpanOrSingle()); - Field depth_field_overrided( - FieldOperation::Create(depth_override, {depth_field, realize_all_filed})); - Field selection_field_overrided( - FieldOperation::Create(selection_override, {depth_field_overrided, selection_field})); + Field selection_field_overrided(FieldOperation::Create( + selection_override, {depth_field_overrided, std::move(selection_field)})); const bke::Instances &instances = *geometry_set.get_instances(); const bke::InstancesFieldContext field_context(instances); diff --git a/source/blender/nodes/geometry/nodes/node_geo_set_curve_normal.cc b/source/blender/nodes/geometry/nodes/node_geo_set_curve_normal.cc index d97c8561b69..ded077ee86f 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_set_curve_normal.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_set_curve_normal.cc @@ -21,7 +21,10 @@ static void node_declare(NodeDeclarationBuilder &b) b.add_input("Curve").supported_type( {GeometryComponent::Type::Curve, GeometryComponent::Type::GreasePencil}); b.add_input("Selection").default_value(true).hide_value().field_on_all(); - b.add_input("Normal").default_value({0.0f, 0.0f, 1.0f}).field_on_all(); + b.add_input("Normal") + .default_value({0.0f, 0.0f, 1.0f}) + .subtype(PROP_XYZ) + .field_on_all(); b.add_output("Curve").propagate_all(); } diff --git a/source/blender/python/gpu/gpu_py_vertex_buffer.cc b/source/blender/python/gpu/gpu_py_vertex_buffer.cc index 310e4b5aa21..0602e5ecf98 100644 --- a/source/blender/python/gpu/gpu_py_vertex_buffer.cc +++ b/source/blender/python/gpu/gpu_py_vertex_buffer.cc @@ -241,7 +241,7 @@ static PyObject *pygpu_vertbuf__tp_new(PyTypeObject * /*type*/, PyObject *args, PY_ARG_PARSER_HEAD_COMPAT() "O!" 
/* `format` */ "I" /* `len` */ - ":blender::gpu::VertBuf.__new__", + ":GPUVertBuf.__new__", _keywords, nullptr, }; @@ -307,7 +307,7 @@ static PyObject *pygpu_vertbuf_attr_fill(BPyGPUVertBuf *self, PyObject *args, Py return nullptr; } - if (!pygpu_vertbuf_fill(self->buf, id, data, "blender::gpu::VertBuf.attr_fill")) { + if (!pygpu_vertbuf_fill(self->buf, id, data, "GPUVertBuf.attr_fill")) { return nullptr; } @@ -340,7 +340,7 @@ static void pygpu_vertbuf__tp_dealloc(BPyGPUVertBuf *self) PyDoc_STRVAR( /* Wrap. */ pygpu_vertbuf__tp_doc, - ".. class:: blender::gpu::VertBuf(format, len)\n" + ".. class:: GPUVertBuf(format, len)\n" "\n" " Contains a VBO.\n" "\n" @@ -350,7 +350,7 @@ PyDoc_STRVAR( " :type len: int\n"); PyTypeObject BPyGPUVertBuf_Type = { /*ob_base*/ PyVarObject_HEAD_INIT(nullptr, 0) - /*tp_name*/ "blender::gpu::VertBuf", + /*tp_name*/ "GPUVertBuf", /*tp_basicsize*/ sizeof(BPyGPUVertBuf), /*tp_itemsize*/ 0, /*tp_dealloc*/ (destructor)pygpu_vertbuf__tp_dealloc, diff --git a/source/blender/render/intern/engine.cc b/source/blender/render/intern/engine.cc index 5c68fc0ee98..d079fc1b70a 100644 --- a/source/blender/render/intern/engine.cc +++ b/source/blender/render/intern/engine.cc @@ -1314,8 +1314,8 @@ bool RE_engine_gpu_context_enable(RenderEngine *engine) /* Activate RenderEngine System and Blender GPU Context. */ WM_system_gpu_context_activate(engine->system_gpu_context); if (engine->blender_gpu_context) { - GPU_context_active_set(engine->blender_gpu_context); GPU_render_begin(); + GPU_context_active_set(engine->blender_gpu_context); } return true; } @@ -1330,8 +1330,8 @@ void RE_engine_gpu_context_disable(RenderEngine *engine) else { if (engine->system_gpu_context) { if (engine->blender_gpu_context) { - GPU_render_end(); GPU_context_active_set(nullptr); + GPU_render_end(); } WM_system_gpu_context_release(engine->system_gpu_context); /* Restore DRW state context if previously active. 
*/ diff --git a/tests/data b/tests/data index 5038ad7165f..bf5c7083054 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 5038ad7165fd1a77e61e0d2d6efdadd6ea7c0dfb +Subproject commit bf5c70830540b215a3b1df21f28e0e80ead230f7