Cycles: Added quality parameter for OIDN #115265

Merged
Stefan Werner merged 2 commits from Stefan_Werner/blender:oidn_quality into main 2023-11-23 12:35:38 +01:00
198 changed files with 5601 additions and 2245 deletions
Showing only changes of commit f1800b2516

View File

@ -599,7 +599,7 @@ doc_dna: .FORCE
@echo "docs written into: '$(BLENDER_DIR)/doc/blender_file_format/dna.html'"
doc_man: .FORCE
@$(PYTHON) doc/manpage/blender.1.py --blender="$(BLENDER_BIN)" --output=blender.1 --verbose
@$(BLENDER_BIN) --background --python doc/manpage/blender.1.py -- --output=blender.1 --verbose
help_features: .FORCE
@$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_print_build_options.py" $(BLENDER_DIR)"/CMakeLists.txt"

View File

@ -303,7 +303,7 @@ DEPS_MANDATORY_SUBPACKAGES = (
},
),
Package(name="FreeType Library",
distro_package_names={DISTRO_ID_DEBIAN: "libfreetype6-dev",
distro_package_names={DISTRO_ID_DEBIAN: "libfreetype-dev",
DISTRO_ID_FEDORA: "freetype-devel",
DISTRO_ID_SUSE: "freetype2-devel",
DISTRO_ID_ARCH: "freetype2",
@ -505,6 +505,13 @@ DEPS_OPTIONAL_SUBPACKAGES = (
DISTRO_ID_ARCH: ...,
},
),
Package(name="Deflate Library",
distro_package_names={DISTRO_ID_DEBIAN: "libdeflate-dev",
DISTRO_ID_FEDORA: "libdeflate-devel",
DISTRO_ID_SUSE: "libdeflate-devel",
DISTRO_ID_ARCH: "libdeflate",
},
),
)
@ -548,7 +555,7 @@ PYTHON_SUBPACKAGES = (
DISTRO_ID_ARCH: "python-urllib3",
},
),
Package(name="Certifi", version="2021.10.08", version_short="2021.10", version_min="2021.0", version_mex="2023.0",
Package(name="Certifi", version="2021.10.08", version_short="2021.10", version_min="2021.0", version_mex="2025.0",
distro_package_names={DISTRO_ID_DEBIAN: "python3-certifi",
DISTRO_ID_FEDORA: "python3-certifi",
DISTRO_ID_SUSE: suse_pypackages_name_gen("certifi"),
@ -569,14 +576,14 @@ PYTHON_SUBPACKAGES = (
DISTRO_ID_ARCH: "python-zstandard",
},
),
Package(name="NumPy", version="1.23.5", version_short="1.23", version_min="1.14", version_mex="2.0",
Package(name="NumPy", version="1.24.3", version_short="1.24", version_min="1.14", version_mex="2.0",
distro_package_names={DISTRO_ID_DEBIAN: "python3-numpy",
DISTRO_ID_FEDORA: "python3-numpy",
DISTRO_ID_SUSE: suse_pypackages_name_gen("numpy"),
DISTRO_ID_ARCH: "python-numpy",
},
),
Package(name="NumPy Devel", version="1.23.5", version_short="1.23", version_min="1.14", version_mex="2.0",
Package(name="NumPy Devel", version="1.24.3", version_short="1.24", version_min="1.14", version_mex="2.0",
distro_package_names={DISTRO_ID_DEBIAN: ...,
DISTRO_ID_FEDORA: ...,
DISTRO_ID_SUSE: suse_pypackages_name_gen("numpy-devel"),
@ -706,7 +713,7 @@ PACKAGES_ALL = (
DISTRO_ID_ARCH: "clang", # clang-format is part of the main clang package.
},
),
Package(name="Python", is_mandatory=True, version="3.10.12", version_short="3.10", version_min="3.10", version_mex="3.12",
Package(name="Python", is_mandatory=True, version="3.11.6", version_short="3.11", version_min="3.11", version_mex="3.13",
sub_packages=PYTHON_SUBPACKAGES,
distro_package_names={DISTRO_ID_DEBIAN: "python3-dev",
DISTRO_ID_FEDORA: "python3-devel",
@ -714,7 +721,7 @@ PACKAGES_ALL = (
DISTRO_ID_ARCH: "python",
},
),
Package(name="Boost Libraries", is_mandatory=True, version="1.80.0", version_short="1.80", version_min="1.49", version_mex="2.0",
Package(name="Boost Libraries", is_mandatory=True, version="1.82.0", version_short="1.82", version_min="1.49", version_mex="2.0",
sub_packages=BOOST_SUBPACKAGES,
distro_package_names={DISTRO_ID_DEBIAN: "libboost-dev",
DISTRO_ID_FEDORA: "boost-devel",
@ -730,7 +737,7 @@ PACKAGES_ALL = (
DISTRO_ID_ARCH: "intel-oneapi-tbb",
},
),
Package(name="OpenColorIO Library", is_mandatory=False, version="2.2.0", version_short="2.2", version_min="2.0", version_mex="3.0",
Package(name="OpenColorIO Library", is_mandatory=False, version="2.3.0", version_short="2.3", version_min="2.0", version_mex="3.0",
sub_packages=(),
distro_package_names={DISTRO_ID_DEBIAN: "libopencolorio-dev",
DISTRO_ID_FEDORA: "OpenColorIO-devel",
@ -738,7 +745,7 @@ PACKAGES_ALL = (
DISTRO_ID_ARCH: "opencolorio",
},
),
Package(name="IMath Library", is_mandatory=False, version="3.1.7", version_short="3.1", version_min="3.0", version_mex="4.0",
Package(name="IMath Library", is_mandatory=False, version="3.2.1", version_short="3.2", version_min="3.0", version_mex="4.0",
sub_packages=(),
distro_package_names={DISTRO_ID_DEBIAN: "libimath-dev",
DISTRO_ID_FEDORA: "imath-devel",
@ -746,7 +753,7 @@ PACKAGES_ALL = (
DISTRO_ID_ARCH: "imath",
},
),
Package(name="OpenEXR Library", is_mandatory=False, version="3.1.7", version_short="3.1", version_min="3.0", version_mex="4.0",
Package(name="OpenEXR Library", is_mandatory=False, version="3.2.1", version_short="3.2", version_min="3.0", version_mex="4.0",
sub_packages=(),
distro_package_names={DISTRO_ID_DEBIAN: "libopenexr-dev",
DISTRO_ID_FEDORA: "openexr-devel",
@ -801,7 +808,7 @@ PACKAGES_ALL = (
DISTRO_ID_ARCH: "openshadinglanguage",
},
),
Package(name="OpenSubDiv Library", is_mandatory=False, version="3.5.0", version_short="3.5", version_min="3.5", version_mex="4.0",
Package(name="OpenSubDiv Library", is_mandatory=False, version="3.6.0", version_short="3.6", version_min="3.5", version_mex="4.0",
sub_packages=(),
distro_package_names={DISTRO_ID_DEBIAN: "libosd-dev",
DISTRO_ID_FEDORA: "opensubdiv-devel",
@ -809,7 +816,7 @@ PACKAGES_ALL = (
DISTRO_ID_ARCH: "opensubdiv",
},
),
Package(name="OpenVDB Library", is_mandatory=False, version="10.0.0", version_short="10.0", version_min="10.0", version_mex="11.0",
Package(name="OpenVDB Library", is_mandatory=False, version="11.0.0", version_short="11.0", version_min="10.0", version_mex="12.0",
sub_packages=(
# Assume packaged versions of the dependencies are compatible with OpenVDB package.
Package(name="OpenVDB Dependencies", is_mandatory=False, is_group=True,
@ -845,7 +852,7 @@ PACKAGES_ALL = (
DISTRO_ID_ARCH: "alembic",
},
),
Package(name="MaterialX Library", is_mandatory=False, version="1.38.6", version_short="1.38", version_min="1.38", version_mex="1.40",
Package(name="MaterialX Library", is_mandatory=False, version="1.38.8", version_short="1.38", version_min="1.38", version_mex="1.40",
sub_packages=(),
distro_package_names={DISTRO_ID_DEBIAN: None,
DISTRO_ID_FEDORA: None,
@ -876,7 +883,7 @@ PACKAGES_ALL = (
DISTRO_ID_ARCH: "embree",
},
),
Package(name="OpenImageDenoiser Library", is_mandatory=False, version="1.4.3", version_short="1.4", version_min="1.4.0", version_mex="1.5",
Package(name="OpenImageDenoiser Library", is_mandatory=False, version="2.1.0", version_short="2.1", version_min="2.0.0", version_mex="3.0",
sub_packages=(),
distro_package_names={DISTRO_ID_DEBIAN: None,
DISTRO_ID_FEDORA: "oidn-devel",

View File

@ -41,7 +41,7 @@ def blender_extract_info() -> Dict[str, str]:
# Happens when built without WITH_BUILD_INFO e.g.
blender_date = time.strftime("%B %d, %Y", time.gmtime(int(os.environ.get('SOURCE_DATE_EPOCH', time.time()))))
else:
blender_date = time.strftime("%B %d, %Y", time.strptime(blender_build_date_text, "%Y-%m-%d"))
blender_date = time.strftime("%B %d, %Y", time.strptime(blender_build_date_text.decode(), "%Y-%m-%d"))
return {
"help": blender_help_text,

View File

@ -1,7 +1,7 @@
Project: {fmt}
URL: https://github.com/fmtlib/fmt
License: MIT
Upstream version: 10.0.0 (a0b8a92, 2023 May 10)
Upstream version: 10.1.1 (f5e5435, 2023 Aug 28)
Local modifications:
- Took only files needed for Blender:

View File

@ -22,6 +22,9 @@
:alt: Ask questions at StackOverflow with the tag fmt
:target: https://stackoverflow.com/questions/tagged/fmt
.. image:: https://api.securityscorecards.dev/projects/github.com/fmtlib/fmt/badge
:target: https://securityscorecards.dev/viewer/?uri=github.com/fmtlib/fmt
**{fmt}** is an open-source formatting library providing a fast and safe
alternative to C stdio and C++ iostreams.
@ -49,6 +52,7 @@ Features
* Fast IEEE 754 floating-point formatter with correct rounding, shortness and
round-trip guarantees using the `Dragonbox <https://github.com/jk-jeon/dragonbox>`_
algorithm
* Portable Unicode support
* Safe `printf implementation
<https://fmt.dev/latest/api.html#printf-formatting>`_ including the POSIX
extension for positional arguments
@ -65,7 +69,7 @@ Features
<https://github.com/fmtlib/fmt/tree/master/test>`_ and is `continuously fuzzed
<https://bugs.chromium.org/p/oss-fuzz/issues/list?colspec=ID%20Type%20
Component%20Status%20Proj%20Reported%20Owner%20Summary&q=proj%3Dfmt&can=1>`_
* Safety: the library is fully type safe, errors in format strings can be
* Safety: the library is fully type-safe, errors in format strings can be
reported at compile time, automatic memory management prevents buffer overflow
errors
* Ease of use: small self-contained code base, no external dependencies,
@ -75,7 +79,7 @@ Features
consistent output across platforms and support for older compilers
* Clean warning-free codebase even on high warning levels such as
``-Wall -Wextra -pedantic``
* Locale-independence by default
* Locale independence by default
* Optional header-only configuration enabled with the ``FMT_HEADER_ONLY`` macro
See the `documentation <https://fmt.dev>`_ for more details.
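A minimal sketch of the header-only configuration mentioned above (standard {fmt} usage, not part of this diff):

// Define FMT_HEADER_ONLY before the first include; no linking against
// the fmt library is then required.
#define FMT_HEADER_ONLY
#include <fmt/format.h>

int main() { fmt::print("pi is roughly {:.2f}\n", 3.14159); }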
@ -225,7 +229,7 @@ The script `bloat-test.py
from `format-benchmark <https://github.com/fmtlib/format-benchmark>`_
tests compile time and code bloat for nontrivial projects.
It generates 100 translation units and uses ``printf()`` or its alternative
five times in each to simulate a medium sized project. The resulting
five times in each to simulate a medium-sized project. The resulting
executable size and compile time (Apple LLVM version 8.1.0 (clang-802.0.42),
macOS Sierra, best of three) is shown in the following tables.
@ -246,7 +250,7 @@ As you can see, {fmt} has 60% less overhead in terms of resulting binary code
size compared to iostreams and comes pretty close to ``printf``. Boost Format
and Folly Format have the largest overheads.
``printf+string`` is the same as ``printf`` but with extra ``<string>``
``printf+string`` is the same as ``printf`` but with an extra ``<string>``
include to measure the overhead of the latter.
**Non-optimized build**
@ -262,14 +266,14 @@ Boost Format 54.1 365 303
Folly Format 79.9 445 430
============= =============== ==================== ==================
``libc``, ``lib(std)c++`` and ``libfmt`` are all linked as shared libraries to
``libc``, ``lib(std)c++``, and ``libfmt`` are all linked as shared libraries to
compare formatting function overhead only. Boost Format is a
header-only library so it doesn't provide any linkage options.
Running the tests
~~~~~~~~~~~~~~~~~
Please refer to `Building the library`__ for the instructions on how to build
Please refer to `Building the library`__ for instructions on how to build
the library and run the unit tests.
__ https://fmt.dev/latest/usage.html#building-the-library
@ -294,9 +298,12 @@ or the bloat test::
Migrating code
--------------
`clang-tidy-fmt <https://github.com/mikecrowe/clang-tidy-fmt>`_ provides clang
tidy checks for converting occurrences of ``printf`` and ``fprintf`` to
``fmt::print``.
`clang-tidy <https://clang.llvm.org/extra/clang-tidy/>`_ v17 (not yet
released) provides the `modernize-use-std-print
<https://clang.llvm.org/extra/clang-tidy/checks/modernize/use-std-print.html>`_
check that is capable of converting occurrences of ``printf`` and
``fprintf`` to ``fmt::print`` if configured to do so. (By default it
converts to ``std::print``.)
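For illustration, the kind of mechanical rewrite these checks perform (hypothetical snippet, not taken from either tool):

#include <cstdio>
#include <fmt/core.h>

// Before: classic printf with a C format string.
void report_old(int n, double secs) {
  std::printf("loaded %d files in %.1f s\n", n, secs);
}

// After conversion (configured to emit fmt::print): the format string
// is rewritten to {fmt}'s replacement-field syntax as well.
void report_new(int n, double secs) {
  fmt::print("loaded {} files in {:.1f} s\n", n, secs);
}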
Projects using this library
---------------------------
@ -304,8 +311,6 @@ Projects using this library
* `0 A.D. <https://play0ad.com/>`_: a free, open-source, cross-platform
real-time strategy game
* `2GIS <https://2gis.ru/>`_: free business listings with a city map
* `AMPL/MP <https://github.com/ampl/mp>`_:
an open-source library for mathematical programming
@ -396,7 +401,7 @@ Projects using this library
proxy
* `redpanda <https://vectorized.io/redpanda>`_: a 10x faster Kafka® replacement
for mission critical systems written in C++
for mission-critical systems written in C++
* `rpclib <http://rpclib.net/>`_: a modern C++ msgpack-RPC server and client
library
@ -480,7 +485,7 @@ error handling is awkward.
Boost Format
~~~~~~~~~~~~
This is a very powerful library which supports both ``printf``-like format
This is a very powerful library that supports both ``printf``-like format
strings and positional arguments. Its main drawback is performance. According to
various benchmarks, it is much slower than other methods considered here. Boost
Format also has excessive build times and severe code bloat issues (see
@ -489,7 +494,7 @@ Format also has excessive build times and severe code bloat issues (see
FastFormat
~~~~~~~~~~
This is an interesting library which is fast, safe and has positional arguments.
This is an interesting library that is fast, safe, and has positional arguments.
However, it has significant limitations, citing its author:
Three features that have no hope of being accommodated within the
@ -505,7 +510,7 @@ restrictive for using it in some projects.
Boost Spirit.Karma
~~~~~~~~~~~~~~~~~~
This is not really a formatting library but I decided to include it here for
This is not a formatting library but I decided to include it here for
completeness. As iostreams, it suffers from the problem of mixing verbatim text
with arguments. The library is pretty fast, but slower on integer formatting
than ``fmt::format_to`` with format string compilation on Karma's own benchmark,
@ -524,7 +529,7 @@ Documentation License
The `Format String Syntax <https://fmt.dev/latest/syntax.html>`_
section in the documentation is based on the one from Python `string module
documentation <https://docs.python.org/3/library/string.html#module-string>`_.
For this reason the documentation is distributed under the Python Software
For this reason, the documentation is distributed under the Python Software
Foundation license available in `doc/python-license.txt
<https://raw.github.com/fmtlib/fmt/master/doc/python-license.txt>`_.
It only applies if you distribute the documentation of {fmt}.

View File

@ -13,11 +13,12 @@
#include <cstring> // std::strlen
#include <iterator>
#include <limits>
#include <memory> // std::addressof
#include <string>
#include <type_traits>
// The fmt library version in the form major * 10000 + minor * 100 + patch.
#define FMT_VERSION 100000
#define FMT_VERSION 100101
#if defined(__clang__) && !defined(__ibmxl__)
# define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__)
@ -92,7 +93,7 @@
#ifndef FMT_USE_CONSTEXPR
# if (FMT_HAS_FEATURE(cxx_relaxed_constexpr) || FMT_MSC_VERSION >= 1912 || \
(FMT_GCC_VERSION >= 600 && FMT_CPLUSPLUS >= 201402L)) && \
!FMT_ICC_VERSION && !defined(__NVCC__)
!FMT_ICC_VERSION && (!defined(__NVCC__) || FMT_CPLUSPLUS >= 202002L)
# define FMT_USE_CONSTEXPR 1
# else
# define FMT_USE_CONSTEXPR 0
@ -162,9 +163,6 @@
# endif
#endif
// An inline std::forward replacement.
#define FMT_FORWARD(...) static_cast<decltype(__VA_ARGS__)&&>(__VA_ARGS__)
#ifdef _MSC_VER
# define FMT_UNCHECKED_ITERATOR(It) \
using _Unchecked_type = It // Mark iterator as checked.
@ -181,8 +179,8 @@
}
#endif
#ifndef FMT_MODULE_EXPORT
# define FMT_MODULE_EXPORT
#ifndef FMT_EXPORT
# define FMT_EXPORT
# define FMT_BEGIN_EXPORT
# define FMT_END_EXPORT
#endif
@ -244,12 +242,6 @@
# endif
#endif
#if defined __cpp_inline_variables && __cpp_inline_variables >= 201606L
# define FMT_INLINE_VARIABLE inline
#else
# define FMT_INLINE_VARIABLE
#endif
// Enable minimal optimizations for more compact code in debug mode.
FMT_GCC_PRAGMA("GCC push_options")
#if !defined(__OPTIMIZE__) && !defined(__NVCOMPILER) && !defined(__LCC__) && \
@ -276,6 +268,11 @@ template <typename T> using type_identity_t = typename type_identity<T>::type;
template <typename T>
using underlying_t = typename std::underlying_type<T>::type;
// Checks whether T is a container with contiguous storage.
template <typename T> struct is_contiguous : std::false_type {};
template <typename Char>
struct is_contiguous<std::basic_string<Char>> : std::true_type {};
struct monostate {
constexpr monostate() {}
};
@ -289,8 +286,11 @@ struct monostate {
# define FMT_ENABLE_IF(...) fmt::enable_if_t<(__VA_ARGS__), int> = 0
#endif
// This is defined in core.h instead of format.h to avoid injecting in std.
// It is a template to avoid undesirable implicit conversions to std::byte.
#ifdef __cpp_lib_byte
inline auto format_as(std::byte b) -> unsigned char {
template <typename T, FMT_ENABLE_IF(std::is_same<T, std::byte>::value)>
inline auto format_as(T b) -> unsigned char {
return static_cast<unsigned char>(b);
}
#endif
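A usage sketch for the overload above (assumes a C++17 standard library providing std::byte):

#include <cstddef>
#include <fmt/core.h>

int main() {
  fmt::print("{}\n", std::byte{42});  // mapped through format_as, prints "42"
}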
@ -394,7 +394,7 @@ FMT_CONSTEXPR inline auto is_utf8() -> bool {
compiled with a different ``-std`` option than the client code (which is not
recommended).
*/
FMT_MODULE_EXPORT
FMT_EXPORT
template <typename Char> class basic_string_view {
private:
const Char* data_;
@ -497,11 +497,11 @@ template <typename Char> class basic_string_view {
}
};
FMT_MODULE_EXPORT
FMT_EXPORT
using string_view = basic_string_view<char>;
/** Specifies if ``T`` is a character type. Can be specialized by users. */
FMT_MODULE_EXPORT
FMT_EXPORT
template <typename T> struct is_char : std::false_type {};
template <> struct is_char<char> : std::true_type {};
@ -639,6 +639,9 @@ struct error_handler {
};
} // namespace detail
/** Throws ``format_error`` with a given message. */
using detail::throw_format_error;
/** String's character type. */
template <typename S> using char_t = typename detail::char_t_impl<S>::type;
@ -649,7 +652,7 @@ template <typename S> using char_t = typename detail::char_t_impl<S>::type;
You can use the ``format_parse_context`` type alias for ``char`` instead.
\endrst
*/
FMT_MODULE_EXPORT
FMT_EXPORT
template <typename Char> class basic_format_parse_context {
private:
basic_string_view<Char> format_str_;
@ -715,7 +718,7 @@ template <typename Char> class basic_format_parse_context {
FMT_CONSTEXPR void check_dynamic_spec(int arg_id);
};
FMT_MODULE_EXPORT
FMT_EXPORT
using format_parse_context = basic_format_parse_context<char>;
namespace detail {
@ -756,72 +759,6 @@ class compile_parse_context : public basic_format_parse_context<Char> {
#endif
}
};
} // namespace detail
template <typename Char>
FMT_CONSTEXPR void basic_format_parse_context<Char>::do_check_arg_id(int id) {
// Argument id is only checked at compile-time during parsing because
// formatting has its own validation.
if (detail::is_constant_evaluated() &&
(!FMT_GCC_VERSION || FMT_GCC_VERSION >= 1200)) {
using context = detail::compile_parse_context<Char>;
if (id >= static_cast<context*>(this)->num_args())
detail::throw_format_error("argument not found");
}
}
template <typename Char>
FMT_CONSTEXPR void basic_format_parse_context<Char>::check_dynamic_spec(
int arg_id) {
if (detail::is_constant_evaluated() &&
(!FMT_GCC_VERSION || FMT_GCC_VERSION >= 1200)) {
using context = detail::compile_parse_context<Char>;
static_cast<context*>(this)->check_dynamic_spec(arg_id);
}
}
FMT_MODULE_EXPORT template <typename Context> class basic_format_arg;
FMT_MODULE_EXPORT template <typename Context> class basic_format_args;
FMT_MODULE_EXPORT template <typename Context> class dynamic_format_arg_store;
// A formatter for objects of type T.
FMT_MODULE_EXPORT
template <typename T, typename Char = char, typename Enable = void>
struct formatter {
// A deleted default constructor indicates a disabled formatter.
formatter() = delete;
};
// Specifies if T has an enabled formatter specialization. A type can be
// formattable even if it doesn't have a formatter e.g. via a conversion.
template <typename T, typename Context>
using has_formatter =
std::is_constructible<typename Context::template formatter_type<T>>;
// Checks whether T is a container with contiguous storage.
template <typename T> struct is_contiguous : std::false_type {};
template <typename Char>
struct is_contiguous<std::basic_string<Char>> : std::true_type {};
class appender;
namespace detail {
template <typename Context, typename T>
constexpr auto has_const_formatter_impl(T*)
-> decltype(typename Context::template formatter_type<T>().format(
std::declval<const T&>(), std::declval<Context&>()),
true) {
return true;
}
template <typename Context>
constexpr auto has_const_formatter_impl(...) -> bool {
return false;
}
template <typename T, typename Context>
constexpr auto has_const_formatter() -> bool {
return has_const_formatter_impl<Context>(static_cast<T*>(nullptr));
}
// Extracts a reference to the container from back_insert_iterator.
template <typename Container>
@ -903,10 +840,8 @@ template <typename T> class buffer {
/** Returns the capacity of this buffer. */
constexpr auto capacity() const noexcept -> size_t { return capacity_; }
/** Returns a pointer to the buffer data. */
/** Returns a pointer to the buffer data (not null-terminated). */
FMT_CONSTEXPR auto data() noexcept -> T* { return ptr_; }
/** Returns a pointer to the buffer data. */
FMT_CONSTEXPR auto data() const noexcept -> const T* { return ptr_; }
/** Clears this buffer. */
@ -1099,6 +1034,79 @@ template <typename T = char> class counting_buffer final : public buffer<T> {
auto count() -> size_t { return count_ + this->size(); }
};
} // namespace detail
template <typename Char>
FMT_CONSTEXPR void basic_format_parse_context<Char>::do_check_arg_id(int id) {
// Argument id is only checked at compile-time during parsing because
// formatting has its own validation.
if (detail::is_constant_evaluated() &&
(!FMT_GCC_VERSION || FMT_GCC_VERSION >= 1200)) {
using context = detail::compile_parse_context<Char>;
if (id >= static_cast<context*>(this)->num_args())
detail::throw_format_error("argument not found");
}
}
template <typename Char>
FMT_CONSTEXPR void basic_format_parse_context<Char>::check_dynamic_spec(
int arg_id) {
if (detail::is_constant_evaluated() &&
(!FMT_GCC_VERSION || FMT_GCC_VERSION >= 1200)) {
using context = detail::compile_parse_context<Char>;
static_cast<context*>(this)->check_dynamic_spec(arg_id);
}
}
FMT_EXPORT template <typename Context> class basic_format_arg;
FMT_EXPORT template <typename Context> class basic_format_args;
FMT_EXPORT template <typename Context> class dynamic_format_arg_store;
// A formatter for objects of type T.
FMT_EXPORT
template <typename T, typename Char = char, typename Enable = void>
struct formatter {
// A deleted default constructor indicates a disabled formatter.
formatter() = delete;
};
// Specifies if T has an enabled formatter specialization. A type can be
// formattable even if it doesn't have a formatter e.g. via a conversion.
template <typename T, typename Context>
using has_formatter =
std::is_constructible<typename Context::template formatter_type<T>>;
// An output iterator that appends to a buffer.
// It is used to reduce symbol sizes for the common case.
class appender : public std::back_insert_iterator<detail::buffer<char>> {
using base = std::back_insert_iterator<detail::buffer<char>>;
public:
using std::back_insert_iterator<detail::buffer<char>>::back_insert_iterator;
appender(base it) noexcept : base(it) {}
FMT_UNCHECKED_ITERATOR(appender);
auto operator++() noexcept -> appender& { return *this; }
auto operator++(int) noexcept -> appender { return *this; }
};
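This iterator backs the common buffer-appending path; a minimal usage sketch through the public API:

#include <fmt/format.h>
#include <iterator>

int main() {
  auto buf = fmt::memory_buffer();
  fmt::format_to(std::back_inserter(buf), "{}-{}", 4, 2);
  // buf now holds "4-2"; fmt::to_string(buf) yields a std::string.
}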
namespace detail {
template <typename Context, typename T>
constexpr auto has_const_formatter_impl(T*)
-> decltype(typename Context::template formatter_type<T>().format(
std::declval<const T&>(), std::declval<Context&>()),
true) {
return true;
}
template <typename Context>
constexpr auto has_const_formatter_impl(...) -> bool {
return false;
}
template <typename T, typename Context>
constexpr auto has_const_formatter() -> bool {
return has_const_formatter_impl<Context>(static_cast<T*>(nullptr));
}
template <typename T>
using buffer_appender = conditional_t<std::is_same<T, char>::value, appender,
@ -1274,9 +1282,9 @@ template <typename Context> class value {
FMT_INLINE value(const named_arg_info<char_type>* args, size_t size)
: named_args{args, size} {}
template <typename T> FMT_CONSTEXPR FMT_INLINE value(T& val) {
using value_type = remove_cvref_t<T>;
custom.value = const_cast<value_type*>(&val);
template <typename T> FMT_CONSTEXPR20 FMT_INLINE value(T& val) {
using value_type = remove_const_t<T>;
custom.value = const_cast<value_type*>(std::addressof(val));
// Get the formatter type through the context to allow different contexts
// have different extension points, e.g. `formatter<T>` for `format` and
// `printf_formatter<T>` for `printf`.
@ -1301,9 +1309,6 @@ template <typename Context> class value {
}
};
template <typename Context, typename T>
FMT_CONSTEXPR auto make_arg(T&& value) -> basic_format_arg<Context>;
// To minimize the number of types we need to deal with, long is translated
// either to int or to long long depending on its size.
enum { long_short = sizeof(long) == sizeof(int) };
@ -1415,9 +1420,8 @@ template <typename Context> struct arg_mapper {
FMT_ENABLE_IF(
std::is_pointer<T>::value || std::is_member_pointer<T>::value ||
std::is_function<typename std::remove_pointer<T>::type>::value ||
(std::is_convertible<const T&, const void*>::value &&
!std::is_convertible<const T&, const char_type*>::value &&
!has_formatter<T, Context>::value))>
(std::is_array<T>::value &&
!std::is_convertible<T, const char_type*>::value))>
FMT_CONSTEXPR auto map(const T&) -> unformattable_pointer {
return {};
}
@ -1435,30 +1439,28 @@ template <typename Context> struct arg_mapper {
return map(format_as(val));
}
template <typename T, typename U = remove_cvref_t<T>>
struct formattable
: bool_constant<has_const_formatter<U, Context>() ||
(has_formatter<U, Context>::value &&
!std::is_const<remove_reference_t<T>>::value)> {};
template <typename T, typename U = remove_const_t<T>>
struct formattable : bool_constant<has_const_formatter<U, Context>() ||
(has_formatter<U, Context>::value &&
!std::is_const<T>::value)> {};
template <typename T, FMT_ENABLE_IF(formattable<T>::value)>
FMT_CONSTEXPR FMT_INLINE auto do_map(T&& val) -> T& {
FMT_CONSTEXPR FMT_INLINE auto do_map(T& val) -> T& {
return val;
}
template <typename T, FMT_ENABLE_IF(!formattable<T>::value)>
FMT_CONSTEXPR FMT_INLINE auto do_map(T&&) -> unformattable {
FMT_CONSTEXPR FMT_INLINE auto do_map(T&) -> unformattable {
return {};
}
template <typename T, typename U = remove_cvref_t<T>,
template <typename T, typename U = remove_const_t<T>,
FMT_ENABLE_IF((std::is_class<U>::value || std::is_enum<U>::value ||
std::is_union<U>::value) &&
!is_string<U>::value && !is_char<U>::value &&
!is_named_arg<U>::value &&
!std::is_arithmetic<format_as_t<U>>::value)>
FMT_CONSTEXPR FMT_INLINE auto map(T&& val)
-> decltype(this->do_map(std::forward<T>(val))) {
return do_map(std::forward<T>(val));
FMT_CONSTEXPR FMT_INLINE auto map(T& val) -> decltype(this->do_map(val)) {
return do_map(val);
}
template <typename T, FMT_ENABLE_IF(is_named_arg<T>::value)>
@ -1481,22 +1483,121 @@ enum { packed_arg_bits = 4 };
enum { max_packed_args = 62 / packed_arg_bits };
enum : unsigned long long { is_unpacked_bit = 1ULL << 63 };
enum : unsigned long long { has_named_args_bit = 1ULL << 62 };
} // namespace detail
// An output iterator that appends to a buffer.
// It is used to reduce symbol sizes for the common case.
class appender : public std::back_insert_iterator<detail::buffer<char>> {
using base = std::back_insert_iterator<detail::buffer<char>>;
template <typename Char, typename InputIt>
auto copy_str(InputIt begin, InputIt end, appender out) -> appender {
get_container(out).append(begin, end);
return out;
}
template <typename Char, typename InputIt>
auto copy_str(InputIt begin, InputIt end,
std::back_insert_iterator<std::string> out)
-> std::back_insert_iterator<std::string> {
get_container(out).append(begin, end);
return out;
}
template <typename Char, typename R, typename OutputIt>
FMT_CONSTEXPR auto copy_str(R&& rng, OutputIt out) -> OutputIt {
return detail::copy_str<Char>(rng.begin(), rng.end(), out);
}
#if FMT_GCC_VERSION && FMT_GCC_VERSION < 500
// A workaround for gcc 4.8 to make void_t work in a SFINAE context.
template <typename...> struct void_t_impl { using type = void; };
template <typename... T> using void_t = typename void_t_impl<T...>::type;
#else
template <typename...> using void_t = void;
#endif
template <typename It, typename T, typename Enable = void>
struct is_output_iterator : std::false_type {};
template <typename It, typename T>
struct is_output_iterator<
It, T,
void_t<typename std::iterator_traits<It>::iterator_category,
decltype(*std::declval<It>() = std::declval<T>())>>
: std::true_type {};
template <typename It> struct is_back_insert_iterator : std::false_type {};
template <typename Container>
struct is_back_insert_iterator<std::back_insert_iterator<Container>>
: std::true_type {};
// A type-erased reference to an std::locale to avoid a heavy <locale> include.
class locale_ref {
private:
const void* locale_; // A type-erased pointer to std::locale.
public:
using std::back_insert_iterator<detail::buffer<char>>::back_insert_iterator;
appender(base it) noexcept : base(it) {}
FMT_UNCHECKED_ITERATOR(appender);
constexpr FMT_INLINE locale_ref() : locale_(nullptr) {}
template <typename Locale> explicit locale_ref(const Locale& loc);
auto operator++() noexcept -> appender& { return *this; }
auto operator++(int) noexcept -> appender { return *this; }
explicit operator bool() const noexcept { return locale_ != nullptr; }
template <typename Locale> auto get() const -> Locale;
};
template <typename> constexpr auto encode_types() -> unsigned long long {
return 0;
}
template <typename Context, typename Arg, typename... Args>
constexpr auto encode_types() -> unsigned long long {
return static_cast<unsigned>(mapped_type_constant<Arg, Context>::value) |
(encode_types<Context, Args...>() << packed_arg_bits);
}
#if defined(__cpp_if_constexpr)
// This type is intentionally undefined, only used for errors
template <typename T, typename Char> struct type_is_unformattable_for;
#endif
template <bool PACKED, typename Context, typename T, FMT_ENABLE_IF(PACKED)>
FMT_CONSTEXPR FMT_INLINE auto make_arg(T& val) -> value<Context> {
using arg_type = remove_cvref_t<decltype(arg_mapper<Context>().map(val))>;
constexpr bool formattable_char =
!std::is_same<arg_type, unformattable_char>::value;
static_assert(formattable_char, "Mixing character types is disallowed.");
// Formatting of arbitrary pointers is disallowed. If you want to format a
// pointer cast it to `void*` or `const void*`. In particular, this forbids
// formatting of `[const] volatile char*` printed as bool by iostreams.
constexpr bool formattable_pointer =
!std::is_same<arg_type, unformattable_pointer>::value;
static_assert(formattable_pointer,
"Formatting of non-void pointers is disallowed.");
constexpr bool formattable = !std::is_same<arg_type, unformattable>::value;
#if defined(__cpp_if_constexpr)
if constexpr (!formattable) {
type_is_unformattable_for<T, typename Context::char_type> _;
}
#endif
static_assert(
formattable,
"Cannot format an argument. To make type T formattable provide a "
"formatter<T> specialization: https://fmt.dev/latest/api.html#udt");
return {arg_mapper<Context>().map(val)};
}
template <typename Context, typename T>
FMT_CONSTEXPR auto make_arg(T& val) -> basic_format_arg<Context> {
auto arg = basic_format_arg<Context>();
arg.type_ = mapped_type_constant<T, Context>::value;
arg.value_ = make_arg<true, Context>(val);
return arg;
}
template <bool PACKED, typename Context, typename T, FMT_ENABLE_IF(!PACKED)>
FMT_CONSTEXPR inline auto make_arg(T& val) -> basic_format_arg<Context> {
return make_arg<Context>(val);
}
} // namespace detail
FMT_BEGIN_EXPORT
// A formatting argument. It is a trivially copyable/constructible type to
// allow storage in basic_memory_buffer.
template <typename Context> class basic_format_arg {
@ -1505,7 +1606,7 @@ template <typename Context> class basic_format_arg {
detail::type type_;
template <typename ContextType, typename T>
friend FMT_CONSTEXPR auto detail::make_arg(T&& value)
friend FMT_CONSTEXPR auto detail::make_arg(T& value)
-> basic_format_arg<ContextType>;
template <typename Visitor, typename Ctx>
@ -1559,7 +1660,7 @@ template <typename Context> class basic_format_arg {
``vis(value)`` will be called with the value of type ``double``.
\endrst
*/
FMT_MODULE_EXPORT
// DEPRECATED!
template <typename Visitor, typename Context>
FMT_CONSTEXPR FMT_INLINE auto visit_format_arg(
Visitor&& vis, const basic_format_arg<Context>& arg) -> decltype(vis(0)) {
@ -1601,124 +1702,6 @@ FMT_CONSTEXPR FMT_INLINE auto visit_format_arg(
return vis(monostate());
}
namespace detail {
template <typename Char, typename InputIt>
auto copy_str(InputIt begin, InputIt end, appender out) -> appender {
get_container(out).append(begin, end);
return out;
}
template <typename Char, typename R, typename OutputIt>
FMT_CONSTEXPR auto copy_str(R&& rng, OutputIt out) -> OutputIt {
return detail::copy_str<Char>(rng.begin(), rng.end(), out);
}
#if FMT_GCC_VERSION && FMT_GCC_VERSION < 500
// A workaround for gcc 4.8 to make void_t work in a SFINAE context.
template <typename...> struct void_t_impl { using type = void; };
template <typename... T> using void_t = typename void_t_impl<T...>::type;
#else
template <typename...> using void_t = void;
#endif
template <typename It, typename T, typename Enable = void>
struct is_output_iterator : std::false_type {};
template <typename It, typename T>
struct is_output_iterator<
It, T,
void_t<typename std::iterator_traits<It>::iterator_category,
decltype(*std::declval<It>() = std::declval<T>())>>
: std::true_type {};
template <typename It> struct is_back_insert_iterator : std::false_type {};
template <typename Container>
struct is_back_insert_iterator<std::back_insert_iterator<Container>>
: std::true_type {};
template <typename It>
struct is_contiguous_back_insert_iterator : std::false_type {};
template <typename Container>
struct is_contiguous_back_insert_iterator<std::back_insert_iterator<Container>>
: is_contiguous<Container> {};
template <>
struct is_contiguous_back_insert_iterator<appender> : std::true_type {};
// A type-erased reference to an std::locale to avoid a heavy <locale> include.
class locale_ref {
private:
const void* locale_; // A type-erased pointer to std::locale.
public:
constexpr FMT_INLINE locale_ref() : locale_(nullptr) {}
template <typename Locale> explicit locale_ref(const Locale& loc);
explicit operator bool() const noexcept { return locale_ != nullptr; }
template <typename Locale> auto get() const -> Locale;
};
template <typename> constexpr auto encode_types() -> unsigned long long {
return 0;
}
template <typename Context, typename Arg, typename... Args>
constexpr auto encode_types() -> unsigned long long {
return static_cast<unsigned>(mapped_type_constant<Arg, Context>::value) |
(encode_types<Context, Args...>() << packed_arg_bits);
}
template <typename Context, typename T>
FMT_CONSTEXPR FMT_INLINE auto make_value(T&& val) -> value<Context> {
auto&& arg = arg_mapper<Context>().map(FMT_FORWARD(val));
using arg_type = remove_cvref_t<decltype(arg)>;
constexpr bool formattable_char =
!std::is_same<arg_type, unformattable_char>::value;
static_assert(formattable_char, "Mixing character types is disallowed.");
// Formatting of arbitrary pointers is disallowed. If you want to format a
// pointer cast it to `void*` or `const void*`. In particular, this forbids
// formatting of `[const] volatile char*` printed as bool by iostreams.
constexpr bool formattable_pointer =
!std::is_same<arg_type, unformattable_pointer>::value;
static_assert(formattable_pointer,
"Formatting of non-void pointers is disallowed.");
constexpr bool formattable = !std::is_same<arg_type, unformattable>::value;
static_assert(
formattable,
"Cannot format an argument. To make type T formattable provide a "
"formatter<T> specialization: https://fmt.dev/latest/api.html#udt");
return {arg};
}
template <typename Context, typename T>
FMT_CONSTEXPR auto make_arg(T&& value) -> basic_format_arg<Context> {
auto arg = basic_format_arg<Context>();
arg.type_ = mapped_type_constant<T, Context>::value;
arg.value_ = make_value<Context>(value);
return arg;
}
// The DEPRECATED type template parameter is there to avoid an ODR violation
// when using a fallback formatter in one translation unit and an implicit
// conversion in another (not recommended).
template <bool IS_PACKED, typename Context, type, typename T,
FMT_ENABLE_IF(IS_PACKED)>
FMT_CONSTEXPR FMT_INLINE auto make_arg(T&& val) -> value<Context> {
return make_value<Context>(val);
}
template <bool IS_PACKED, typename Context, type, typename T,
FMT_ENABLE_IF(!IS_PACKED)>
FMT_CONSTEXPR inline auto make_arg(T&& value) -> basic_format_arg<Context> {
return make_arg<Context>(value);
}
} // namespace detail
FMT_BEGIN_EXPORT
// Formatting context.
template <typename OutputIt, typename Char> class basic_format_context {
private:
@ -1778,7 +1761,7 @@ using format_context = buffer_context<char>;
template <typename T, typename Char = char>
using is_formattable = bool_constant<!std::is_base_of<
detail::unformattable, decltype(detail::arg_mapper<buffer_context<Char>>()
.map(std::declval<T>()))>::value>;
.map(std::declval<T&>()))>::value>;
/**
\rst
@ -1796,7 +1779,7 @@ class format_arg_store
{
private:
static const size_t num_args = sizeof...(Args);
static const size_t num_named_args = detail::count_named_args<Args...>();
static constexpr size_t num_named_args = detail::count_named_args<Args...>();
static const bool is_packed = num_args <= detail::max_packed_args;
using value_type = conditional_t<is_packed, detail::value<Context>,
@ -1817,16 +1800,14 @@ class format_arg_store
public:
template <typename... T>
FMT_CONSTEXPR FMT_INLINE format_arg_store(T&&... args)
FMT_CONSTEXPR FMT_INLINE format_arg_store(T&... args)
:
#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409
basic_format_args<Context>(*this),
#endif
data_{detail::make_arg<
is_packed, Context,
detail::mapped_type_constant<remove_cvref_t<T>, Context>::value>(
FMT_FORWARD(args))...} {
detail::init_named_args(data_.named_args(), 0, 0, args...);
data_{detail::make_arg<is_packed, Context>(args)...} {
if (detail::const_check(num_named_args != 0))
detail::init_named_args(data_.named_args(), 0, 0, args...);
}
};
@ -1834,14 +1815,15 @@ class format_arg_store
\rst
Constructs a `~fmt::format_arg_store` object that contains references to
arguments and can be implicitly converted to `~fmt::format_args`. `Context`
can be omitted in which case it defaults to `~fmt::context`.
can be omitted in which case it defaults to `~fmt::format_context`.
See `~fmt::arg` for lifetime considerations.
\endrst
*/
// Arguments are taken by lvalue references to avoid some lifetime issues.
template <typename Context = format_context, typename... T>
constexpr auto make_format_args(T&&... args)
constexpr auto make_format_args(T&... args)
-> format_arg_store<Context, remove_cvref_t<T>...> {
return {FMT_FORWARD(args)...};
return {args...};
}
/**
@ -1869,7 +1851,7 @@ FMT_END_EXPORT
``vformat``::
void vlog(string_view format_str, format_args args); // OK
format_args args = make_format_args(42); // Error: dangling reference
format_args args = make_format_args(); // Error: dangling reference
\endrst
*/
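The dangling-reference caveat above is why the documented pattern splits logging into a type-erased core plus a forwarding wrapper (sketch):

#include <fmt/core.h>

void vlog(fmt::string_view fmt, fmt::format_args args) {
  fmt::vprint(fmt, args);  // args are consumed before any temporary dies
}

template <typename... T>
void log(fmt::format_string<T...> fmt, T&&... args) {
  vlog(fmt, fmt::make_format_args(args...));  // OK: used within the full expression
}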
template <typename Context> class basic_format_args {
@ -1986,7 +1968,7 @@ template <typename Context> class basic_format_args {
/** An alias to ``basic_format_args<format_context>``. */
// A separate type would result in shorter symbols but break ABI compatibility
// between clang and gcc on ARM (#1919).
FMT_MODULE_EXPORT using format_args = basic_format_args<format_context>;
FMT_EXPORT using format_args = basic_format_args<format_context>;
// We cannot use enum classes as bit fields because of a gcc bug, so we put them
// in namespaces instead (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61414).
@ -2558,7 +2540,17 @@ FMT_CONSTEXPR auto parse_format_specs(ParseContext& ctx)
mapped_type_constant<T, context>::value != type::custom_type,
decltype(arg_mapper<context>().map(std::declval<const T&>())),
typename strip_named_arg<T>::type>;
#if defined(__cpp_if_constexpr)
if constexpr (std::is_default_constructible_v<
formatter<mapped_type, char_type>>) {
return formatter<mapped_type, char_type>().parse(ctx);
} else {
type_is_unformattable_for<T, char_type> _;
return ctx.begin();
}
#else
return formatter<mapped_type, char_type>().parse(ctx);
#endif
}
// Checks char specs and returns true iff the presentation type is char-like.
@ -2574,8 +2566,6 @@ FMT_CONSTEXPR auto check_char_specs(const format_specs<Char>& specs) -> bool {
return true;
}
constexpr FMT_INLINE_VARIABLE int invalid_arg_index = -1;
#if FMT_USE_NONTYPE_TEMPLATE_ARGS
template <int N, typename T, typename... Args, typename Char>
constexpr auto get_arg_index_by_name(basic_string_view<Char> name) -> int {
@ -2585,7 +2575,7 @@ constexpr auto get_arg_index_by_name(basic_string_view<Char> name) -> int {
if constexpr (sizeof...(Args) > 0)
return get_arg_index_by_name<N + 1, Args...>(name);
(void)name; // Workaround an MSVC bug about "unused" parameter.
return invalid_arg_index;
return -1;
}
#endif
@ -2596,7 +2586,7 @@ FMT_CONSTEXPR auto get_arg_index_by_name(basic_string_view<Char> name) -> int {
return get_arg_index_by_name<0, Args...>(name);
#endif
(void)name;
return invalid_arg_index;
return -1;
}
template <typename Char, typename... Args> class format_string_checker {
@ -2610,15 +2600,15 @@ template <typename Char, typename... Args> class format_string_checker {
// needed for compile-time checks: https://godbolt.org/z/GvWzcTjh1.
using parse_func = const Char* (*)(parse_context_type&);
type types_[num_args > 0 ? static_cast<size_t>(num_args) : 1];
parse_context_type context_;
parse_func parse_funcs_[num_args > 0 ? static_cast<size_t>(num_args) : 1];
type types_[num_args > 0 ? static_cast<size_t>(num_args) : 1];
public:
explicit FMT_CONSTEXPR format_string_checker(basic_string_view<Char> fmt)
: context_(fmt, num_args, types_),
parse_funcs_{&parse_format_specs<Args, parse_context_type>...},
types_{mapped_type_constant<Args, buffer_context<Char>>::value...} {}
: types_{mapped_type_constant<Args, buffer_context<Char>>::value...},
context_(fmt, num_args, types_),
parse_funcs_{&parse_format_specs<Args, parse_context_type>...} {}
FMT_CONSTEXPR void on_text(const Char*, const Char*) {}
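The member reordering above matters because data members are initialized in declaration order, not initializer-list order, and context_ is constructed from types_. A reduced sketch of the hazard (illustration only, not fmt code):

struct checker {
  int first_;   // declared first, so initialized first...
  int second_;
  checker() : second_(1), first_(second_) {}  // ...first_ reads uninitialized second_
};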
@ -2629,7 +2619,7 @@ template <typename Char, typename... Args> class format_string_checker {
FMT_CONSTEXPR auto on_arg_id(basic_string_view<Char> id) -> int {
#if FMT_USE_NONTYPE_TEMPLATE_ARGS
auto index = get_arg_index_by_name<Args...>(id);
if (index == invalid_arg_index) on_error("named argument is not found");
if (index < 0) on_error("named argument is not found");
return index;
#else
(void)id;
@ -2638,7 +2628,9 @@ template <typename Char, typename... Args> class format_string_checker {
#endif
}
FMT_CONSTEXPR void on_replacement_field(int, const Char*) {}
FMT_CONSTEXPR void on_replacement_field(int id, const Char* begin) {
on_format_specs(id, begin, begin); // Call parse() on empty specs.
}
FMT_CONSTEXPR auto on_format_specs(int id, const Char* begin, const Char*)
-> const Char* {
@ -2721,27 +2713,6 @@ struct formatter<T, Char,
-> decltype(ctx.out());
};
#define FMT_FORMAT_AS(Type, Base) \
template <typename Char> \
struct formatter<Type, Char> : formatter<Base, Char> { \
template <typename FormatContext> \
auto format(const Type& val, FormatContext& ctx) const \
-> decltype(ctx.out()) { \
return formatter<Base, Char>::format(static_cast<Base>(val), ctx); \
} \
}
FMT_FORMAT_AS(signed char, int);
FMT_FORMAT_AS(unsigned char, unsigned);
FMT_FORMAT_AS(short, int);
FMT_FORMAT_AS(unsigned short, unsigned);
FMT_FORMAT_AS(long, long long);
FMT_FORMAT_AS(unsigned long, unsigned long long);
FMT_FORMAT_AS(Char*, const Char*);
FMT_FORMAT_AS(std::basic_string<Char>, basic_string_view<Char>);
FMT_FORMAT_AS(std::nullptr_t, const void*);
FMT_FORMAT_AS(detail::std_string_view<Char>, basic_string_view<Char>);
template <typename Char = char> struct runtime_format_string {
basic_string_view<Char> str;
};

View File

@ -1128,16 +1128,12 @@ bool is_left_endpoint_integer_shorter_interval(int exponent) noexcept {
}
// Remove trailing zeros from n and return the number of zeros removed (float)
FMT_INLINE int remove_trailing_zeros(uint32_t& n) noexcept {
FMT_INLINE int remove_trailing_zeros(uint32_t& n, int s = 0) noexcept {
FMT_ASSERT(n != 0, "");
// Modular inverse of 5 (mod 2^32): (mod_inv_5 * 5) mod 2^32 = 1.
// See https://github.com/fmtlib/fmt/issues/3163 for more details.
const uint32_t mod_inv_5 = 0xcccccccd;
// Casts are needed to workaround a bug in MSVC 19.22 and older.
const uint32_t mod_inv_25 =
static_cast<uint32_t>(uint64_t(mod_inv_5) * mod_inv_5);
constexpr uint32_t mod_inv_5 = 0xcccccccd;
constexpr uint32_t mod_inv_25 = 0xc28f5c29; // = mod_inv_5 * mod_inv_5
int s = 0;
while (true) {
auto q = rotr(n * mod_inv_25, 2);
if (q > max_value<uint32_t>() / 100) break;
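The loop relies on a standard divisibility trick: for odd d, n is divisible by d exactly when n * inv_d (mod 2^32) is at most floor(UINT32_MAX / d), and the rotate by 2 folds in the factor of 4 in 100 = 4 * 25. A self-contained check of one case:

#include <cassert>
#include <cstdint>

static uint32_t rotr32(uint32_t x, unsigned r) {
  return (x >> r) | (x << (32 - r));
}

int main() {
  const uint32_t mod_inv_25 = 0xcccccccdu * 0xcccccccdu;  // inverse of 25 mod 2^32
  uint32_t n = 1234500;                                   // = 12345 * 100
  uint32_t q = rotr32(n * mod_inv_25, 2);
  assert(q == 12345 && q <= UINT32_MAX / 100);            // divisible by 100
}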
@ -1162,32 +1158,17 @@ FMT_INLINE int remove_trailing_zeros(uint64_t& n) noexcept {
// Is n is divisible by 10^8?
if ((nm.high() & ((1ull << (90 - 64)) - 1)) == 0 && nm.low() < magic_number) {
// If yes, work with the quotient.
// If yes, work with the quotient...
auto n32 = static_cast<uint32_t>(nm.high() >> (90 - 64));
const uint32_t mod_inv_5 = 0xcccccccd;
const uint32_t mod_inv_25 = mod_inv_5 * mod_inv_5;
int s = 8;
while (true) {
auto q = rotr(n32 * mod_inv_25, 2);
if (q > max_value<uint32_t>() / 100) break;
n32 = q;
s += 2;
}
auto q = rotr(n32 * mod_inv_5, 1);
if (q <= max_value<uint32_t>() / 10) {
n32 = q;
s |= 1;
}
// ... and use the 32 bit variant of the function
int s = remove_trailing_zeros(n32, 8);
n = n32;
return s;
}
// If n is not divisible by 10^8, work with n itself.
const uint64_t mod_inv_5 = 0xcccccccccccccccd;
const uint64_t mod_inv_25 = mod_inv_5 * mod_inv_5;
constexpr uint64_t mod_inv_5 = 0xcccccccccccccccd;
constexpr uint64_t mod_inv_25 = 0x8f5c28f5c28f5c29; // = mod_inv_5 * mod_inv_5
int s = 0;
while (true) {
@ -1458,7 +1439,7 @@ FMT_FUNC bool write_console(std::FILE* f, string_view text) {
auto u16 = utf8_to_utf16(text);
auto written = dword();
return WriteConsoleW(reinterpret_cast<void*>(_get_osfhandle(fd)), u16.c_str(),
static_cast<uint32_t>(u16.size()), &written, nullptr);
static_cast<uint32_t>(u16.size()), &written, nullptr) != 0;
}
// Print assuming legacy (non-Unicode) encoding.

View File

@ -48,9 +48,10 @@
#include "core.h"
#ifndef FMT_BEGIN_DETAIL_NAMESPACE
# define FMT_BEGIN_DETAIL_NAMESPACE namespace detail {
# define FMT_END_DETAIL_NAMESPACE }
#if defined __cpp_inline_variables && __cpp_inline_variables >= 201606L
# define FMT_INLINE_VARIABLE inline
#else
# define FMT_INLINE_VARIABLE
#endif
#if FMT_HAS_CPP17_ATTRIBUTE(fallthrough)
@ -78,16 +79,24 @@
# endif
#endif
#if FMT_GCC_VERSION
# define FMT_GCC_VISIBILITY_HIDDEN __attribute__((visibility("hidden")))
#else
# define FMT_GCC_VISIBILITY_HIDDEN
#ifndef FMT_NO_UNIQUE_ADDRESS
# if FMT_CPLUSPLUS >= 202002L
# if FMT_HAS_CPP_ATTRIBUTE(no_unique_address)
# define FMT_NO_UNIQUE_ADDRESS [[no_unique_address]]
// VS2019 v16.10 and later except clang-cl (https://reviews.llvm.org/D110485)
# elif (FMT_MSC_VERSION >= 1929) && !FMT_CLANG_VERSION
# define FMT_NO_UNIQUE_ADDRESS [[msvc::no_unique_address]]
# endif
# endif
#endif
#ifndef FMT_NO_UNIQUE_ADDRESS
# define FMT_NO_UNIQUE_ADDRESS
#endif
#ifdef __NVCC__
# define FMT_CUDA_VERSION (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__)
#if FMT_GCC_VERSION || defined(__clang__)
# define FMT_VISIBILITY(value) __attribute__((visibility(value)))
#else
# define FMT_CUDA_VERSION 0
# define FMT_VISIBILITY(value)
#endif
#ifdef __has_builtin
@ -120,10 +129,8 @@ FMT_END_NAMESPACE
# define FMT_THROW(x) throw x
# endif
# else
# define FMT_THROW(x) \
do { \
FMT_ASSERT(false, (x).what()); \
} while (false)
# define FMT_THROW(x) \
::fmt::detail::assert_fail(__FILE__, __LINE__, (x).what())
# endif
#endif
@ -362,8 +369,6 @@ class uint128_fallback {
private:
uint64_t lo_, hi_;
friend uint128_fallback umul128(uint64_t x, uint64_t y) noexcept;
public:
constexpr uint128_fallback(uint64_t hi, uint64_t lo) : lo_(lo), hi_(hi) {}
constexpr uint128_fallback(uint64_t value = 0) : lo_(value), hi_(0) {}
@ -536,6 +541,8 @@ FMT_INLINE void assume(bool condition) {
(void)condition;
#if FMT_HAS_BUILTIN(__builtin_assume) && !FMT_ICC_VERSION
__builtin_assume(condition);
#elif FMT_GCC_VERSION
if (!condition) __builtin_unreachable();
#endif
}
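The GCC branch added above emulates __builtin_assume by making violation of the condition unreachable, which lets the optimizer rely on it. Reduced sketch (GCC/Clang-specific builtin):

// With the hint, the compiler may treat n as non-negative throughout.
static int last_digit(int n) {
  if (!(n >= 0)) __builtin_unreachable();  // promise: n is non-negative
  return n % 10;                           // may compile without sign handling
}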
@ -554,20 +561,6 @@ inline auto get_data(Container& c) -> typename Container::value_type* {
return c.data();
}
#if defined(_SECURE_SCL) && _SECURE_SCL
// Make a checked iterator to avoid MSVC warnings.
template <typename T> using checked_ptr = stdext::checked_array_iterator<T*>;
template <typename T>
constexpr auto make_checked(T* p, size_t size) -> checked_ptr<T> {
return {p, size};
}
#else
template <typename T> using checked_ptr = T*;
template <typename T> constexpr auto make_checked(T* p, size_t) -> T* {
return p;
}
#endif
// Attempts to reserve space for n extra characters in the output range.
// Returns a pointer to the reserved range or a reference to it.
template <typename Container, FMT_ENABLE_IF(is_contiguous<Container>::value)>
@ -575,12 +568,12 @@ template <typename Container, FMT_ENABLE_IF(is_contiguous<Container>::value)>
__attribute__((no_sanitize("undefined")))
#endif
inline auto
reserve(std::back_insert_iterator<Container> it, size_t n)
-> checked_ptr<typename Container::value_type> {
reserve(std::back_insert_iterator<Container> it, size_t n) ->
typename Container::value_type* {
Container& c = get_container(it);
size_t size = c.size();
c.resize(size + n);
return make_checked(get_data(c) + size, n);
return get_data(c) + size;
}
template <typename T>
@ -612,8 +605,8 @@ template <typename T> auto to_pointer(buffer_appender<T> it, size_t n) -> T* {
}
template <typename Container, FMT_ENABLE_IF(is_contiguous<Container>::value)>
inline auto base_iterator(std::back_insert_iterator<Container>& it,
checked_ptr<typename Container::value_type>)
inline auto base_iterator(std::back_insert_iterator<Container> it,
typename Container::value_type*)
-> std::back_insert_iterator<Container> {
return it;
}
@ -881,7 +874,7 @@ void buffer<T>::append(const U* begin, const U* end) {
try_reserve(size_ + count);
auto free_cap = capacity_ - size_;
if (free_cap < count) count = free_cap;
std::uninitialized_copy_n(begin, count, make_checked(ptr_ + size_, count));
std::uninitialized_copy_n(begin, count, ptr_ + size_);
size_ += count;
begin += count;
}
@ -926,8 +919,8 @@ class basic_memory_buffer final : public detail::buffer<T> {
private:
T store_[SIZE];
// Don't inherit from Allocator avoid generating type_info for it.
Allocator alloc_;
// Don't inherit from Allocator to avoid generating type_info for it.
FMT_NO_UNIQUE_ADDRESS Allocator alloc_;
// Deallocate memory allocated by the buffer.
FMT_CONSTEXPR20 void deallocate() {
@ -948,9 +941,10 @@ class basic_memory_buffer final : public detail::buffer<T> {
T* old_data = this->data();
T* new_data =
std::allocator_traits<Allocator>::allocate(alloc_, new_capacity);
// Suppress a bogus -Wstringop-overflow in gcc 13.1 (#3481).
detail::assume(this->size() <= new_capacity);
// The following code doesn't throw, so the raw pointer above doesn't leak.
std::uninitialized_copy(old_data, old_data + this->size(),
detail::make_checked(new_data, new_capacity));
std::uninitialized_copy_n(old_data, this->size(), new_data);
this->set(new_data, new_capacity);
// deallocate must not throw according to the standard, but even if it does,
// the buffer already uses the new storage and will deallocate it in
@ -978,8 +972,7 @@ class basic_memory_buffer final : public detail::buffer<T> {
size_t size = other.size(), capacity = other.capacity();
if (data == other.store_) {
this->set(store_, capacity);
detail::copy_str<T>(other.store_, other.store_ + size,
detail::make_checked(store_, capacity));
detail::copy_str<T>(other.store_, other.store_ + size, store_);
} else {
this->set(data, capacity);
// Set pointer to the inline array so that delete is not called
@ -1044,6 +1037,7 @@ namespace detail {
FMT_API bool write_console(std::FILE* f, string_view text);
FMT_API void print(std::FILE*, string_view);
} // namespace detail
FMT_BEGIN_EXPORT
// Suppress a misleading warning in older versions of clang.
@ -1052,7 +1046,7 @@ FMT_BEGIN_EXPORT
#endif
/** An error reported from a formatting function. */
class FMT_API format_error : public std::runtime_error {
class FMT_VISIBILITY("default") format_error : public std::runtime_error {
public:
using std::runtime_error::runtime_error;
};
@ -1128,7 +1122,7 @@ template <typename Locale> class format_facet : public Locale::facet {
}
};
FMT_BEGIN_DETAIL_NAMESPACE
namespace detail {
// Returns true if value is negative, false otherwise.
// Same as `value < 0` but doesn't produce warnings if T is an unsigned type.
@ -1257,7 +1251,7 @@ FMT_CONSTEXPR auto count_digits(UInt n) -> int {
FMT_INLINE auto do_count_digits(uint32_t n) -> int {
// An optimization by Kendall Willets from https://bit.ly/3uOIQrB.
// This increments the upper 32 bits (log10(T) - 1) when >= T is added.
# define FMT_INC(T) (((sizeof(# T) - 1ull) << 32) - T)
# define FMT_INC(T) (((sizeof(#T) - 1ull) << 32) - T)
static constexpr uint64_t table[] = {
FMT_INC(0), FMT_INC(0), FMT_INC(0), // 8
FMT_INC(10), FMT_INC(10), FMT_INC(10), // 64
@ -1393,8 +1387,8 @@ FMT_CONSTEXPR auto format_uint(Char* buffer, UInt value, int num_digits,
}
template <unsigned BASE_BITS, typename Char, typename It, typename UInt>
inline auto format_uint(It out, UInt value, int num_digits, bool upper = false)
-> It {
FMT_CONSTEXPR inline auto format_uint(It out, UInt value, int num_digits,
bool upper = false) -> It {
if (auto ptr = to_pointer<Char>(out, to_unsigned(num_digits))) {
format_uint<BASE_BITS>(ptr, value, num_digits, upper);
return out;
@ -1418,19 +1412,20 @@ class utf8_to_utf16 {
auto str() const -> std::wstring { return {&buffer_[0], size()}; }
};
enum class to_utf8_error_policy { abort, replace };
// A converter from UTF-16/UTF-32 (host endian) to UTF-8.
template <typename WChar, typename Buffer = memory_buffer>
class unicode_to_utf8 {
template <typename WChar, typename Buffer = memory_buffer> class to_utf8 {
private:
Buffer buffer_;
public:
unicode_to_utf8() {}
explicit unicode_to_utf8(basic_string_view<WChar> s) {
to_utf8() {}
explicit to_utf8(basic_string_view<WChar> s,
to_utf8_error_policy policy = to_utf8_error_policy::abort) {
static_assert(sizeof(WChar) == 2 || sizeof(WChar) == 4,
"Expect utf16 or utf32");
if (!convert(s))
if (!convert(s, policy))
FMT_THROW(std::runtime_error(sizeof(WChar) == 2 ? "invalid utf16"
: "invalid utf32"));
}
@ -1442,23 +1437,28 @@ class unicode_to_utf8 {
// Performs conversion returning a bool instead of throwing exception on
// conversion error. This method may still throw in case of memory allocation
// error.
bool convert(basic_string_view<WChar> s) {
if (!convert(buffer_, s)) return false;
bool convert(basic_string_view<WChar> s,
to_utf8_error_policy policy = to_utf8_error_policy::abort) {
if (!convert(buffer_, s, policy)) return false;
buffer_.push_back(0);
return true;
}
static bool convert(Buffer& buf, basic_string_view<WChar> s) {
static bool convert(
Buffer& buf, basic_string_view<WChar> s,
to_utf8_error_policy policy = to_utf8_error_policy::abort) {
for (auto p = s.begin(); p != s.end(); ++p) {
uint32_t c = static_cast<uint32_t>(*p);
if (sizeof(WChar) == 2 && c >= 0xd800 && c <= 0xdfff) {
// surrogate pair
// Handle a surrogate pair.
++p;
if (p == s.end() || (c & 0xfc00) != 0xd800 || (*p & 0xfc00) != 0xdc00) {
return false;
if (policy == to_utf8_error_policy::abort) return false;
buf.append(string_view("\xEF\xBF\xBD"));
--p;
} else {
c = (c << 10) + static_cast<uint32_t>(*p) - 0x35fdc00;
}
c = (c << 10) + static_cast<uint32_t>(*p) - 0x35fdc00;
}
if (c < 0x80) {
} else if (c < 0x80) {
buf.push_back(static_cast<char>(c));
} else if (c < 0x800) {
buf.push_back(static_cast<char>(0xc0 | (c >> 6)));
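The magic constant in the surrogate branch is (0xd800 << 10) + 0xdc00 - 0x10000 = 0x35fdc00, so a UTF-16 pair (hi, lo) combines in a single expression. A worked check:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t hi = 0xd83d, lo = 0xde00;         // UTF-16 pair encoding U+1F600
  uint32_t c = (hi << 10) + lo - 0x35fdc00;  // same formula as above
  assert(c == 0x1f600);
}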
@ -1486,9 +1486,9 @@ inline uint128_fallback umul128(uint64_t x, uint64_t y) noexcept {
auto p = static_cast<uint128_opt>(x) * static_cast<uint128_opt>(y);
return {static_cast<uint64_t>(p >> 64), static_cast<uint64_t>(p)};
#elif defined(_MSC_VER) && defined(_M_X64)
auto result = uint128_fallback();
result.lo_ = _umul128(x, y, &result.hi_);
return result;
auto hi = uint64_t();
auto lo = _umul128(x, y, &hi);
return {hi, lo};
#else
const uint64_t mask = static_cast<uint64_t>(max_value<uint32_t>());
@ -1737,119 +1737,31 @@ FMT_CONSTEXPR inline fp operator*(fp x, fp y) {
}
template <typename T = void> struct basic_data {
// Normalized 64-bit significands of pow(10, k), for k = -348, -340, ..., 340.
// These are generated by support/compute-powers.py.
static constexpr uint64_t pow10_significands[87] = {
0xfa8fd5a0081c0288, 0xbaaee17fa23ebf76, 0x8b16fb203055ac76,
0xcf42894a5dce35ea, 0x9a6bb0aa55653b2d, 0xe61acf033d1a45df,
0xab70fe17c79ac6ca, 0xff77b1fcbebcdc4f, 0xbe5691ef416bd60c,
0x8dd01fad907ffc3c, 0xd3515c2831559a83, 0x9d71ac8fada6c9b5,
0xea9c227723ee8bcb, 0xaecc49914078536d, 0x823c12795db6ce57,
0xc21094364dfb5637, 0x9096ea6f3848984f, 0xd77485cb25823ac7,
0xa086cfcd97bf97f4, 0xef340a98172aace5, 0xb23867fb2a35b28e,
0x84c8d4dfd2c63f3b, 0xc5dd44271ad3cdba, 0x936b9fcebb25c996,
0xdbac6c247d62a584, 0xa3ab66580d5fdaf6, 0xf3e2f893dec3f126,
0xb5b5ada8aaff80b8, 0x87625f056c7c4a8b, 0xc9bcff6034c13053,
0x964e858c91ba2655, 0xdff9772470297ebd, 0xa6dfbd9fb8e5b88f,
0xf8a95fcf88747d94, 0xb94470938fa89bcf, 0x8a08f0f8bf0f156b,
0xcdb02555653131b6, 0x993fe2c6d07b7fac, 0xe45c10c42a2b3b06,
0xaa242499697392d3, 0xfd87b5f28300ca0e, 0xbce5086492111aeb,
0x8cbccc096f5088cc, 0xd1b71758e219652c, 0x9c40000000000000,
0xe8d4a51000000000, 0xad78ebc5ac620000, 0x813f3978f8940984,
0xc097ce7bc90715b3, 0x8f7e32ce7bea5c70, 0xd5d238a4abe98068,
0x9f4f2726179a2245, 0xed63a231d4c4fb27, 0xb0de65388cc8ada8,
0x83c7088e1aab65db, 0xc45d1df942711d9a, 0x924d692ca61be758,
0xda01ee641a708dea, 0xa26da3999aef774a, 0xf209787bb47d6b85,
0xb454e4a179dd1877, 0x865b86925b9bc5c2, 0xc83553c5c8965d3d,
0x952ab45cfa97a0b3, 0xde469fbd99a05fe3, 0xa59bc234db398c25,
0xf6c69a72a3989f5c, 0xb7dcbf5354e9bece, 0x88fcf317f22241e2,
0xcc20ce9bd35c78a5, 0x98165af37b2153df, 0xe2a0b5dc971f303a,
0xa8d9d1535ce3b396, 0xfb9b7cd9a4a7443c, 0xbb764c4ca7a44410,
0x8bab8eefb6409c1a, 0xd01fef10a657842c, 0x9b10a4e5e9913129,
0xe7109bfba19c0c9d, 0xac2820d9623bf429, 0x80444b5e7aa7cf85,
0xbf21e44003acdd2d, 0x8e679c2f5e44ff8f, 0xd433179d9c8cb841,
0x9e19db92b4e31ba9, 0xeb96bf6ebadf77d9, 0xaf87023b9bf0ee6b,
};
#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wnarrowing"
#endif
// Binary exponents of pow(10, k), for k = -348, -340, ..., 340, corresponding
// to significands above.
static constexpr int16_t pow10_exponents[87] = {
-1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, -954,
-927, -901, -874, -847, -821, -794, -768, -741, -715, -688, -661,
-635, -608, -582, -555, -529, -502, -475, -449, -422, -396, -369,
-343, -316, -289, -263, -236, -210, -183, -157, -130, -103, -77,
-50, -24, 3, 30, 56, 83, 109, 136, 162, 189, 216,
242, 269, 295, 322, 348, 375, 402, 428, 455, 481, 508,
534, 561, 588, 614, 641, 667, 694, 720, 747, 774, 800,
827, 853, 880, 907, 933, 960, 986, 1013, 1039, 1066};
#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409
# pragma GCC diagnostic pop
#endif
static constexpr uint64_t power_of_10_64[20] = {
1, FMT_POWERS_OF_10(1ULL), FMT_POWERS_OF_10(1000000000ULL),
10000000000000000000ULL};
// For checking rounding thresholds.
// The kth entry is chosen to be the smallest integer such that the
// upper 32-bits of 10^(k+1) times it is strictly bigger than 5 * 10^k.
static constexpr uint32_t fractional_part_rounding_thresholds[8] = {
2576980378, // ceil(2^31 + 2^32/10^1)
2190433321, // ceil(2^31 + 2^32/10^2)
2151778616, // ceil(2^31 + 2^32/10^3)
2147913145, // ceil(2^31 + 2^32/10^4)
2147526598, // ceil(2^31 + 2^32/10^5)
2147487943, // ceil(2^31 + 2^32/10^6)
2147484078, // ceil(2^31 + 2^32/10^7)
2147483691 // ceil(2^31 + 2^32/10^8)
2576980378U, // ceil(2^31 + 2^32/10^1)
2190433321U, // ceil(2^31 + 2^32/10^2)
2151778616U, // ceil(2^31 + 2^32/10^3)
2147913145U, // ceil(2^31 + 2^32/10^4)
2147526598U, // ceil(2^31 + 2^32/10^5)
2147487943U, // ceil(2^31 + 2^32/10^6)
2147484078U, // ceil(2^31 + 2^32/10^7)
2147483691U // ceil(2^31 + 2^32/10^8)
};
};
// This is a struct rather than an alias to avoid shadowing warnings in gcc.
struct data : basic_data<> {};
#if FMT_CPLUSPLUS < 201703L
template <typename T> constexpr uint64_t basic_data<T>::pow10_significands[];
template <typename T> constexpr int16_t basic_data<T>::pow10_exponents[];
template <typename T> constexpr uint64_t basic_data<T>::power_of_10_64[];
template <typename T>
constexpr uint32_t basic_data<T>::fractional_part_rounding_thresholds[];
#endif
// This is a struct rather than an alias to avoid shadowing warnings in gcc.
struct data : basic_data<> {};
// Returns a cached power of 10 `c_k = c_k.f * pow(2, c_k.e)` such that its
// (binary) exponent satisfies `min_exponent <= c_k.e <= min_exponent + 28`.
FMT_CONSTEXPR inline fp get_cached_power(int min_exponent,
int& pow10_exponent) {
const int shift = 32;
// log10(2) = 0x0.4d104d427de7fbcc...
const int64_t significand = 0x4d104d427de7fbcc;
int index = static_cast<int>(
((min_exponent + fp::num_significand_bits - 1) * (significand >> shift) +
((int64_t(1) << shift) - 1)) // ceil
>> 32 // arithmetic shift
);
// Decimal exponent of the first (smallest) cached power of 10.
const int first_dec_exp = -348;
// Difference between 2 consecutive decimal exponents in cached powers of 10.
const int dec_exp_step = 8;
index = (index - first_dec_exp - 1) / dec_exp_step + 1;
pow10_exponent = first_dec_exp + index * dec_exp_step;
// Using *(x + index) instead of x[index] avoids an issue with some compilers
// using the EDG frontend (e.g. nvhpc/22.3 in C++17 mode).
return {*(data::pow10_significands + index),
*(data::pow10_exponents + index)};
}
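The index computation above evaluates ceil(e * log10(2)) in 32.32 fixed point, using only the top 32 bits of the log10(2) significand. A self-contained sketch of the same trick (illustrative, not fmt's API; like the code above it relies on arithmetic right shift for negative values):
#include <cassert>
#include <cmath>
#include <cstdint>
static int ceil_e_log10_2(int e) {
  const int64_t fixed_log10_2 = 0x4d104d42;  // floor(log10(2) * 2^32)
  return static_cast<int>((e * fixed_log10_2 + ((int64_t(1) << 32) - 1)) >> 32);
}
int main() {
  for (int e = -1100; e <= 1100; ++e)  // a generous range for double exponents
    assert(ceil_e_log10_2(e) == static_cast<int>(std::ceil(e * std::log10(2.0))));
}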
template <typename T>
template <typename T, bool doublish = num_bits<T>() == num_bits<double>()>
using convert_float_result =
conditional_t<std::is_same<T, float>::value ||
std::numeric_limits<T>::digits ==
std::numeric_limits<double>::digits,
double, T>;
conditional_t<std::is_same<T, float>::value || doublish, double, T>;
template <typename T>
constexpr auto convert_float(T value) -> convert_float_result<T> {
@ -1970,7 +1882,7 @@ inline auto find_escape(const char* begin, const char* end)
[] { \
/* Use the hidden visibility as a workaround for a GCC bug (#1973). */ \
/* Use a macro-like name to avoid shadowing warnings. */ \
struct FMT_GCC_VISIBILITY_HIDDEN FMT_COMPILE_STRING : base { \
struct FMT_VISIBILITY("hidden") FMT_COMPILE_STRING : base { \
using char_type FMT_MAYBE_UNUSED = fmt::remove_cvref_t<decltype(s[0])>; \
FMT_MAYBE_UNUSED FMT_CONSTEXPR explicit \
operator fmt::basic_string_view<char_type>() const { \
@ -2475,6 +2387,49 @@ FMT_CONSTEXPR auto write(OutputIt out, T value) -> OutputIt {
return base_iterator(out, it);
}
// DEPRECATED!
template <typename Char>
FMT_CONSTEXPR auto parse_align(const Char* begin, const Char* end,
format_specs<Char>& specs) -> const Char* {
FMT_ASSERT(begin != end, "");
auto align = align::none;
auto p = begin + code_point_length(begin);
if (end - p <= 0) p = begin;
for (;;) {
switch (to_ascii(*p)) {
case '<':
align = align::left;
break;
case '>':
align = align::right;
break;
case '^':
align = align::center;
break;
}
if (align != align::none) {
if (p != begin) {
auto c = *begin;
if (c == '}') return begin;
if (c == '{') {
throw_format_error("invalid fill character '{'");
return begin;
}
specs.fill = {begin, to_unsigned(p - begin)};
begin = p + 1;
} else {
++begin;
}
break;
} else if (p == begin) {
break;
}
p = begin;
}
specs.align = align;
return begin;
}
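For context, the fill/align grammar parsed above corresponds to format strings like these (public fmt API, shown for illustration):
#include <fmt/core.h>
#include <cassert>
int main() {
  assert(fmt::format("{:*^8}", 42) == "***42***");  // '*' fill, '^' center
  assert(fmt::format("{:>6}", "ab") == "    ab");   // default fill, '>' right
  // A '{' fill character is rejected with "invalid fill character '{'",
  // matching the throw_format_error() call above.
}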
// A floating-point presentation format.
enum class float_format : unsigned char {
general, // General: exponent notation or fixed point based on magnitude.
@ -2833,78 +2788,6 @@ FMT_INLINE FMT_CONSTEXPR bool signbit(T value) {
return std::signbit(static_cast<double>(value));
}
enum class round_direction { unknown, up, down };
// Given the divisor (normally a power of 10), the remainder = v % divisor for
// some number v and the error, returns whether v should be rounded up, down, or
// whether the rounding direction can't be determined due to error.
// error should be less than divisor / 2.
FMT_CONSTEXPR inline round_direction get_round_direction(uint64_t divisor,
uint64_t remainder,
uint64_t error) {
FMT_ASSERT(remainder < divisor, ""); // divisor - remainder won't overflow.
FMT_ASSERT(error < divisor, ""); // divisor - error won't overflow.
FMT_ASSERT(error < divisor - error, ""); // error * 2 won't overflow.
// Round down if (remainder + error) * 2 <= divisor.
if (remainder <= divisor - remainder && error * 2 <= divisor - remainder * 2)
return round_direction::down;
// Round up if (remainder - error) * 2 >= divisor.
if (remainder >= error &&
remainder - error >= divisor - (remainder - error)) {
return round_direction::up;
}
return round_direction::unknown;
}
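A small standalone check of the two inequalities above (same logic, illustrative values):
#include <cassert>
#include <cstdint>
enum class dir { unknown, up, down };
static dir round_dir(uint64_t divisor, uint64_t remainder, uint64_t error) {
  if (remainder <= divisor - remainder && error * 2 <= divisor - remainder * 2)
    return dir::down;
  if (remainder >= error && remainder - error >= divisor - (remainder - error))
    return dir::up;
  return dir::unknown;
}
int main() {
  assert(round_dir(1000, 460, 40) == dir::down);     // (460 + 40) * 2 <= 1000
  assert(round_dir(1000, 540, 40) == dir::up);       // (540 - 40) * 2 >= 1000
  assert(round_dir(1000, 500, 40) == dir::unknown);  // error band straddles 500
}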
namespace digits {
enum result {
more, // Generate more digits.
done, // Done generating digits.
error // Digit generation cancelled due to an error.
};
}
struct gen_digits_handler {
char* buf;
int size;
int precision;
int exp10;
bool fixed;
FMT_CONSTEXPR digits::result on_digit(char digit, uint64_t divisor,
uint64_t remainder, uint64_t error,
bool integral) {
FMT_ASSERT(remainder < divisor, "");
buf[size++] = digit;
if (!integral && error >= remainder) return digits::error;
if (size < precision) return digits::more;
if (!integral) {
// Check if error * 2 < divisor with overflow prevention.
// The check is not needed for the integral part because error = 1
// and divisor > (1 << 32) there.
if (error >= divisor || error >= divisor - error) return digits::error;
} else {
FMT_ASSERT(error == 1 && divisor > 2, "");
}
auto dir = get_round_direction(divisor, remainder, error);
if (dir != round_direction::up)
return dir == round_direction::down ? digits::done : digits::error;
++buf[size - 1];
for (int i = size - 1; i > 0 && buf[i] > '9'; --i) {
buf[i] = '0';
++buf[i - 1];
}
if (buf[0] > '9') {
buf[0] = '1';
if (fixed)
buf[size++] = '0';
else
++exp10;
}
return digits::done;
}
};
inline FMT_CONSTEXPR20 void adjust_precision(int& precision, int exp10) {
// Adjust fixed precision by exponent because it is relative to decimal
// point.
@ -2913,101 +2796,6 @@ inline FMT_CONSTEXPR20 void adjust_precision(int& precision, int exp10) {
precision += exp10;
}
// Generates output using the Grisu digit-gen algorithm.
// error: the size of the region (lower, upper) outside of which numbers
// definitely do not round to value (Delta in Grisu3).
FMT_INLINE FMT_CONSTEXPR20 auto grisu_gen_digits(fp value, uint64_t error,
int& exp,
gen_digits_handler& handler)
-> digits::result {
const fp one(1ULL << -value.e, value.e);
// The integral part of scaled value (p1 in Grisu) = value / one. It cannot be
// zero because it contains a product of two 64-bit numbers with MSB set (due
// to normalization) - 1, shifted right by at most 60 bits.
auto integral = static_cast<uint32_t>(value.f >> -one.e);
FMT_ASSERT(integral != 0, "");
FMT_ASSERT(integral == value.f >> -one.e, "");
// The fractional part of scaled value (p2 in Grisu) c = value % one.
uint64_t fractional = value.f & (one.f - 1);
exp = count_digits(integral); // kappa in Grisu.
// Non-fixed formats require at least one digit and no precision adjustment.
if (handler.fixed) {
adjust_precision(handler.precision, exp + handler.exp10);
// Check if precision is satisfied just by leading zeros, e.g.
// format("{:.2f}", 0.001) gives "0.00" without generating any digits.
if (handler.precision <= 0) {
if (handler.precision < 0) return digits::done;
// Divide by 10 to prevent overflow.
uint64_t divisor = data::power_of_10_64[exp - 1] << -one.e;
auto dir = get_round_direction(divisor, value.f / 10, error * 10);
if (dir == round_direction::unknown) return digits::error;
handler.buf[handler.size++] = dir == round_direction::up ? '1' : '0';
return digits::done;
}
}
// Generate digits for the integral part. This can produce up to 10 digits.
do {
uint32_t digit = 0;
auto divmod_integral = [&](uint32_t divisor) {
digit = integral / divisor;
integral %= divisor;
};
// This optimization by Milo Yip reduces the number of integer divisions by
// one per iteration.
switch (exp) {
case 10:
divmod_integral(1000000000);
break;
case 9:
divmod_integral(100000000);
break;
case 8:
divmod_integral(10000000);
break;
case 7:
divmod_integral(1000000);
break;
case 6:
divmod_integral(100000);
break;
case 5:
divmod_integral(10000);
break;
case 4:
divmod_integral(1000);
break;
case 3:
divmod_integral(100);
break;
case 2:
divmod_integral(10);
break;
case 1:
digit = integral;
integral = 0;
break;
default:
FMT_ASSERT(false, "invalid number of digits");
}
--exp;
auto remainder = (static_cast<uint64_t>(integral) << -one.e) + fractional;
auto result = handler.on_digit(static_cast<char>('0' + digit),
data::power_of_10_64[exp] << -one.e,
remainder, error, true);
if (result != digits::more) return result;
} while (exp > 0);
// Generate digits for the fractional part.
for (;;) {
fractional *= 10;
error *= 10;
char digit = static_cast<char>('0' + (fractional >> -one.e));
fractional &= one.f - 1;
--exp;
auto result = handler.on_digit(digit, one.f, fractional, error, false);
if (result != digits::more) return result;
}
}
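The fractional-part loop above is plain fixed-point arithmetic: multiplying the fraction by 10 pushes the next decimal digit into the bits above the binary point. A minimal sketch with an illustrative shift (not taken from fmt):
#include <cstdint>
#include <cstdio>
int main() {
  const int shift = 60;                 // plays the role of -one.e above
  uint64_t frac = 5ULL << (shift - 3);  // 0.625 in 4.60 fixed point
  for (int i = 0; i < 3; ++i) {
    frac *= 10;
    std::putchar('0' + static_cast<int>(frac >> shift));  // prints 6, 2, 5
    frac &= (1ULL << shift) - 1;
  }
  std::putchar('\n');
}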
class bigint {
private:
// A bigint is stored as an array of bigits (big digits), with bigit at index
@ -3108,7 +2896,7 @@ class bigint {
auto size = other.bigits_.size();
bigits_.resize(size);
auto data = other.bigits_.data();
std::copy(data, data + size, make_checked(bigits_.data(), size));
copy_str<bigit>(data, data + size, bigits_.data());
exp_ = other.exp_;
}
@ -3322,6 +3110,7 @@ FMT_CONSTEXPR20 inline void format_dragon(basic_fp<uint128_t> value,
}
int even = static_cast<int>((value.f & 1) == 0);
if (!upper) upper = &lower;
bool shortest = num_digits < 0;
if ((flags & dragon::fixup) != 0) {
if (add_compare(numerator, *upper, denominator) + even <= 0) {
--exp10;
@ -3334,7 +3123,7 @@ FMT_CONSTEXPR20 inline void format_dragon(basic_fp<uint128_t> value,
if ((flags & dragon::fixed) != 0) adjust_precision(num_digits, exp10 + 1);
}
// Invariant: value == (numerator / denominator) * pow(10, exp10).
if (num_digits < 0) {
if (shortest) {
// Generate the shortest representation.
num_digits = 0;
char* data = buf.data();
@ -3364,7 +3153,7 @@ FMT_CONSTEXPR20 inline void format_dragon(basic_fp<uint128_t> value,
}
// Generate the given number of digits.
exp10 -= num_digits - 1;
if (num_digits == 0) {
if (num_digits <= 0) {
denominator *= 10;
auto digit = add_compare(numerator, numerator, denominator) > 0 ? '1' : '0';
buf.push_back(digit);
@ -3389,7 +3178,8 @@ FMT_CONSTEXPR20 inline void format_dragon(basic_fp<uint128_t> value,
}
if (buf[0] == overflow) {
buf[0] = '1';
++exp10;
if ((flags & dragon::fixed) != 0) buf.push_back('0');
else ++exp10;
}
return;
}
@ -3508,7 +3298,7 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs,
int exp = 0;
bool use_dragon = true;
unsigned dragon_flags = 0;
if (!is_fast_float<Float>()) {
if (!is_fast_float<Float>() || is_constant_evaluated()) {
const auto inv_log2_10 = 0.3010299956639812; // 1 / log2(10)
using info = dragonbox::float_info<decltype(converted_value)>;
const auto f = basic_fp<typename info::carrier_uint>(converted_value);
@ -3516,10 +3306,11 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs,
// 10^(exp - 1) <= value < 10^exp or 10^exp <= value < 10^(exp + 1).
// This is based on log10(value) == log2(value) / log2(10) and approximation
// of log2(value) by e + num_fraction_bits idea from double-conversion.
exp = static_cast<int>(
std::ceil((f.e + count_digits<1>(f.f) - 1) * inv_log2_10 - 1e-10));
auto e = (f.e + count_digits<1>(f.f) - 1) * inv_log2_10 - 1e-10;
exp = static_cast<int>(e);
if (e > exp) ++exp; // Compute ceil.
dragon_flags = dragon::fixup;
} else if (!is_constant_evaluated() && precision < 0) {
} else if (precision < 0) {
// Use Dragonbox for the shortest format.
if (specs.binary32) {
auto dec = dragonbox::to_decimal(static_cast<float>(value));
@ -3529,25 +3320,6 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs,
auto dec = dragonbox::to_decimal(static_cast<double>(value));
write<char>(buffer_appender<char>(buf), dec.significand);
return dec.exponent;
} else if (is_constant_evaluated()) {
// Use Grisu + Dragon4 for the given precision:
// https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf.
const int min_exp = -60; // alpha in Grisu.
int cached_exp10 = 0; // K in Grisu.
fp normalized = normalize(fp(converted_value));
const auto cached_pow = get_cached_power(
min_exp - (normalized.e + fp::num_significand_bits), cached_exp10);
normalized = normalized * cached_pow;
gen_digits_handler handler{buf.data(), 0, precision, -cached_exp10, fixed};
if (grisu_gen_digits(normalized, 1, exp, handler) != digits::error &&
!is_constant_evaluated()) {
exp += handler.exp10;
buf.try_resize(to_unsigned(handler.size));
use_dragon = false;
} else {
exp += handler.size - cached_exp10 - 1;
precision = handler.precision;
}
} else {
// Extract significand bits and exponent bits.
using info = dragonbox::float_info<double>;
@ -3566,7 +3338,7 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs,
significand <<= 1;
} else {
// Normalize subnormal inputs.
FMT_ASSERT(significand != 0, "zeros should not appear hear");
FMT_ASSERT(significand != 0, "zeros should not appear here");
int shift = countl_zero(significand);
FMT_ASSERT(shift >= num_bits<uint64_t>() - num_significand_bits<double>(),
"");
@ -3603,9 +3375,7 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs,
}
// Compute the actual number of decimal digits to print.
if (fixed) {
adjust_precision(precision, exp + digits_in_the_first_segment);
}
if (fixed) adjust_precision(precision, exp + digits_in_the_first_segment);
// Use Dragon4 only when there might be not enough digits in the first
// segment.
@ -4091,8 +3861,7 @@ FMT_CONSTEXPR auto get_dynamic_spec(FormatArg arg, ErrorHandler eh) -> int {
}
template <typename Context, typename ID>
FMT_CONSTEXPR auto get_arg(Context& ctx, ID id) ->
typename Context::format_arg {
FMT_CONSTEXPR auto get_arg(Context& ctx, ID id) -> decltype(ctx.arg(id)) {
auto arg = ctx.arg(id);
if (!arg) ctx.on_error("argument not found");
return arg;
@ -4117,15 +3886,6 @@ FMT_CONSTEXPR void handle_dynamic_spec(int& value,
}
#if FMT_USE_USER_DEFINED_LITERALS
template <typename Char> struct udl_formatter {
basic_string_view<Char> str;
template <typename... T>
auto operator()(T&&... args) const -> std::basic_string<Char> {
return vformat(str, fmt::make_format_args<buffer_context<Char>>(args...));
}
};
# if FMT_USE_NONTYPE_TEMPLATE_ARGS
template <typename T, typename Char, size_t N,
fmt::detail_exported::fixed_string<Char, N> Str>
@ -4179,28 +3939,28 @@ FMT_API void format_error_code(buffer<char>& out, int error_code,
FMT_API void report_error(format_func func, int error_code,
const char* message) noexcept;
FMT_END_DETAIL_NAMESPACE
} // namespace detail
FMT_API auto vsystem_error(int error_code, string_view format_str,
format_args args) -> std::system_error;
/**
\rst
Constructs :class:`std::system_error` with a message formatted with
``fmt::format(fmt, args...)``.
\rst
Constructs :class:`std::system_error` with a message formatted with
``fmt::format(fmt, args...)``.
*error_code* is a system error code as given by ``errno``.
**Example**::
**Example**::
// This throws std::system_error with the description
// cannot open file 'madeup': No such file or directory
// or similar (system message may vary).
const char* filename = "madeup";
std::FILE* file = std::fopen(filename, "r");
if (!file)
throw fmt::system_error(errno, "cannot open file '{}'", filename);
\endrst
*/
// This throws std::system_error with the description
// cannot open file 'madeup': No such file or directory
// or similar (system message may vary).
const char* filename = "madeup";
std::FILE* file = std::fopen(filename, "r");
if (!file)
throw fmt::system_error(errno, "cannot open file '{}'", filename);
\endrst
*/
template <typename... T>
auto system_error(int error_code, format_string<T...> fmt, T&&... args)
-> std::system_error {
@ -4292,8 +4052,8 @@ class format_int {
template <typename T, typename Char>
struct formatter<T, Char, enable_if_t<detail::has_format_as<T>::value>>
: private formatter<detail::format_as_t<T>> {
using base = formatter<detail::format_as_t<T>>;
: private formatter<detail::format_as_t<T>, Char> {
using base = formatter<detail::format_as_t<T>, Char>;
using base::parse;
template <typename FormatContext>
@ -4302,22 +4062,24 @@ struct formatter<T, Char, enable_if_t<detail::has_format_as<T>::value>>
}
};
template <typename Char>
struct formatter<void*, Char> : formatter<const void*, Char> {
template <typename FormatContext>
auto format(void* val, FormatContext& ctx) const -> decltype(ctx.out()) {
return formatter<const void*, Char>::format(val, ctx);
}
};
#define FMT_FORMAT_AS(Type, Base) \
template <typename Char> \
struct formatter<Type, Char> : formatter<Base, Char> {}
FMT_FORMAT_AS(signed char, int);
FMT_FORMAT_AS(unsigned char, unsigned);
FMT_FORMAT_AS(short, int);
FMT_FORMAT_AS(unsigned short, unsigned);
FMT_FORMAT_AS(long, detail::long_type);
FMT_FORMAT_AS(unsigned long, detail::ulong_type);
FMT_FORMAT_AS(Char*, const Char*);
FMT_FORMAT_AS(std::basic_string<Char>, basic_string_view<Char>);
FMT_FORMAT_AS(std::nullptr_t, const void*);
FMT_FORMAT_AS(detail::std_string_view<Char>, basic_string_view<Char>);
FMT_FORMAT_AS(void*, const void*);
template <typename Char, size_t N>
struct formatter<Char[N], Char> : formatter<basic_string_view<Char>, Char> {
template <typename FormatContext>
FMT_CONSTEXPR auto format(const Char* val, FormatContext& ctx) const
-> decltype(ctx.out()) {
return formatter<basic_string_view<Char>, Char>::format(val, ctx);
}
};
struct formatter<Char[N], Char> : formatter<basic_string_view<Char>, Char> {};
/**
\rst
@ -4393,7 +4155,9 @@ template <> struct formatter<bytes> {
};
// group_digits_view is not derived from view because it copies the argument.
template <typename T> struct group_digits_view { T value; };
template <typename T> struct group_digits_view {
T value;
};
/**
\rst
@ -4523,7 +4287,8 @@ auto join(Range&& range, string_view sep)
std::string answer = fmt::to_string(42);
\endrst
*/
template <typename T, FMT_ENABLE_IF(!std::is_integral<T>::value)>
template <typename T, FMT_ENABLE_IF(!std::is_integral<T>::value &&
!detail::has_format_as<T>::value)>
inline auto to_string(const T& value) -> std::string {
auto buffer = memory_buffer();
detail::write<char>(appender(buffer), value);
@ -4548,7 +4313,15 @@ FMT_NODISCARD auto to_string(const basic_memory_buffer<Char, SIZE>& buf)
return std::basic_string<Char>(buf.data(), size);
}
FMT_BEGIN_DETAIL_NAMESPACE
template <typename T, FMT_ENABLE_IF(!std::is_integral<T>::value &&
detail::has_format_as<T>::value)>
inline auto to_string(const T& value) -> std::string {
return to_string(format_as(value));
}
FMT_END_EXPORT
namespace detail {
template <typename Char>
void vformat_to(buffer<Char>& buf, basic_string_view<Char> fmt,
@ -4619,6 +4392,8 @@ void vformat_to(buffer<Char>& buf, basic_string_view<Char> fmt,
detail::parse_format_string<false>(fmt, format_handler(out, fmt, args, loc));
}
FMT_BEGIN_EXPORT
#ifndef FMT_HEADER_ONLY
extern template FMT_API void vformat_to(buffer<char>&, string_view,
typename vformat_args<>::type,
@ -4631,7 +4406,7 @@ extern template FMT_API auto decimal_point_impl(locale_ref) -> char;
extern template FMT_API auto decimal_point_impl(locale_ref) -> wchar_t;
#endif // FMT_HEADER_ONLY
FMT_END_DETAIL_NAMESPACE
} // namespace detail
#if FMT_USE_USER_DEFINED_LITERALS
inline namespace literals {

View File

@ -1966,7 +1966,7 @@ class CYCLES_MATERIAL_PT_settings_surface(CyclesButtonsPanel, Panel):
cmat = mat.cycles
col = layout.column()
col.prop(cmat, "displacement_method", text="Displacement")
col.prop(mat, "displacement_method", text="Displacement")
col.prop(cmat, "emission_sampling")
col.prop(mat, "use_transparent_shadow")
col.prop(cmat, "use_bump_map_correction")

View File

@ -66,10 +66,10 @@ template<> struct AttributeConverter<blender::ColorGeometry4b> {
static constexpr auto type_desc = TypeRGBA;
static CyclesT convert(const blender::ColorGeometry4b &value)
{
return color_srgb_to_linear(make_float4(byte_to_float(value[0]),
byte_to_float(value[1]),
byte_to_float(value[2]),
byte_to_float(value[3])));
return color_srgb_to_linear_v4(make_float4(byte_to_float(value[0]),
byte_to_float(value[1]),
byte_to_float(value[2]),
byte_to_float(value[3])));
}
};
template<> struct AttributeConverter<bool> {

View File

@ -56,12 +56,6 @@ static VolumeInterpolation get_volume_interpolation(PointerRNA &ptr)
ptr, "volume_interpolation", VOLUME_NUM_INTERPOLATION, VOLUME_INTERPOLATION_LINEAR);
}
static DisplacementMethod get_displacement_method(PointerRNA &ptr)
{
return (DisplacementMethod)get_enum(
ptr, "displacement_method", DISPLACE_NUM_METHODS, DISPLACE_BUMP);
}
static EmissionSampling get_emission_sampling(PointerRNA &ptr)
{
return (EmissionSampling)get_enum(
@ -76,6 +70,12 @@ static int validate_enum_value(int value, int num_values, int default_value)
return value;
}
static DisplacementMethod get_displacement_method(BL::Material &b_mat)
{
int value = b_mat.displacement_method();
return (DisplacementMethod)validate_enum_value(value, DISPLACE_NUM_METHODS, DISPLACE_BUMP);
}
template<typename NodeType> static InterpolationType get_image_interpolation(NodeType &b_node)
{
int value = b_node.interpolation();
@ -1548,7 +1548,7 @@ void BlenderSync::sync_materials(BL::Depsgraph &b_depsgraph, bool update_all)
shader->set_volume_sampling_method(get_volume_sampling(cmat));
shader->set_volume_interpolation_method(get_volume_interpolation(cmat));
shader->set_volume_step_rate(get_float(cmat, "volume_step_rate"));
shader->set_displacement_method(get_displacement_method(cmat));
shader->set_displacement_method(get_displacement_method(b_mat));
shader->set_graph(graph);

View File

@ -642,7 +642,7 @@ Device *OIDNDenoiser::ensure_denoiser_device(Progress *progress)
{
#ifndef WITH_OPENIMAGEDENOISE
(void)progress;
path_trace_device_->set_error("Build without OpenImageDenoiser");
path_trace_device_->set_error("Failed to denoise, build has no OpenImageDenoise support");
return nullptr;
#else
if (!openimagedenoise_supported()) {

View File

@ -503,14 +503,12 @@ if(WITH_CYCLES_CUDA_BINARIES)
add_custom_command(
OUTPUT ${cuda_file}
COMMAND ${CCACHE_PROGRAM} ${cuda_nvcc_executable} ${_cuda_nvcc_args}
DEPENDS ${kernel_sources}
USES_TERMINAL)
DEPENDS ${kernel_sources})
else()
add_custom_command(
OUTPUT ${cuda_file}
COMMAND ${cuda_nvcc_executable} ${_cuda_nvcc_args}
DEPENDS ${kernel_sources}
USES_TERMINAL)
DEPENDS ${kernel_sources})
endif()
unset(_cuda_nvcc_args)
@ -647,8 +645,7 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
add_custom_command(
OUTPUT ${hip_file}
COMMAND ${hip_command} ${hip_flags}
DEPENDS ${kernel_sources}
USES_TERMINAL)
DEPENDS ${kernel_sources})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hip_file}" ${CYCLES_INSTALL_PATH}/lib)
list(APPEND hip_fatbins ${hip_file})
endmacro()
@ -716,8 +713,7 @@ if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES)
add_custom_command(
OUTPUT ${bitcode_file}
COMMAND ${hiprt_compile_command} ${hiprt_compile_flags}
DEPENDS ${kernel_sources}
USES_TERMINAL)
DEPENDS ${kernel_sources})
if(WIN32)
set(hiprt_link_command ${CMAKE_COMMAND})
set(hiprt_link_flags -E env "HIP_PATH=${HIP_ROOT_DIR}"
@ -737,8 +733,7 @@ if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES)
add_custom_command(
OUTPUT ${hiprt_file}
COMMAND ${hiprt_link_command} ${hiprt_link_flags}
DEPENDS ${bitcode_file}
USES_TERMINAL)
DEPENDS ${bitcode_file})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hiprt_file}" ${CYCLES_INSTALL_PATH}/lib)
add_custom_target(cycles_kernel_hiprt ALL DEPENDS ${hiprt_file})
cycles_set_solution_folder(cycles_kernel_hiprt)
@ -789,8 +784,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
${cuda_flags}
${input}
WORKING_DIRECTORY
"${CMAKE_CURRENT_SOURCE_DIR}"
USES_TERMINAL)
"${CMAKE_CURRENT_SOURCE_DIR}")
list(APPEND optix_ptx ${output})
@ -1083,8 +1077,7 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
"$<$<CONFIG:Debug>:${sycl_compiler_flags_Debug}>"
"$<$<CONFIG:MinSizeRel>:${sycl_compiler_flags_Release}>"
COMMAND_EXPAND_LISTS
DEPENDS ${cycles_oneapi_kernel_sources}
USES_TERMINAL)
DEPENDS ${cycles_oneapi_kernel_sources})
else()
if(NOT IGC_INSTALL_DIR)
get_filename_component(IGC_INSTALL_DIR "${sycl_compiler_root}/../lib/igc" ABSOLUTE)
@ -1109,8 +1102,7 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
"$<$<CONFIG:Debug>:${sycl_compiler_flags_Debug_str}>"
"$<$<CONFIG:MinSizeRel>:${sycl_compiler_flags_Release_str}>"
COMMAND_EXPAND_LISTS
DEPENDS ${cycles_oneapi_kernel_sources}
USES_TERMINAL)
DEPENDS ${cycles_oneapi_kernel_sources})
endif()
if(NOT WITH_BLENDER)

View File

@ -117,7 +117,7 @@ ccl_device float3 ensure_valid_specular_reflection(float3 Ng, float3 I, float3 N
const float3 R = 2 * dot(N, I) * N - I;
const float Iz = dot(I, Ng);
kernel_assert(Iz > 0);
kernel_assert(Iz >= 0);
/* Reflection rays may always be at least as shallow as the incoming ray. */
const float threshold = min(0.9f * Iz, 0.01f);

View File

@ -253,7 +253,7 @@ ccl_device float3 xyY_to_xyz(float x, float y, float Y)
* exp = exponent, encoded as uint32_t
* e2coeff = 2^(127/exponent - 127) * bias_coeff^(1/exponent), encoded as uint32_t
*/
template<unsigned exp, unsigned e2coeff> ccl_device_inline float4 fastpow(const float4 &arg)
template<unsigned exp, unsigned e2coeff> ccl_device_inline float4 fastpow_sse2(const float4 &arg)
{
float4 ret = arg * cast(make_int4(e2coeff));
ret = make_float4(cast(ret));
@ -263,7 +263,7 @@ template<unsigned exp, unsigned e2coeff> ccl_device_inline float4 fastpow(const
}
/* Improve x ^ 1.0f/5.0f solution with Newton-Raphson method */
ccl_device_inline float4 improve_5throot_solution(const float4 &old_result, const float4 &x)
ccl_device_inline float4 improve_5throot_solution_sse2(const float4 &old_result, const float4 &x)
{
float4 approx2 = old_result * old_result;
float4 approx4 = approx2 * approx2;
@ -273,7 +273,7 @@ ccl_device_inline float4 improve_5throot_solution(const float4 &old_result, cons
}
/* Calculate powf(x, 2.4). Working domain: 1e-10 < x < 1e+10 */
ccl_device_inline float4 fastpow24(const float4 &arg)
ccl_device_inline float4 fastpow24_sse2(const float4 &arg)
{
/* max, avg and |avg| errors were calculated in gcc without FMA instructions
* The final precision should be better than powf in glibc */
@ -281,27 +281,27 @@ ccl_device_inline float4 fastpow24(const float4 &arg)
/* Calculate x^4/5, coefficient 0.994 was constructed manually to minimize avg error */
/* 0x3F4CCCCD = 4/5 */
/* 0x4F55A7FB = 2^(127/(4/5) - 127) * 0.994^(1/(4/5)) */
float4 x = fastpow<0x3F4CCCCD, 0x4F55A7FB>(
float4 x = fastpow_sse2<0x3F4CCCCD, 0x4F55A7FB>(
arg); // error max = 0.17 avg = 0.0018 |avg| = 0.05
float4 arg2 = arg * arg;
float4 arg4 = arg2 * arg2;
/* error max = 0.018 avg = 0.0031 |avg| = 0.0031 */
x = improve_5throot_solution(x, arg4);
x = improve_5throot_solution_sse2(x, arg4);
/* error max = 0.00021 avg = 1.6e-05 |avg| = 1.6e-05 */
x = improve_5throot_solution(x, arg4);
x = improve_5throot_solution_sse2(x, arg4);
/* error max = 6.1e-07 avg = 5.2e-08 |avg| = 1.1e-07 */
x = improve_5throot_solution(x, arg4);
x = improve_5throot_solution_sse2(x, arg4);
return x * (x * x);
}
ccl_device float4 color_srgb_to_linear(const float4 &c)
ccl_device float4 color_srgb_to_linear_sse2(const float4 &c)
{
int4 cmp = c < make_float4(0.04045f);
float4 lt = max(c * make_float4(1.0f / 12.92f), make_float4(0.0f));
float4 gtebase = (c + make_float4(0.055f)) * make_float4(1.0f / 1.055f); /* fma */
float4 gte = fastpow24(gtebase);
float4 gte = fastpow24_sse2(gtebase);
return select(cmp, lt, gte);
}
#endif /* __KERNEL_SSE2__ */
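For reference, a scalar version of the transform this SSE2 path approximates, using the same constants as above (illustrative, not part of the patch; std::pow stands in for the fastpow24_sse2() approximation):
#include <cmath>
static float color_srgb_to_linear_scalar(float c)
{
  if (c < 0.04045f) {
    return (c < 0.0f) ? 0.0f : c * (1.0f / 12.92f); /* matches the max() above */
  }
  return std::pow((c + 0.055f) * (1.0f / 1.055f), 2.4f);
}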
@ -328,7 +328,7 @@ ccl_device float4 color_srgb_to_linear_v4(float4 c)
{
#ifdef __KERNEL_SSE2__
float4 r = c;
r = color_srgb_to_linear(r);
r = color_srgb_to_linear_sse2(r);
r.w = c.w;
return r;
#else

View File

@ -13,8 +13,8 @@ CCL_NAMESPACE_BEGIN
thread::thread(function<void()> run_cb) : run_cb_(run_cb), joined_(false)
{
#ifdef __APPLE__
/* Set the stack size to 2MB to match Linux. The default 512KB on macOS is
#if defined(__APPLE__) || defined(__linux__) && !defined(__GLIBC__)
/* Set the stack size to 2MB to match glibc. The default 512KB on macOS is
* too small for Embree, and consistent stack size also makes things more
* predictable in general. */
pthread_attr_t attribute;
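A minimal standalone sketch of the pattern used here (assumed, simplified usage rather than Blender's code): create the thread through a pthread_attr_t carrying an explicit 2 MiB stack, for platforms whose default is too small:
#include <pthread.h>
static void *worker(void *) { return nullptr; }
static int spawn_with_2mb_stack(pthread_t *tid)
{
  pthread_attr_t attr;
  pthread_attr_init(&attr);
  pthread_attr_setstacksize(&attr, 2 * 1024 * 1024); /* the size chosen above */
  const int err = pthread_create(tid, &attr, worker, nullptr);
  pthread_attr_destroy(&attr);
  return err;
}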
@ -43,7 +43,7 @@ void *thread::run(void *arg)
bool thread::join()
{
joined_ = true;
#ifdef __APPLE__
#if defined(__APPLE__) || defined(__linux__) && !defined(__GLIBC__)
return pthread_join(pthread_id, NULL) == 0;
#else
try {

View File

@ -43,7 +43,7 @@ class thread {
protected:
function<void()> run_cb_;
#ifdef __APPLE__
#if defined(__APPLE__) || defined(__linux__) && !defined(__GLIBC__)
pthread_t pthread_id;
#else
std::thread std_thread;

View File

@ -165,6 +165,7 @@ const UserDef U_default = {
.glalphaclip = 0.004,
.autokey_mode = (AUTOKEY_MODE_NORMAL & ~AUTOKEY_ON),
.autokey_flag = AUTOKEY_FLAG_XYZ2RGB,
.key_insert_channels = USER_ANIM_KEY_CHANNEL_LOCATION,
.animation_flag = USER_ANIM_HIGH_QUALITY_DRAWING,
.text_render = 0,
.navigation_mode = VIEW_NAVIGATION_WALK,

View File

@ -4684,7 +4684,7 @@ def km_object_mode(params):
("object.join", {"type": 'J', "value": 'PRESS', "ctrl": True}, None),
("wm.context_toggle", {"type": 'PERIOD', "value": 'PRESS', "ctrl": True},
{"properties": [("data_path", 'tool_settings.use_transform_data_origin')]}),
("anim.keyframe_insert_menu", {"type": 'I', "value": 'PRESS'}, None),
("anim.keyframe_insert", {"type": 'I', "value": 'PRESS'}, None),
("anim.keyframe_delete_v3d", {"type": 'I', "value": 'PRESS', "alt": True}, None),
("anim.keying_set_active_set", {"type": 'I', "value": 'PRESS', "shift": True, "ctrl": True, "alt": True}, None),
("collection.create", {"type": 'G', "value": 'PRESS', "ctrl": True}, None),
@ -4823,7 +4823,7 @@ def km_pose(params):
op_menu("VIEW3D_MT_bone_collections", {"type": 'M', "value": 'PRESS', "shift": True}),
("armature.move_to_collection", {"type": 'M', "value": 'PRESS'}, None),
("transform.bbone_resize", {"type": 'S', "value": 'PRESS', "shift": True, "ctrl": True, "alt": True}, None),
("anim.keyframe_insert_menu", {"type": 'I', "value": 'PRESS'}, None),
("anim.keyframe_insert", {"type": 'I', "value": 'PRESS'}, None),
("anim.keyframe_delete_v3d", {"type": 'I', "value": 'PRESS', "alt": True}, None),
("anim.keying_set_active_set", {"type": 'I', "value": 'PRESS', "shift": True, "ctrl": True, "alt": True}, None),
("pose.push", {"type": 'E', "value": 'PRESS', "ctrl": True}, None),

View File

@ -3225,7 +3225,7 @@ def km_pose(params):
("pose.select_hierarchy", {"type": 'DOWN_ARROW', "value": 'PRESS', "shift": True, "repeat": True},
{"properties": [("direction", 'CHILD'), ("extend", True)]}),
("pose.select_linked", {"type": 'L', "value": 'PRESS', "ctrl": True}, None),
("anim.keyframe_insert_menu", {"type": 'S', "value": 'PRESS', "shift": True}, None),
("anim.keyframe_insert", {"type": 'S', "value": 'PRESS', "shift": True}, None),
("anim.keyframe_insert_by_name", {"type": 'S', "value": 'PRESS'},
{"properties": [("type", 'LocRotScale')]}),
("anim.keyframe_insert_by_name", {"type": 'W', "value": 'PRESS', "shift": True},
@ -3297,7 +3297,7 @@ def km_object_mode(params):
{"properties": [("use_global", True), ("confirm", False)]}),
("object.duplicate_move", {"type": 'D', "value": 'PRESS', "ctrl": True}, None),
# Keyframing
("anim.keyframe_insert_menu", {"type": 'S', "value": 'PRESS', "shift": True}, None),
("anim.keyframe_insert", {"type": 'S', "value": 'PRESS', "shift": True}, None),
("anim.keyframe_insert_by_name", {"type": 'S', "value": 'PRESS'},
{"properties": [("type", 'LocRotScale')]}),
("anim.keyframe_insert_by_name", {"type": 'W', "value": 'PRESS', "shift": True},

View File

@ -4,6 +4,7 @@
import bpy
from bpy.types import Operator
from bpy.props import IntProperty
from bpy.app.translations import pgettext_data as data_
@ -459,6 +460,55 @@ class RepeatZoneItemMoveOperator(RepeatZoneOperator, ZoneMoveItemOperator, Opera
bl_options = {'REGISTER', 'UNDO'}
def _editable_tree_with_active_node_type(context, node_type):
space = context.space_data
# Needs active node editor and a tree.
if not space or space.type != 'NODE_EDITOR' or not space.edit_tree or space.edit_tree.library:
return False
node = context.active_node
if node is None or node.bl_idname != node_type:
return False
return True
class IndexSwitchItemAddOperator(Operator):
"""Add an item to the index switch"""
bl_idname = "node.index_switch_item_add"
bl_label = "Add Item"
bl_options = {'REGISTER', 'UNDO'}
@classmethod
def poll(cls, context):
return _editable_tree_with_active_node_type(context, 'GeometryNodeIndexSwitch')
def execute(self, context):
node = context.active_node
node.index_switch_items.new()
return {'FINISHED'}
class IndexSwitchItemRemoveOperator(Operator):
"""Remove an item from the index switch"""
bl_idname = "node.index_switch_item_remove"
bl_label = "Remove Item"
bl_options = {'REGISTER', 'UNDO'}
index: IntProperty(
name="Index",
description="Index of item to remove",
)
@classmethod
def poll(cls, context):
return _editable_tree_with_active_node_type(context, 'GeometryNodeIndexSwitch')
def execute(self, context):
node = context.active_node
items = node.index_switch_items
items.remove(items[self.index])
return {'FINISHED'}
classes = (
NewGeometryNodesModifier,
NewGeometryNodeTreeAssign,
@ -470,4 +520,6 @@ classes = (
RepeatZoneItemAddOperator,
RepeatZoneItemRemoveOperator,
RepeatZoneItemMoveOperator,
IndexSwitchItemAddOperator,
IndexSwitchItemRemoveOperator,
)

View File

@ -546,6 +546,7 @@ class NODE_MT_category_GEO_UTILITIES(Menu):
node_add_menu.add_node_type(layout, "FunctionNodeRandomValue")
node_add_menu.add_repeat_zone(layout, label="Repeat Zone")
node_add_menu.add_node_type(layout, "GeometryNodeSwitch")
node_add_menu.add_node_type(layout, "GeometryNodeIndexSwitch")
node_add_menu.draw_assets_for_catalog(layout, self.bl_label)

View File

@ -238,7 +238,7 @@ class DATA_PT_lightprobe_parallax(DataButtonsPanel, Panel):
class DATA_PT_lightprobe_display(DataButtonsPanel, Panel):
bl_label = "Viewport Display"
bl_options = {'DEFAULT_CLOSED'}
COMPAT_ENGINES = {'BLENDER_EEVEE', 'BLENDER_RENDER'}
COMPAT_ENGINES = {'BLENDER_EEVEE', 'BLENDER_EEVEE_NEXT', 'BLENDER_RENDER'}
def draw(self, context):
layout = self.layout

View File

@ -426,9 +426,7 @@ class DATA_PT_remesh(MeshButtonsPanel, Panel):
col = layout.column(heading="Preserve")
col.prop(mesh, "use_remesh_preserve_volume", text="Volume")
col.prop(mesh, "use_remesh_preserve_paint_mask", text="Paint Mask")
col.prop(mesh, "use_remesh_preserve_sculpt_face_sets", text="Face Sets")
col.prop(mesh, "use_remesh_preserve_vertex_colors", text="Color Attributes")
col.prop(mesh, "use_remesh_preserve_attributes", text="Attributes")
col.operator("object.voxel_remesh", text="Voxel Remesh")
else:

View File

@ -299,6 +299,9 @@ class EEVEE_NEXT_MATERIAL_PT_settings_surface(MaterialButtonsPanel, Panel):
col.prop(mat, "use_backface_culling", text="Camera")
col.prop(mat, "use_backface_culling_shadow", text="Shadow")
layout.prop(mat, "displacement_method", text="Displacement")
if mat.displacement_method == 'DISPLACEMENT':
layout.label(text="Unsupported displacement method", icon='ERROR')
layout.prop(mat, "max_vertex_displacement", text="Max Displacement")
layout.prop(mat, "use_transparent_shadow")

View File

@ -399,8 +399,8 @@ class OBJECT_PT_visibility(ObjectButtonsPanel, Panel):
layout.separator()
col = layout.column(heading="Light Probes")
col.prop(ob, "hide_probe_volume", text="Volume", toggle=False, invert_checkbox=True)
col.prop(ob, "hide_probe_cubemap", text="Cubemap", toggle=False, invert_checkbox=True)
col.prop(ob, "hide_probe_planar", text="Planar", toggle=False, invert_checkbox=True)
col.prop(ob, "hide_probe_sphere", text="Sphere", toggle=False, invert_checkbox=True)
col.prop(ob, "hide_probe_plane", text="Plane", toggle=False, invert_checkbox=True)
if ob.type == 'GPENCIL':
col = layout.column(heading="Grease Pencil")

View File

@ -615,6 +615,7 @@ class EeveeRaytracingScreenOption(RenderButtonsPanel, Panel):
layout.prop(props, "screen_trace_quality", text="Precision")
layout.prop(props, "screen_trace_thickness", text="Thickness")
layout.prop(props, "screen_trace_max_roughness", text="Max Roughness")
class EeveeRaytracingDenoisePanel(RenderButtonsPanel, Panel):

View File

@ -1145,6 +1145,34 @@ class NODE_PT_repeat_zone_items(Panel):
layout.prop(output_node, "inspection_index")
class NODE_PT_index_switch_node_items(Panel):
bl_space_type = 'NODE_EDITOR'
bl_region_type = 'UI'
bl_category = "Node"
bl_label = "Index Switch"
@classmethod
def poll(cls, context):
snode = context.space_data
if snode is None:
return False
node = context.active_node
if node is None or node.bl_idname != 'GeometryNodeIndexSwitch':
return False
return True
def draw(self, context):
layout = self.layout
node = context.active_node
layout.operator("node.index_switch_item_add", icon='ADD', text="Add Item")
col = layout.column()
for i, item in enumerate(node.index_switch_items):
row = col.row()
row.label(text=node.inputs[i + 1].name)
row.operator("node.index_switch_item_remove", icon='REMOVE', text="").index = i
# Grease Pencil properties
class NODE_PT_annotation(AnnotationDataPanel, Panel):
bl_space_type = 'NODE_EDITOR'
@ -1213,6 +1241,7 @@ classes = (
NODE_UL_simulation_zone_items,
NODE_PT_simulation_zone_items,
NODE_UL_repeat_zone_items,
NODE_PT_index_switch_node_items,
NODE_PT_repeat_zone_items,
NODE_PT_active_node_properties,

View File

@ -576,6 +576,8 @@ class USERPREF_PT_animation_keyframes(AnimationPanel, CenterAlignMixIn, Panel):
prefs = context.preferences
edit = prefs.edit
layout.prop(edit, "key_insert_channels", expand=True)
col = layout.column()
col.prop(edit, "use_visual_keying")
col.prop(edit, "use_keyframe_insert_needed", text="Only Insert Needed")

View File

@ -2755,7 +2755,8 @@ class VIEW3D_MT_object_animation(Menu):
def draw(self, _context):
layout = self.layout
layout.operator("anim.keyframe_insert_menu", text="Insert Keyframe...")
layout.operator("anim.keyframe_insert", text="Insert Keyframe")
layout.operator("anim.keyframe_insert_menu", text="Insert Keyframe with Keying Set")
layout.operator("anim.keyframe_delete_v3d", text="Delete Keyframes...")
layout.operator("anim.keyframe_clear_v3d", text="Clear Keyframes...")
layout.operator("anim.keying_set_active_set", text="Change Keying Set...")
@ -3019,7 +3020,8 @@ class VIEW3D_MT_object_context_menu(Menu):
layout.separator()
layout.operator("anim.keyframe_insert_menu", text="Insert Keyframe...")
layout.operator("anim.keyframe_insert", text="Insert Keyframe")
layout.operator("anim.keyframe_insert_menu", text="Insert Keyframe with Keying Set")
layout.separator()
@ -4177,7 +4179,8 @@ class VIEW3D_MT_pose_context_menu(Menu):
layout.operator_context = 'INVOKE_REGION_WIN'
layout.operator("anim.keyframe_insert_menu", text="Insert Keyframe...")
layout.operator("anim.keyframe_insert", text="Insert Keyframe")
layout.operator("anim.keyframe_insert_menu", text="Insert Keyframe with Keying Set")
layout.separator()
@ -6729,7 +6732,7 @@ class VIEW3D_PT_shading_render_pass(Panel):
bl_region_type = 'HEADER'
bl_label = "Render Pass"
bl_parent_id = "VIEW3D_PT_shading"
COMPAT_ENGINES = {'BLENDER_EEVEE'}
COMPAT_ENGINES = {'BLENDER_EEVEE', 'BLENDER_EEVEE_NEXT'}
@classmethod
def poll(cls, context):

View File

@ -1045,9 +1045,7 @@ class VIEW3D_PT_sculpt_voxel_remesh(Panel, View3DPaintPanel):
col = layout.column(heading="Preserve", align=True)
col.prop(mesh, "use_remesh_preserve_volume", text="Volume")
col.prop(mesh, "use_remesh_preserve_paint_mask", text="Paint Mask")
col.prop(mesh, "use_remesh_preserve_sculpt_face_sets", text="Face Sets")
col.prop(mesh, "use_remesh_preserve_vertex_colors", text="Color Attributes")
col.prop(mesh, "use_remesh_preserve_attributes", text="Attributes")
layout.operator("object.voxel_remesh", text="Remesh")

View File

@ -10,6 +10,9 @@
#pragma once
#include <string>
#include "BLI_vector.hh"
#include "DNA_anim_types.h"
#include "ED_transform.hh"
#include "RNA_types.hh"
@ -129,7 +132,7 @@ bool is_autokey_on(const Scene *scene);
bool is_autokey_mode(const Scene *scene, eAutokey_Mode mode);
/** Check if a flag is set for auto-key-framing (per scene takes precedence). */
bool is_autokey_flag(const Scene *scene, eAutokey_Flag flag);
bool is_autokey_flag(const Scene *scene, eKeyInsert_Flag flag);
/**
* Auto-keyframing feature - checks for whether anything should be done for the current frame.
@ -175,4 +178,20 @@ bool autokeyframe_property(bContext *C,
/** \} */
/**
* Insert keys for the given rna_path in the given action. The length of the values Span is
* expected to be the size of the property array.
* \param frame is expected to be in the local time of the action, meaning it has to be NLA mapped
* already.
* \returns The number of keys inserted.
*/
int insert_key_action(Main *bmain,
bAction *action,
PointerRNA *ptr,
const std::string &rna_path,
float frame,
const Span<float> values,
eInsertKeyFlags insert_key_flag,
eBezTriple_KeyframeType key_type);
} // namespace blender::animrig

View File

@ -8,6 +8,7 @@
#include <cfloat>
#include <cmath>
#include <string>
#include "ANIM_action.hh"
#include "ANIM_animdata.hh"
@ -38,6 +39,7 @@
#include "RNA_access.hh"
#include "RNA_define.hh"
#include "RNA_path.hh"
#include "RNA_prototypes.h"
#include "RNA_types.hh"
#include "WM_api.hh"
@ -965,4 +967,40 @@ int clear_keyframe(Main *bmain,
return key_count;
}
int insert_key_action(Main *bmain,
bAction *action,
PointerRNA *ptr,
const std::string &rna_path,
const float frame,
const Span<float> values,
eInsertKeyFlags insert_key_flag,
eBezTriple_KeyframeType key_type)
{
BLI_assert(bmain != nullptr);
BLI_assert(action != nullptr);
std::string group;
if (ptr->type == &RNA_PoseBone) {
bPoseChannel *pose_channel = static_cast<bPoseChannel *>(ptr->data);
group = pose_channel->name;
}
else {
group = "Object Transforms";
}
int property_array_index = 0;
int inserted_keys = 0;
for (float value : values) {
FCurve *fcurve = action_fcurve_ensure(
bmain, action, group.c_str(), ptr, rna_path.c_str(), property_array_index);
const bool inserted_key = insert_keyframe_value(
fcurve, frame, value, key_type, insert_key_flag);
if (inserted_key) {
inserted_keys++;
}
property_array_index++;
}
return inserted_keys;
}
} // namespace blender::animrig

View File

@ -48,7 +48,7 @@ bool is_autokey_mode(const Scene *scene, const eAutokey_Mode mode)
return U.autokey_mode == mode;
}
bool is_autokey_flag(const Scene *scene, const eAutokey_Flag flag)
bool is_autokey_flag(const Scene *scene, const eKeyInsert_Flag flag)
{
if (scene) {
return (scene->toolsettings->autokey_flag & flag) || (U.autokey_flag & flag);
@ -114,7 +114,7 @@ void autokeyframe_object(
C, &sources, active_ks, MODIFYKEY_MODE_INSERT, anim_eval_context.eval_time);
}
else if (is_autokey_flag(scene, AUTOKEY_FLAG_INSERTAVAIL)) {
else if (is_autokey_flag(scene, AUTOKEY_FLAG_INSERTAVAILABLE)) {
/* Only key on available channels. */
AnimData *adt = ob->adt;
ToolSettings *ts = scene->toolsettings;
@ -280,7 +280,7 @@ void autokeyframe_pose(bContext *C, Scene *scene, Object *ob, int tmode, short t
C, &sources, active_ks, MODIFYKEY_MODE_INSERT, anim_eval_context.eval_time);
}
/* only insert into available channels? */
else if (blender::animrig::is_autokey_flag(scene, AUTOKEY_FLAG_INSERTAVAIL)) {
else if (blender::animrig::is_autokey_flag(scene, AUTOKEY_FLAG_INSERTAVAILABLE)) {
if (act) {
LISTBASE_FOREACH (FCurve *, fcu, &act->curves) {
/* only insert keyframes for this F-Curve if it affects the current bone */

View File

@ -891,6 +891,14 @@ void gather_attributes_group_to_group(AttributeAccessor src_attributes,
const IndexMask &selection,
MutableAttributeAccessor dst_attributes);
void gather_attributes_to_groups(AttributeAccessor src_attributes,
eAttrDomain domain,
const AnonymousAttributePropagationInfo &propagation_info,
const Set<std::string> &skip,
OffsetIndices<int> dst_offsets,
const IndexMask &src_selection,
MutableAttributeAccessor dst_attributes);
void copy_attributes(const AttributeAccessor src_attributes,
const eAttrDomain domain,
const AnonymousAttributePropagationInfo &propagation_info,

View File

@ -14,6 +14,7 @@
#include "BLI_math_quaternion.hh"
#include "BLI_math_vector.h"
#include "BLI_math_vector.hh"
#include "BLI_offset_indices.hh"
#include "BKE_customdata.hh"
@ -634,6 +635,15 @@ template<typename T> using DefaultMixer = typename DefaultMixerStruct<T>::type;
void gather(GSpan src, Span<int> map, GMutableSpan dst);
void gather(const GVArray &src, Span<int> map, GMutableSpan dst);
void gather_group_to_group(OffsetIndices<int> src_offsets,
OffsetIndices<int> dst_offsets,
const IndexMask &selection,
GSpan src,
GMutableSpan dst);
void gather_to_groups(OffsetIndices<int> dst_offsets,
const IndexMask &src_selection,
GSpan src,
GMutableSpan dst);
/** \} */

View File

@ -13,6 +13,7 @@
#include "BLI_compute_context.hh"
struct bNode;
struct bNodeTree;
namespace blender::bke {
@ -39,27 +40,41 @@ class ModifierComputeContext : public ComputeContext {
void print_current_in_line(std::ostream &stream) const override;
};
class NodeGroupComputeContext : public ComputeContext {
class GroupNodeComputeContext : public ComputeContext {
private:
static constexpr const char *s_static_type = "NODE_GROUP";
int32_t node_id_;
#ifdef DEBUG
std::string debug_node_name_;
#endif
/**
* The caller node tree and group node are not always necessary or even available, but storing
* them here simplifies "walking up" the compute context to the parent node groups.
*/
const bNodeTree *caller_tree_ = nullptr;
const bNode *caller_group_node_ = nullptr;
public:
NodeGroupComputeContext(const ComputeContext *parent,
GroupNodeComputeContext(const ComputeContext *parent,
int32_t node_id,
const std::optional<ComputeContextHash> &cached_hash = {});
NodeGroupComputeContext(const ComputeContext *parent, const bNode &node);
GroupNodeComputeContext(const ComputeContext *parent,
const bNode &node,
const bNodeTree &caller_tree);
int32_t node_id() const
{
return node_id_;
}
const bNode *caller_group_node() const
{
return caller_group_node_;
}
const bNodeTree *caller_tree() const
{
return caller_tree_;
}
private:
void print_current_in_line(std::ostream &stream) const override;
};

View File

@ -21,7 +21,6 @@ Mesh *BKE_mesh_remesh_quadriflow(const Mesh *mesh,
void (*update_cb)(void *, float progress, int *cancel),
void *update_cb_data);
/* Data reprojection functions */
void BKE_mesh_remesh_reproject_paint_mask(Mesh *target, const Mesh *source);
void BKE_remesh_reproject_vertex_paint(Mesh *target, const Mesh *source);
void BKE_remesh_reproject_sculpt_face_sets(Mesh *target, const Mesh *source);
namespace blender::bke {
void mesh_remesh_reproject_attributes(const Mesh &src, Mesh &dst);
}

View File

@ -1316,6 +1316,7 @@ void BKE_nodetree_remove_layer_n(struct bNodeTree *ntree, struct Scene *scene, i
#define GEO_NODE_INPUT_EDGE_SMOOTH 2115
#define GEO_NODE_SPLIT_TO_INSTANCES 2116
#define GEO_NODE_INPUT_NAMED_LAYER_SELECTION 2117
#define GEO_NODE_INDEX_SWITCH 2118
/** \} */

View File

@ -905,7 +905,7 @@ static bool indices_are_range(const Span<int> indices, const IndexRange range)
}
return true;
},
[](const bool a, const bool b) { return a && b; });
std::logical_and());
}
void gather_attributes(const AttributeAccessor src_attributes,
@ -942,30 +942,6 @@ void gather_attributes(const AttributeAccessor src_attributes,
}
}
template<typename T>
static void gather_group_to_group(const OffsetIndices<int> src_offsets,
const OffsetIndices<int> dst_offsets,
const IndexMask &selection,
const Span<T> src,
MutableSpan<T> dst)
{
selection.foreach_index(GrainSize(512), [&](const int64_t src_i, const int64_t dst_i) {
dst.slice(dst_offsets[dst_i]).copy_from(src.slice(src_offsets[src_i]));
});
}
static void gather_group_to_group(const OffsetIndices<int> src_offsets,
const OffsetIndices<int> dst_offsets,
const IndexMask &selection,
const GSpan src,
GMutableSpan dst)
{
attribute_math::convert_to_static_type(src.type(), [&](auto dummy) {
using T = decltype(dummy);
gather_group_to_group(src_offsets, dst_offsets, selection, src.typed<T>(), dst.typed<T>());
});
}
void gather_attributes_group_to_group(const AttributeAccessor src_attributes,
const eAttrDomain domain,
const AnonymousAttributePropagationInfo &propagation_info,
@ -991,7 +967,37 @@ void gather_attributes_group_to_group(const AttributeAccessor src_attributes,
if (!dst) {
return true;
}
gather_group_to_group(src_offsets, dst_offsets, selection, src, dst.span);
attribute_math::gather_group_to_group(src_offsets, dst_offsets, selection, src, dst.span);
dst.finish();
return true;
});
}
void gather_attributes_to_groups(const AttributeAccessor src_attributes,
const eAttrDomain domain,
const AnonymousAttributePropagationInfo &propagation_info,
const Set<std::string> &skip,
const OffsetIndices<int> dst_offsets,
const IndexMask &src_selection,
MutableAttributeAccessor dst_attributes)
{
src_attributes.for_all([&](const AttributeIDRef &id, const AttributeMetaData meta_data) {
if (meta_data.domain != domain) {
return true;
}
if (id.is_anonymous() && !propagation_info.propagate(id.anonymous_id())) {
return true;
}
if (skip.contains(id.name())) {
return true;
}
const GVArraySpan src = *src_attributes.lookup(id, domain);
bke::GSpanAttributeWriter dst = dst_attributes.lookup_or_add_for_write_only_span(
id, domain, meta_data.data_type);
if (!dst) {
return true;
}
attribute_math::gather_to_groups(dst_offsets, src_selection, src, dst.span);
dst.finish();
return true;
});

View File

@ -176,4 +176,28 @@ void gather(const GVArray &src, const Span<int> map, GMutableSpan dst)
});
}
void gather_group_to_group(const OffsetIndices<int> src_offsets,
const OffsetIndices<int> dst_offsets,
const IndexMask &selection,
const GSpan src,
GMutableSpan dst)
{
attribute_math::convert_to_static_type(src.type(), [&](auto dummy) {
using T = decltype(dummy);
array_utils::gather_group_to_group(
src_offsets, dst_offsets, selection, src.typed<T>(), dst.typed<T>());
});
}
void gather_to_groups(const OffsetIndices<int> dst_offsets,
const IndexMask &src_selection,
const GSpan src,
GMutableSpan dst)
{
bke::attribute_math::convert_to_static_type(src.type(), [&](auto dummy) {
using T = decltype(dummy);
array_utils::gather_to_groups(dst_offsets, src_selection, src.typed<T>(), dst.typed<T>());
});
}
} // namespace blender::bke::attribute_math

View File

@ -23,7 +23,7 @@ void ModifierComputeContext::print_current_in_line(std::ostream &stream) const
stream << "Modifier: " << modifier_name_;
}
NodeGroupComputeContext::NodeGroupComputeContext(
GroupNodeComputeContext::GroupNodeComputeContext(
const ComputeContext *parent,
const int32_t node_id,
const std::optional<ComputeContextHash> &cached_hash)
@ -45,23 +45,21 @@ NodeGroupComputeContext::NodeGroupComputeContext(
}
}
NodeGroupComputeContext::NodeGroupComputeContext(const ComputeContext *parent, const bNode &node)
: NodeGroupComputeContext(parent, node.identifier)
GroupNodeComputeContext::GroupNodeComputeContext(const ComputeContext *parent,
const bNode &node,
const bNodeTree &caller_tree)
: GroupNodeComputeContext(parent, node.identifier)
{
#ifdef DEBUG
debug_node_name_ = node.name;
#endif
caller_group_node_ = &node;
caller_tree_ = &caller_tree;
}
void NodeGroupComputeContext::print_current_in_line(std::ostream &stream) const
void GroupNodeComputeContext::print_current_in_line(std::ostream &stream) const
{
#ifdef DEBUG
if (!debug_node_name_.empty()) {
stream << "Node: " << debug_node_name_;
if (caller_group_node_ != nullptr) {
stream << "Node: " << caller_group_node_->name;
return;
}
#endif
stream << "Node ID: " << node_id_;
}
SimulationZoneComputeContext::SimulationZoneComputeContext(const ComputeContext *parent,

View File

@ -204,29 +204,29 @@ void BKE_crazyspace_set_quats_mesh(Mesh *me,
for (const int corner : face) {
const int vert = corner_verts[corner];
if (!vert_tag[vert]) {
const int corner_prev = mesh::face_corner_prev(face, corner);
const int corner_next = mesh::face_corner_next(face, corner);
const int vert_prev = corner_verts[mesh::face_corner_prev(face, corner)];
const int vert_next = corner_verts[mesh::face_corner_next(face, corner)];
const float *co_prev, *co_curr, *co_next; /* orig */
const float *vd_prev, *vd_curr, *vd_next; /* deform */
/* retrieve mapped coordinates */
vd_prev = mappedcos[corner_prev];
vd_curr = mappedcos[corner];
vd_next = mappedcos[corner_next];
vd_prev = mappedcos[vert_prev];
vd_curr = mappedcos[vert];
vd_next = mappedcos[vert_next];
if (!origcos.is_empty()) {
co_prev = origcos[corner_prev];
co_curr = origcos[corner];
co_next = origcos[corner_next];
co_prev = origcos[vert_prev];
co_curr = origcos[vert];
co_next = origcos[vert_next];
}
else {
co_prev = positions[corner_prev];
co_curr = positions[corner];
co_next = positions[corner_next];
co_prev = positions[vert_prev];
co_curr = positions[vert];
co_next = positions[vert_next];
}
set_crazy_vertex_quat(quats[corner], co_curr, co_next, co_prev, vd_curr, vd_next, vd_prev);
set_crazy_vertex_quat(quats[vert], co_curr, co_next, co_prev, vd_curr, vd_next, vd_prev);
vert_tag[vert].set();
}

View File

@ -347,6 +347,25 @@ static Array<int> reverse_indices_in_groups(const Span<int> group_indices,
return results;
}
/* A version of #reverse_indices_in_groups that stores face indices instead of corner indices. */
static void reverse_group_indices_in_groups(const OffsetIndices<int> groups,
const Span<int> group_to_elem,
const OffsetIndices<int> offsets,
MutableSpan<int> results)
{
int *counts = MEM_cnew_array<int>(size_t(offsets.size()), __func__);
BLI_SCOPED_DEFER([&]() { MEM_freeN(counts); })
threading::parallel_for(groups.index_range(), 1024, [&](const IndexRange range) {
for (const int64_t face : range) {
for (const int elem : group_to_elem.slice(groups[face])) {
const int index_in_group = atomic_fetch_and_add_int32(&counts[elem], 1);
results[offsets[elem][index_in_group]] = int(face);
}
}
});
sort_small_groups(offsets, 1024, results);
}
static GroupedSpan<int> gather_groups(const Span<int> group_indices,
const int groups_num,
Array<int> &r_offsets,
@ -370,16 +389,21 @@ GroupedSpan<int> build_vert_to_edge_map(const Span<int2> edges,
Array<int> &r_indices)
{
r_offsets = create_reverse_offsets(edges.cast<int>(), verts_num);
r_indices.reinitialize(r_offsets.last());
Array<int> counts(verts_num, 0);
const OffsetIndices<int> offsets(r_offsets);
r_indices.reinitialize(offsets.total_size());
for (const int64_t edge_i : edges.index_range()) {
for (const int vert : {edges[edge_i][0], edges[edge_i][1]}) {
r_indices[r_offsets[vert] + counts[vert]] = int(edge_i);
counts[vert]++;
/* Version of #reverse_indices_in_groups that accounts for storing two indices for each edge. */
int *counts = MEM_cnew_array<int>(size_t(offsets.size()), __func__);
BLI_SCOPED_DEFER([&]() { MEM_freeN(counts); })
threading::parallel_for(edges.index_range(), 1024, [&](const IndexRange range) {
for (const int64_t edge : range) {
for (const int vert : {edges[edge][0], edges[edge][1]}) {
const int index_in_group = atomic_fetch_and_add_int32(&counts[vert], 1);
r_indices[offsets[vert][index_in_group]] = int(edge);
}
}
}
return {OffsetIndices<int>(r_offsets), r_indices};
});
return {offsets, r_indices};
}
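
The rewritten map builders above all share one scatter pattern: each element atomically claims the next free slot inside its group, using precomputed offsets. A minimal stand-alone sketch of that pattern, using std::atomic in place of atomic_fetch_and_add_int32, a serial loop in place of threading::parallel_for, and hypothetical example data:

#include <array>
#include <atomic>
#include <cstdio>
#include <vector>

int main()
{
  /* Edges as vertex pairs; offsets[v]..offsets[v + 1] delimits vertex v's slots. */
  const std::vector<std::array<int, 2>> edges = {{0, 1}, {1, 2}, {2, 0}};
  const std::vector<int> offsets = {0, 2, 4, 6}; /* Every vertex touches two edges here. */
  std::vector<std::atomic<int>> counts(offsets.size() - 1);
  std::vector<int> vert_to_edge(offsets.back());
  for (size_t edge = 0; edge < edges.size(); edge++) {
    for (const int vert : edges[edge]) {
      /* Claim the next free slot in this vertex's group; safe to parallelize. */
      const int index_in_group = counts[vert].fetch_add(1);
      vert_to_edge[offsets[vert] + index_in_group] = int(edge);
    }
  }
  for (const int edge : vert_to_edge) {
    printf("%d ", edge); /* Prints: 0 2 0 1 1 2 */
  }
  printf("\n");
}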
void build_vert_to_face_indices(const OffsetIndices<int> faces,
@ -387,13 +411,7 @@ void build_vert_to_face_indices(const OffsetIndices<int> faces,
const OffsetIndices<int> offsets,
MutableSpan<int> r_indices)
{
Array<int> counts(offsets.size(), 0);
for (const int64_t face_i : faces.index_range()) {
for (const int vert : corner_verts.slice(faces[face_i])) {
r_indices[offsets[vert].start() + counts[vert]] = int(face_i);
counts[vert]++;
}
}
reverse_group_indices_in_groups(faces, corner_verts, offsets, r_indices);
}
GroupedSpan<int> build_vert_to_face_map(const OffsetIndices<int> faces,
@ -438,14 +456,7 @@ GroupedSpan<int> build_edge_to_face_map(const OffsetIndices<int> faces,
{
r_offsets = create_reverse_offsets(corner_edges, edges_num);
r_indices.reinitialize(r_offsets.last());
Array<int> counts(edges_num, 0);
for (const int64_t face_i : faces.index_range()) {
for (const int edge : corner_edges.slice(faces[face_i])) {
r_indices[r_offsets[edge] + counts[edge]] = int(face_i);
counts[edge]++;
}
}
reverse_group_indices_in_groups(faces, corner_edges, OffsetIndices<int>(r_offsets), r_indices);
return {OffsetIndices<int>(r_offsets), r_indices};
}

View File

@ -17,6 +17,7 @@
#include "BLI_array.hh"
#include "BLI_array_utils.hh"
#include "BLI_enumerable_thread_specific.hh"
#include "BLI_index_range.hh"
#include "BLI_math_vector.h"
#include "BLI_span.hh"
@ -277,191 +278,329 @@ Mesh *BKE_mesh_remesh_voxel(const Mesh *mesh,
#endif
}
void BKE_mesh_remesh_reproject_paint_mask(Mesh *target, const Mesh *source)
namespace blender::bke {
static void calc_edge_centers(const Span<float3> positions,
const Span<int2> edges,
MutableSpan<float3> edge_centers)
{
BVHTreeFromMesh bvhtree = {nullptr};
BKE_bvhtree_from_mesh_get(&bvhtree, source, BVHTREE_FROM_VERTS, 2);
const Span<float3> target_positions = target->vert_positions();
const float *source_mask = (const float *)CustomData_get_layer_named(
&source->vert_data, CD_PROP_FLOAT, ".sculpt_mask");
if (source_mask == nullptr) {
return;
for (const int i : edges.index_range()) {
edge_centers[i] = math::midpoint(positions[edges[i][0]], positions[edges[i][1]]);
}
}
float *target_mask;
if (CustomData_has_layer_named(&target->vert_data, CD_PROP_FLOAT, ".sculpt_mask")) {
target_mask = (float *)CustomData_get_layer_named(
&target->vert_data, CD_PROP_FLOAT, ".sculpt_mask");
}
else {
target_mask = (float *)CustomData_add_layer_named(
&target->vert_data, CD_PROP_FLOAT, CD_CONSTRUCT, target->totvert, ".sculpt_mask");
static void calc_face_centers(const Span<float3> positions,
const OffsetIndices<int> faces,
const Span<int> corner_verts,
MutableSpan<float3> face_centers)
{
for (const int i : faces.index_range()) {
face_centers[i] = mesh::face_center_calc(positions, corner_verts.slice(faces[i]));
}
}
blender::threading::parallel_for(IndexRange(target->totvert), 4096, [&](const IndexRange range) {
for (const int i : range) {
BVHTreeNearest nearest;
nearest.index = -1;
nearest.dist_sq = FLT_MAX;
BLI_bvhtree_find_nearest(
bvhtree.tree, target_positions[i], &nearest, bvhtree.nearest_callback, &bvhtree);
if (nearest.index != -1) {
target_mask[i] = source_mask[nearest.index];
static void find_nearest_tris(const Span<float3> positions,
BVHTreeFromMesh &bvhtree,
MutableSpan<int> tris)
{
for (const int i : positions.index_range()) {
BVHTreeNearest nearest;
nearest.index = -1;
nearest.dist_sq = FLT_MAX;
BLI_bvhtree_find_nearest(
bvhtree.tree, positions[i], &nearest, bvhtree.nearest_callback, &bvhtree);
tris[i] = nearest.index;
}
}
static void find_nearest_tris_parallel(const Span<float3> positions,
BVHTreeFromMesh &bvhtree,
MutableSpan<int> tris)
{
threading::parallel_for(tris.index_range(), 512, [&](const IndexRange range) {
find_nearest_tris(positions.slice(range), bvhtree, tris.slice(range));
});
}
static void find_nearest_verts(const Span<float3> positions,
const Span<int> corner_verts,
const Span<MLoopTri> src_tris,
const Span<float3> dst_positions,
const Span<int> nearest_vert_tris,
MutableSpan<int> nearest_verts)
{
threading::parallel_for(dst_positions.index_range(), 512, [&](const IndexRange range) {
for (const int dst_vert : range) {
const float3 &dst_position = dst_positions[dst_vert];
const MLoopTri &src_tri = src_tris[nearest_vert_tris[dst_vert]];
std::array<float, 3> distances;
for (const int i : IndexRange(3)) {
const int src_vert = corner_verts[src_tri.tri[i]];
distances[i] = math::distance_squared(positions[src_vert], dst_position);
}
const int min = std::min_element(distances.begin(), distances.end()) - distances.begin();
nearest_verts[dst_vert] = corner_verts[src_tri.tri[min]];
}
});
free_bvhtree_from_mesh(&bvhtree);
}
void BKE_remesh_reproject_sculpt_face_sets(Mesh *target, const Mesh *source)
static void find_nearest_faces(const Span<int> src_tri_faces,
const Span<float3> dst_positions,
const OffsetIndices<int> dst_faces,
const Span<int> dst_corner_verts,
BVHTreeFromMesh &bvhtree,
MutableSpan<int> nearest_faces)
{
using namespace blender;
using namespace blender::bke;
const AttributeAccessor src_attributes = source->attributes();
MutableAttributeAccessor dst_attributes = target->attributes_for_write();
const Span<float3> target_positions = target->vert_positions();
const OffsetIndices target_faces = target->faces();
const Span<int> target_corner_verts = target->corner_verts();
struct TLS {
Vector<float3> face_centers;
Vector<int> tri_indices;
};
threading::EnumerableThreadSpecific<TLS> all_tls;
threading::parallel_for(dst_faces.index_range(), 512, [&](const IndexRange range) {
TLS &tls = all_tls.local();
Vector<float3> &face_centers = tls.face_centers;
face_centers.reinitialize(range.size());
calc_face_centers(dst_positions, dst_faces.slice(range), dst_corner_verts, face_centers);
const VArray src_face_sets = *src_attributes.lookup<int>(".sculpt_face_set", ATTR_DOMAIN_FACE);
if (!src_face_sets) {
return;
}
SpanAttributeWriter<int> dst_face_sets = dst_attributes.lookup_or_add_for_write_only_span<int>(
".sculpt_face_set", ATTR_DOMAIN_FACE);
if (!dst_face_sets) {
return;
}
Vector<int> &tri_indices = tls.tri_indices;
tri_indices.reinitialize(range.size());
find_nearest_tris(face_centers, bvhtree, tri_indices);
const VArraySpan<int> src(src_face_sets);
MutableSpan<int> dst = dst_face_sets.span;
const blender::Span<int> looptri_faces = source->looptri_faces();
BVHTreeFromMesh bvhtree = {nullptr};
BKE_bvhtree_from_mesh_get(&bvhtree, source, BVHTREE_FROM_LOOPTRI, 2);
blender::threading::parallel_for(
IndexRange(target->faces_num), 2048, [&](const IndexRange range) {
for (const int i : range) {
BVHTreeNearest nearest;
nearest.index = -1;
nearest.dist_sq = FLT_MAX;
const float3 from_co = mesh::face_center_calc(
target_positions, target_corner_verts.slice(target_faces[i]));
BLI_bvhtree_find_nearest(
bvhtree.tree, from_co, &nearest, bvhtree.nearest_callback, &bvhtree);
if (nearest.index != -1) {
dst[i] = src[looptri_faces[nearest.index]];
}
else {
dst[i] = 1;
}
}
});
free_bvhtree_from_mesh(&bvhtree);
dst_face_sets.finish();
array_utils::gather(src_tri_faces, tri_indices.as_span(), nearest_faces.slice(range));
});
}
void BKE_remesh_reproject_vertex_paint(Mesh *target, const Mesh *source)
static void find_nearest_corners(const Span<float3> src_positions,
const OffsetIndices<int> src_faces,
const Span<int> src_corner_verts,
const Span<int> src_tri_faces,
const Span<float3> dst_positions,
const Span<int> dst_corner_verts,
const Span<int> nearest_vert_tris,
MutableSpan<int> nearest_corners)
{
using namespace blender;
using namespace blender::bke;
const AttributeAccessor src_attributes = source->attributes();
MutableAttributeAccessor dst_attributes = target->attributes_for_write();
threading::parallel_for(nearest_corners.index_range(), 512, [&](const IndexRange range) {
Vector<float, 64> distances;
for (const int dst_corner : range) {
const int dst_vert = dst_corner_verts[dst_corner];
const float3 &dst_position = dst_positions[dst_vert];
const int src_tri = nearest_vert_tris[dst_vert];
const IndexRange src_face = src_faces[src_tri_faces[src_tri]];
const Span<int> src_face_verts = src_corner_verts.slice(src_face);
/* Find the corner closest to the destination position within the nearest source face. */
distances.reinitialize(src_face_verts.size());
for (const int i : src_face_verts.index_range()) {
const int src_vert = src_face_verts[i];
distances[i] = math::distance_squared(src_positions[src_vert], dst_position);
}
const int min = std::min_element(distances.begin(), distances.end()) - distances.begin();
nearest_corners[dst_corner] = src_face[min];
}
});
}
static void find_nearest_edges(const Span<float3> src_positions,
const Span<int2> src_edges,
const OffsetIndices<int> src_faces,
const Span<int> src_corner_edges,
const Span<int> src_tri_faces,
const Span<float3> dst_positions,
const Span<int2> dst_edges,
BVHTreeFromMesh &bvhtree,
MutableSpan<int> nearest_edges)
{
struct TLS {
Vector<float3> edge_centers;
Vector<int> tri_indices;
Vector<int> face_indices;
Vector<float> distances;
};
threading::EnumerableThreadSpecific<TLS> all_tls;
threading::parallel_for(nearest_edges.index_range(), 512, [&](const IndexRange range) {
TLS &tls = all_tls.local();
Vector<float3> &edge_centers = tls.edge_centers;
edge_centers.reinitialize(range.size());
calc_edge_centers(dst_positions, dst_edges.slice(range), edge_centers);
Vector<int> &tri_indices = tls.tri_indices;
tri_indices.reinitialize(range.size());
find_nearest_tris_parallel(edge_centers, bvhtree, tri_indices);
Vector<int> &face_indices = tls.face_indices;
face_indices.reinitialize(range.size());
array_utils::gather(src_tri_faces, tri_indices.as_span(), face_indices.as_mutable_span());
/* Find the source edge that's closest to the destination edge in the nearest face. Search
* through the whole face instead of just the triangle because the triangle has edges that
* might not be actual mesh edges. */
Vector<float, 64> distances;
for (const int i : range.index_range()) {
const int dst_edge = range[i];
const float3 &dst_position = edge_centers[i];
const int src_face = face_indices[i];
const Span<int> src_face_edges = src_corner_edges.slice(src_faces[src_face]);
distances.reinitialize(src_face_edges.size());
for (const int i : src_face_edges.index_range()) {
const int2 src_edge = src_edges[src_face_edges[i]];
const float3 src_center = math::midpoint(src_positions[src_edge[0]],
src_positions[src_edge[1]]);
distances[i] = math::distance_squared(src_center, dst_position);
}
const int min = std::min_element(distances.begin(), distances.end()) - distances.begin();
nearest_edges[dst_edge] = src_face_edges[min];
}
});
}
static void gather_attributes(const Span<AttributeIDRef> ids,
const AttributeAccessor src_attributes,
const eAttrDomain domain,
const Span<int> index_map,
MutableAttributeAccessor dst_attributes)
{
for (const AttributeIDRef &id : ids) {
const GVArraySpan src = *src_attributes.lookup(id, domain);
const eCustomDataType type = cpp_type_to_custom_data_type(src.type());
GSpanAttributeWriter dst = dst_attributes.lookup_or_add_for_write_only_span(id, domain, type);
attribute_math::gather(src, index_map, dst.span);
dst.finish();
}
}
void mesh_remesh_reproject_attributes(const Mesh &src, Mesh &dst)
{
/* Gather attributes to transfer for each domain. This makes it possible to skip
* building index maps and even the main BVH tree if there are no attributes. */
const AttributeAccessor src_attributes = src.attributes();
Vector<AttributeIDRef> point_ids;
Vector<AttributeIDRef> edge_ids;
Vector<AttributeIDRef> face_ids;
Vector<AttributeIDRef> corner_ids;
source->attributes().for_all([&](const AttributeIDRef &id, const AttributeMetaData &meta_data) {
if (CD_TYPE_AS_MASK(meta_data.data_type) & CD_MASK_COLOR_ALL) {
if (meta_data.domain == ATTR_DOMAIN_POINT) {
src_attributes.for_all([&](const AttributeIDRef &id, const AttributeMetaData &meta_data) {
if (ELEM(id.name(), "position", ".edge_verts", ".corner_vert", ".corner_edge")) {
return true;
}
switch (meta_data.domain) {
case ATTR_DOMAIN_POINT:
point_ids.append(id);
}
else if (meta_data.domain == ATTR_DOMAIN_CORNER) {
break;
case ATTR_DOMAIN_EDGE:
edge_ids.append(id);
break;
case ATTR_DOMAIN_FACE:
face_ids.append(id);
break;
case ATTR_DOMAIN_CORNER:
corner_ids.append(id);
}
break;
default:
BLI_assert_unreachable();
break;
}
return true;
});
if (point_ids.is_empty() && corner_ids.is_empty()) {
if (point_ids.is_empty() && edge_ids.is_empty() && face_ids.is_empty() && corner_ids.is_empty())
{
return;
}
GroupedSpan<int> source_lmap;
GroupedSpan<int> target_lmap;
BVHTreeFromMesh bvhtree = {nullptr};
threading::parallel_invoke(
[&]() { BKE_bvhtree_from_mesh_get(&bvhtree, source, BVHTREE_FROM_VERTS, 2); },
[&]() { source_lmap = source->vert_to_corner_map(); },
[&]() { target_lmap = target->vert_to_corner_map(); });
const Span<float3> src_positions = src.vert_positions();
const OffsetIndices src_faces = src.faces();
const Span<int> src_corner_verts = src.corner_verts();
const Span<MLoopTri> src_tris = src.looptris();
const Span<float3> target_positions = target->vert_positions();
Array<int> nearest_src_verts(target_positions.size());
threading::parallel_for(target_positions.index_range(), 1024, [&](const IndexRange range) {
for (const int i : range) {
BVHTreeNearest nearest;
nearest.index = -1;
nearest.dist_sq = FLT_MAX;
BLI_bvhtree_find_nearest(
bvhtree.tree, target_positions[i], &nearest, bvhtree.nearest_callback, &bvhtree);
nearest_src_verts[i] = nearest.index;
/* The main idea in the following code is to trade some complexity in sampling for the benefit of
* only using and building a single BVH tree. Since sculpt mode doesn't generally deal with loose
* vertices and edges, we use the standard "triangles" BVH which won't contain them. Also, only
* relying on a single BVH should reduce memory usage, and work better if the BVH and PBVH are
* ever merged.
*
* One key decision is separating building transfer index maps from actually transferring any
* attribute data. This is important to keep attribute storage independent from the specifics of
* the decisions made here, which mainly results in easier refactoring, more generic code, and
* possibly improved performance from lower cache usage in the "complex" sampling part of the
* algorithm and the copying itself. */
BVHTreeFromMesh bvhtree{};
BKE_bvhtree_from_mesh_get(&bvhtree, &src, BVHTREE_FROM_LOOPTRI, 2);
const Span<float3> dst_positions = dst.vert_positions();
const OffsetIndices dst_faces = dst.faces();
const Span<int> dst_corner_verts = dst.corner_verts();
MutableAttributeAccessor dst_attributes = dst.attributes_for_write();
if (!point_ids.is_empty() || !corner_ids.is_empty()) {
Array<int> vert_nearest_tris(dst_positions.size());
find_nearest_tris_parallel(dst_positions, bvhtree, vert_nearest_tris);
if (!point_ids.is_empty()) {
Array<int> map(dst.totvert);
find_nearest_verts(
src_positions, src_corner_verts, src_tris, dst_positions, vert_nearest_tris, map);
gather_attributes(point_ids, src_attributes, ATTR_DOMAIN_POINT, map, dst_attributes);
}
});
for (const AttributeIDRef &id : point_ids) {
const GVArraySpan src = *src_attributes.lookup(id, ATTR_DOMAIN_POINT);
GSpanAttributeWriter dst = dst_attributes.lookup_or_add_for_write_only_span(
id, ATTR_DOMAIN_POINT, cpp_type_to_custom_data_type(src.type()));
attribute_math::gather(src, nearest_src_verts, dst.span);
dst.finish();
}
if (!corner_ids.is_empty()) {
for (const AttributeIDRef &id : corner_ids) {
const GVArraySpan src = *src_attributes.lookup(id, ATTR_DOMAIN_CORNER);
GSpanAttributeWriter dst = dst_attributes.lookup_or_add_for_write_only_span(
id, ATTR_DOMAIN_CORNER, cpp_type_to_custom_data_type(src.type()));
threading::parallel_for(target_positions.index_range(), 1024, [&](const IndexRange range) {
src.type().to_static_type_tag<ColorGeometry4b, ColorGeometry4f>([&](auto type_tag) {
using T = typename decltype(type_tag)::type;
if constexpr (std::is_void_v<T>) {
BLI_assert_unreachable();
}
else {
const Span<T> src_typed = src.typed<T>();
MutableSpan<T> dst_typed = dst.span.typed<T>();
for (const int dst_vert : range) {
/* Find the average value at the corners of the closest vertex on the
* source mesh. */
const int src_vert = nearest_src_verts[dst_vert];
T value;
typename blender::bke::attribute_math::DefaultMixer<T> mixer({&value, 1});
for (const int corner : source_lmap[src_vert]) {
mixer.mix_in(0, src_typed[corner]);
}
dst_typed.fill_indices(target_lmap[dst_vert], value);
}
}
});
});
dst.finish();
if (!corner_ids.is_empty()) {
const Span<int> src_tri_faces = src.looptri_faces();
Array<int> map(dst.totloop);
find_nearest_corners(src_positions,
src_faces,
src_corner_verts,
src_tri_faces,
dst_positions,
dst_corner_verts,
vert_nearest_tris,
map);
gather_attributes(corner_ids, src_attributes, ATTR_DOMAIN_CORNER, map, dst_attributes);
}
}
/* Make sure active/default color attribute (names) are brought over. */
if (source->active_color_attribute) {
BKE_id_attributes_active_color_set(&target->id, source->active_color_attribute);
if (!edge_ids.is_empty()) {
const Span<int2> src_edges = src.edges();
const Span<int> src_corner_edges = src.corner_edges();
const Span<int> src_tri_faces = src.looptri_faces();
const Span<int2> dst_edges = dst.edges();
Array<int> map(dst.totedge);
find_nearest_edges(src_positions,
src_edges,
src_faces,
src_corner_edges,
src_tri_faces,
dst_positions,
dst_edges,
bvhtree,
map);
gather_attributes(edge_ids, src_attributes, ATTR_DOMAIN_EDGE, map, dst_attributes);
}
if (source->default_color_attribute) {
BKE_id_attributes_default_color_set(&target->id, source->default_color_attribute);
if (!face_ids.is_empty()) {
const Span<int> src_tri_faces = src.looptri_faces();
Array<int> map(dst.faces_num);
find_nearest_faces(src_tri_faces, dst_positions, dst_faces, dst_corner_verts, bvhtree, map);
gather_attributes(face_ids, src_attributes, ATTR_DOMAIN_FACE, map, dst_attributes);
}
if (src.active_color_attribute) {
BKE_id_attributes_active_color_set(&dst.id, src.active_color_attribute);
}
if (src.default_color_attribute) {
BKE_id_attributes_default_color_set(&dst.id, src.default_color_attribute);
}
free_bvhtree_from_mesh(&bvhtree);
}
} // namespace blender::bke
Mesh *BKE_mesh_remesh_voxel_fix_poles(const Mesh *mesh)
{
const BMAllocTemplate allocsize = BMALLOC_TEMPLATE_FROM_ME(mesh);

View File

@ -788,6 +788,9 @@ void ntreeBlendWrite(BlendWriter *writer, bNodeTree *ntree)
if (node->type == GEO_NODE_REPEAT_OUTPUT) {
blender::nodes::RepeatItemsAccessor::blend_write(writer, *node);
}
if (node->type == GEO_NODE_INDEX_SWITCH) {
blender::nodes::IndexSwitchItemsAccessor::blend_write(writer, *node);
}
}
LISTBASE_FOREACH (bNodeLink *, link, &ntree->links) {
@ -1027,6 +1030,10 @@ void ntreeBlendReadData(BlendDataReader *reader, ID *owner_id, bNodeTree *ntree)
blender::nodes::RepeatItemsAccessor::blend_read_data(reader, *node);
break;
}
case GEO_NODE_INDEX_SWITCH: {
blender::nodes::IndexSwitchItemsAccessor::blend_read_data(reader, *node);
break;
}
default:
break;

View File

@ -165,6 +165,29 @@ inline void gather(const VArray<T> &src,
});
}
template<typename T>
inline void gather_group_to_group(const OffsetIndices<int> src_offsets,
const OffsetIndices<int> dst_offsets,
const IndexMask &selection,
const Span<T> src,
MutableSpan<T> dst)
{
selection.foreach_index(GrainSize(512), [&](const int64_t src_i, const int64_t dst_i) {
dst.slice(dst_offsets[dst_i]).copy_from(src.slice(src_offsets[src_i]));
});
}
template<typename T>
inline void gather_to_groups(const OffsetIndices<int> dst_offsets,
const IndexMask &src_selection,
const Span<T> src,
MutableSpan<T> dst)
{
src_selection.foreach_index(GrainSize(1024), [&](const int src_i, const int dst_i) {
dst.slice(dst_offsets[dst_i]).fill(src[src_i]);
});
}
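
A stand-alone illustration of the gather_to_groups semantics, with plain vectors standing in for Blender's OffsetIndices, IndexMask, and Span types (the data is hypothetical): each selected source value fills one whole destination group.

#include <cstdio>
#include <vector>

int main()
{
  const std::vector<int> dst_offsets = {0, 3, 5}; /* Two destination groups. */
  const std::vector<int> src_selection = {4, 7};  /* src_i for dst groups 0 and 1. */
  const std::vector<int> src = {0, 1, 2, 3, 40, 5, 6, 70};
  std::vector<int> dst(dst_offsets.back());
  for (size_t dst_i = 0; dst_i < src_selection.size(); dst_i++) {
    const int src_i = src_selection[dst_i];
    for (int j = dst_offsets[dst_i]; j < dst_offsets[dst_i + 1]; j++) {
      dst[j] = src[src_i]; /* Fill the whole group with the selected value. */
    }
  }
  for (const int value : dst) {
    printf("%d ", value); /* Prints: 40 40 40 70 70 */
  }
  printf("\n");
}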
/**
* Copy the \a src data from the groups defined by \a src_offsets to the groups in \a dst defined
* by \a dst_offsets. Groups to use are masked by \a selection, and it is assumed that the

View File

@ -1015,6 +1015,33 @@ class Map {
return this->count_collisions__impl(key, hash_(key));
}
/**
* True if both maps have the same key-value-pairs.
*/
friend bool operator==(const Map &a, const Map &b)
{
if (a.size() != b.size()) {
return false;
}
for (const Item item : a.items()) {
const Key &key = item.key;
const Value &value_a = item.value;
const Value *value_b = b.lookup_ptr(key);
if (value_b == nullptr) {
return false;
}
if (value_a != *value_b) {
return false;
}
}
return true;
}
friend bool operator!=(const Map &a, const Map &b)
{
return !(a == b);
}
private:
BLI_NOINLINE void realloc_and_reinsert(int64_t min_usable_slots)
{

View File

@ -21,6 +21,7 @@
*/
#include "BLI_map.hh"
#include "BLI_struct_equality_utils.hh"
#include "BLI_vector.hh"
namespace blender {
@ -157,6 +158,8 @@ template<typename Key, typename Value> class MultiValueMap {
{
map_.clear_and_shrink();
}
BLI_STRUCT_EQUALITY_OPERATORS_1(MultiValueMap, map_)
};
} // namespace blender

View File

@ -85,8 +85,8 @@ template<typename T> class OffsetIndices {
*/
OffsetIndices slice(const IndexRange range) const
{
BLI_assert(offsets_.index_range().drop_back(1).contains(range.last()));
return OffsetIndices(offsets_.slice(range.start(), range.one_after_last()));
BLI_assert(range.is_empty() || offsets_.index_range().drop_back(1).contains(range.last()));
return OffsetIndices(offsets_.slice(range.start(), range.size() + 1));
}
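
The slice fix above does two things: the assertion now permits empty ranges, and the correct count is passed to Span::slice, which takes (start, size). Slicing groups [start, start + size) needs the size + 1 offsets that bound them; the old call passed range.one_after_last() (= start + size) as the count, which equals size + 1 only when start == 1 and over-reads for larger starts. A hypothetical stand-in showing the intended arithmetic:

#include <cassert>
#include <vector>

int main()
{
  const std::vector<int> offsets = {0, 2, 5, 9}; /* Three groups. */
  const int start = 2, size = 1; /* Slice holding only the last group. */
  /* Correct: keep offsets[start .. start + size], i.e. size + 1 values. */
  const std::vector<int> sliced(offsets.begin() + start, offsets.begin() + start + size + 1);
  assert(sliced.front() == 5 && sliced.back() == 9);
  /* Passing start + size (= 3) as the count would read offsets[2..4] and
   * run past the end of the array. */
}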
Span<T> data() const

View File

@ -14,6 +14,7 @@ set(INC
set(INC_SYS
../../../extern/wcwidth
../../../extern/json/include
../../../extern/fmtlib/include
${EIGEN3_INCLUDE_DIRS}
${ZLIB_INCLUDE_DIRS}
@ -397,6 +398,7 @@ set(LIB
PRIVATE bf::intern::guardedalloc
extern_wcwidth
PRIVATE bf::intern::atomic
PRIVATE extern_fmtlib
${ZLIB_LIBRARIES}
${ZSTD_LIBRARIES}
)

View File

@ -218,7 +218,7 @@ MatBase<T, Size, Size> pseudo_invert(const MatBase<T, Size, Size> &mat, T epsilo
{
/* Start by trying normal inversion first. */
bool success;
MatBase<T, Size, Size> inv = invert(mat, success);
MatBase<T, Size, Size> inv = invert<T, Size>(mat, success);
if (success) {
return inv;
}

View File

@ -7,39 +7,55 @@
#include <algorithm>
#include <iomanip>
#include <iostream>
#include <string_view>
#include <fmt/format.h>
namespace blender::timeit {
void print_duration(Nanoseconds duration)
static void format_duration(Nanoseconds duration, fmt::memory_buffer &buf)
{
using namespace std::chrono;
if (duration < microseconds(100)) {
std::cout << duration.count() << " ns";
fmt::format_to(fmt::appender(buf), FMT_STRING("{} ns"), duration.count());
}
else if (duration < seconds(5)) {
std::cout << std::fixed << std::setprecision(2) << duration.count() / 1.0e6 << " ms";
fmt::format_to(fmt::appender(buf), FMT_STRING("{:.2f} ms"), duration.count() / 1.0e6);
}
else if (duration > seconds(90)) {
/* Long durations: print seconds, and also H:m:s */
const auto dur_hours = duration_cast<hours>(duration);
const auto dur_mins = duration_cast<minutes>(duration - dur_hours);
const auto dur_sec = duration_cast<seconds>(duration - dur_hours - dur_mins);
std::cout << std::fixed << std::setprecision(1) << duration.count() / 1.0e9 << " s ("
<< dur_hours.count() << "H:" << dur_mins.count() << "m:" << dur_sec.count() << "s)";
fmt::format_to(fmt::appender(buf),
FMT_STRING("{:.1f} s ({}H:{}m:{}s)"),
duration.count() / 1.0e9,
dur_hours.count(),
dur_mins.count(),
dur_sec.count());
}
else {
std::cout << std::fixed << std::setprecision(1) << duration.count() / 1.0e9 << " s";
fmt::format_to(fmt::appender(buf), FMT_STRING("{:.1f} s"), duration.count() / 1.0e9);
}
}
void print_duration(Nanoseconds duration)
{
fmt::memory_buffer buf;
format_duration(duration, buf);
std::cout << std::string_view(buf.data(), buf.size());
}
ScopedTimer::~ScopedTimer()
{
const TimePoint end = Clock::now();
const Nanoseconds duration = end - start_;
std::cout << "Timer '" << name_ << "' took ";
print_duration(duration);
std::cout << '\n';
fmt::memory_buffer buf;
fmt::format_to(fmt::appender(buf), FMT_STRING("Timer '{}' took "), name_);
format_duration(duration, buf);
buf.append(std::string_view("\n"));
std::cout << std::string_view(buf.data(), buf.size());
}
ScopedTimerAveraged::~ScopedTimerAveraged()
@ -51,13 +67,15 @@ ScopedTimerAveraged::~ScopedTimerAveraged()
total_time_ += duration;
min_time_ = std::min(duration, min_time_);
std::cout << "Timer '" << name_ << "': (Average: ";
print_duration(total_time_ / total_count_);
std::cout << ", Min: ";
print_duration(min_time_);
std::cout << ", Last: ";
print_duration(duration);
std::cout << ")\n";
fmt::memory_buffer buf;
fmt::format_to(fmt::appender(buf), FMT_STRING("Timer '{}': (Average: "), name_);
format_duration(total_time_ / total_count_, buf);
buf.append(std::string_view(", Min: "));
format_duration(min_time_, buf);
buf.append(std::string_view(", Last: "));
format_duration(duration, buf);
buf.append(std::string_view(")\n"));
std::cout << std::string_view(buf.data(), buf.size());
}
} // namespace blender::timeit
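
The conversion above replaces a series of interleaved stream insertions with formatting into a single fmt::memory_buffer that is written out in one shot. The same calls, reduced to a stand-alone sketch (the timer name and duration are illustrative):

#include <fmt/format.h>
#include <iostream>
#include <string_view>

int main()
{
  fmt::memory_buffer buf;
  fmt::format_to(fmt::appender(buf), FMT_STRING("Timer '{}' took "), "demo");
  fmt::format_to(fmt::appender(buf), FMT_STRING("{:.2f} ms"), 1234567 / 1.0e6);
  buf.append(std::string_view("\n"));
  /* One stream insertion for the whole message. */
  std::cout << std::string_view(buf.data(), buf.size());
}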

View File

@ -694,6 +694,22 @@ TEST(map, VectorKey)
EXPECT_EQ(map.size(), 1);
}
TEST(map, Equality)
{
Map<int, int> a;
Map<int, int> b;
EXPECT_EQ(a, b);
a.add(3, 4);
EXPECT_NE(a, b);
b.add(3, 4);
EXPECT_EQ(a, b);
a.add(4, 10);
b.add(4, 11);
EXPECT_NE(a, b);
}
/**
* Set this to 1 to activate the benchmark. It is disabled by default, because it prints a lot.
*/

View File

@ -1390,7 +1390,7 @@ class StringEscape : public testing::Test {
size_t dst_test_len;
char dst_test[64]; /* Must be big enough for all input. */
for (const auto &item : items) {
/* Validate the static size is big enough (test the test it's self). */
/* Validate the static size is big enough (test the test itself). */
EXPECT_LT((strlen(item[0]) * 2) + 1, sizeof(dst_test));
/* Escape the string. */
dst_test_len = BLI_str_escape(dst_test, item[0], sizeof(dst_test));

View File

@ -4712,7 +4712,7 @@ void blo_do_versions_280(FileData *fd, Library * /*lib*/, Main *bmain)
LISTBASE_FOREACH (Mesh *, me, &bmain->meshes) {
me->flag &= ~(ME_FLAG_UNUSED_0 | ME_FLAG_UNUSED_1 | ME_FLAG_UNUSED_3 | ME_FLAG_UNUSED_4 |
ME_FLAG_UNUSED_6 | ME_FLAG_UNUSED_7 | ME_REMESH_REPROJECT_VERTEX_COLORS);
ME_FLAG_UNUSED_6 | ME_FLAG_UNUSED_7 | ME_REMESH_REPROJECT_ATTRIBUTES);
}
LISTBASE_FOREACH (Material *, mat, &bmain->materials) {

View File

@ -1292,8 +1292,9 @@ static void change_input_socket_to_rotation_type(bNodeTree &ntree,
if (link->tosock != &socket) {
continue;
}
if (ELEM(link->fromsock->type, SOCK_VECTOR, SOCK_FLOAT) &&
link->fromnode->type != NODE_REROUTE) {
if (ELEM(link->fromsock->type, SOCK_ROTATION, SOCK_VECTOR, SOCK_FLOAT) &&
link->fromnode->type != NODE_REROUTE)
{
/* No need to add the conversion node when implicit conversions will work. */
continue;
}
@ -1321,7 +1322,8 @@ static void change_output_socket_to_rotation_type(bNodeTree &ntree,
if (link->fromsock != &socket) {
continue;
}
if (link->tosock->type == SOCK_VECTOR && link->tonode->type != NODE_REROUTE) {
if (ELEM(link->tosock->type, SOCK_ROTATION, SOCK_VECTOR) && link->tonode->type != NODE_REROUTE)
{
/* No need to add the conversion node when implicit conversions will work. */
continue;
}
@ -1351,7 +1353,7 @@ static void version_geometry_nodes_use_rotation_socket(bNodeTree &ntree)
bNodeSocket *socket = nodeFindSocket(node, SOCK_IN, "Rotation");
change_input_socket_to_rotation_type(ntree, *node, *socket);
}
if (STREQ(node->idname, "GeometryNodeDistributePointsOnFaces")) {
if (STR_ELEM(node->idname, "GeometryNodeDistributePointsOnFaces", "GeometryNodeObjectInfo")) {
bNodeSocket *socket = nodeFindSocket(node, SOCK_OUT, "Rotation");
change_output_socket_to_rotation_type(ntree, *node, *socket);
}
@ -2504,5 +2506,27 @@ void blo_do_versions_400(FileData *fd, Library * /*lib*/, Main *bmain)
LISTBASE_FOREACH (Mesh *, mesh, &bmain->meshes) {
blender::bke::mesh_sculpt_mask_to_generic(*mesh);
}
if (!DNA_struct_member_exists(
fd->filesdna, "RaytraceEEVEE", "float", "screen_trace_max_roughness"))
{
LISTBASE_FOREACH (Scene *, scene, &bmain->scenes) {
scene->eevee.reflection_options.screen_trace_max_roughness = 0.5f;
scene->eevee.refraction_options.screen_trace_max_roughness = 0.5f;
scene->eevee.diffuse_options.screen_trace_max_roughness = 0.5f;
}
}
if (!DNA_struct_member_exists(fd->filesdna, "Material", "char", "displacement_method")) {
/* Replace Cycles.displacement_method by Material::displacement_method. */
LISTBASE_FOREACH (Material *, material, &bmain->materials) {
int displacement_method = MA_DISPLACEMENT_BUMP;
if (IDProperty *cmat = version_cycles_properties_from_ID(&material->id)) {
displacement_method = version_cycles_property_int(
cmat, "displacement_method", MA_DISPLACEMENT_BUMP);
}
material->displacement_method = displacement_method;
}
}
}
}

View File

@ -594,8 +594,7 @@ void BLO_update_defaults_startup_blend(Main *bmain, const char *app_template)
/* Match default for new meshes. */
mesh->smoothresh_legacy = DEG2RADF(30);
/* Match voxel remesher options for all existing meshes in templates. */
mesh->flag |= ME_REMESH_REPROJECT_VOLUME | ME_REMESH_REPROJECT_PAINT_MASK |
ME_REMESH_REPROJECT_SCULPT_FACE_SETS | ME_REMESH_REPROJECT_VERTEX_COLORS;
mesh->flag |= ME_REMESH_REPROJECT_VOLUME | ME_REMESH_REPROJECT_ATTRIBUTES;
/* For Sculpting template. */
if (app_template && STREQ(app_template, "Sculpting")) {

View File

@ -916,6 +916,9 @@ void blo_do_versions_userdef(UserDef *userdef)
*/
{
/* Keep this block, even when empty. */
userdef->key_insert_channels = (USER_ANIM_KEY_CHANNEL_LOCATION |
USER_ANIM_KEY_CHANNEL_ROTATION | USER_ANIM_KEY_CHANNEL_SCALE |
USER_ANIM_KEY_CHANNEL_CUSTOM_PROPERTIES);
}
LISTBASE_FOREACH (bTheme *, btheme, &userdef->themes) {

View File

@ -69,6 +69,7 @@ set(SRC
algorithms/intern/smaa.cc
algorithms/intern/summed_area_table.cc
algorithms/intern/symmetric_separable_blur.cc
algorithms/intern/symmetric_separable_blur_variable_size.cc
algorithms/intern/transform.cc
algorithms/COM_algorithm_jump_flooding.hh
@ -79,6 +80,7 @@ set(SRC
algorithms/COM_algorithm_smaa.hh
algorithms/COM_algorithm_summed_area_table.hh
algorithms/COM_algorithm_symmetric_separable_blur.hh
algorithms/COM_algorithm_symmetric_separable_blur_variable_size.hh
algorithms/COM_algorithm_transform.hh
cached_resources/intern/cached_mask.cc
@ -150,6 +152,7 @@ set(GLSL_SRC
shaders/compositor_image_crop.glsl
shaders/compositor_inpaint_compute_boundary.glsl
shaders/compositor_inpaint_compute_region.glsl
shaders/compositor_inpaint_fill_region.glsl
shaders/compositor_jump_flooding.glsl
shaders/compositor_keying_compute_image.glsl
shaders/compositor_keying_compute_matte.glsl
@ -186,6 +189,7 @@ set(GLSL_SRC
shaders/compositor_symmetric_blur.glsl
shaders/compositor_symmetric_blur_variable_size.glsl
shaders/compositor_symmetric_separable_blur.glsl
shaders/compositor_symmetric_separable_blur_variable_size.glsl
shaders/compositor_tone_map_photoreceptor.glsl
shaders/compositor_tone_map_simple.glsl
shaders/compositor_write_output.glsl
@ -297,6 +301,7 @@ set(SRC_SHADER_CREATE_INFOS
shaders/infos/compositor_symmetric_blur_info.hh
shaders/infos/compositor_symmetric_blur_variable_size_info.hh
shaders/infos/compositor_symmetric_separable_blur_info.hh
shaders/infos/compositor_symmetric_separable_blur_variable_size_info.hh
shaders/infos/compositor_tone_map_photoreceptor_info.hh
shaders/infos/compositor_tone_map_simple_info.hh
shaders/infos/compositor_write_output_info.hh

View File

@ -0,0 +1,33 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "DNA_scene_types.h"
#include "COM_context.hh"
#include "COM_result.hh"
namespace blender::realtime_compositor {
/* Blur the input using a horizontal and a vertical separable blur pass given the filter type
 * using SymmetricSeparableBlurWeights, where the number of weights is equal to weights_resolution.
 * Since the radius can be variable, the number of weights can be less than or greater than the
 * number of pixels actually accumulated during blurring, so the weights are interpolated in the
 * shader as needed; the resolution is typically set to the maximum possible radius if known. The
 * radius of the blur is variable and is defined using the given radius float image. The output is
 * written to the given output result, which is allocated internally and is thus expected not to
 * be previously allocated.
 *
 * Technically, a variable size blur can't be computed separably; however, assuming a sufficiently
 * smooth radius field, the results can be visually pleasing, so this can be used as a more
 * performant variable size blur if the quality is satisfactory. */
void symmetric_separable_blur_variable_size(Context &context,
Result &input,
Result &output,
Result &radius,
int filter_type = R_FILTER_GAUSS,
int weights_resolution = 128);
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,139 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_assert.h"
#include "BLI_math_base.hh"
#include "BLI_math_vector.hh"
#include "BLI_math_vector_types.hh"
#include "GPU_shader.h"
#include "GPU_texture.h"
#include "COM_context.hh"
#include "COM_result.hh"
#include "COM_utilities.hh"
#include "COM_algorithm_symmetric_separable_blur_variable_size.hh"
#include "COM_symmetric_separable_blur_weights.hh"
namespace blender::realtime_compositor {
static const char *get_blur_shader(ResultType type)
{
switch (type) {
case ResultType::Float:
return "compositor_symmetric_separable_blur_variable_size_float";
case ResultType::Float2:
return "compositor_symmetric_separable_blur_variable_size_float2";
case ResultType::Vector:
case ResultType::Color:
return "compositor_symmetric_separable_blur_variable_size_float4";
case ResultType::Float3:
/* GPU module does not support float3 outputs. */
break;
case ResultType::Int2:
/* Blur does not support integer types. */
break;
}
BLI_assert_unreachable();
return nullptr;
}
static Result horizontal_pass(
Context &context, Result &input, Result &radius, int filter_type, int weights_resolution)
{
GPUShader *shader = context.get_shader(get_blur_shader(input.type()));
GPU_shader_bind(shader);
GPU_shader_uniform_1b(shader, "is_vertical_pass", false);
input.bind_as_texture(shader, "input_tx");
const SymmetricSeparableBlurWeights &weights =
context.cache_manager().symmetric_separable_blur_weights.get(
context, filter_type, weights_resolution);
weights.bind_as_texture(shader, "weights_tx");
radius.bind_as_texture(shader, "radius_tx");
/* We allocate an output image of a transposed size, that is, with a height equivalent to the
* width of the input and vice versa. This is done as a performance optimization. The shader
* will blur the image horizontally and write it to the intermediate output transposed. Then
* the vertical pass will execute the same horizontal blur shader, but since its input is
* transposed, it will effectively do a vertical blur and write to the output transposed,
* effectively undoing the transposition in the horizontal pass. This is done to improve
* spatial cache locality in the shader and to avoid having two separate shaders for each blur
* pass. */
Domain domain = input.domain();
const int2 transposed_domain = int2(domain.size.y, domain.size.x);
Result output = context.create_temporary_result(input.type());
output.allocate_texture(transposed_domain);
output.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, domain.size);
GPU_shader_unbind();
input.unbind_as_texture();
weights.unbind_as_texture();
radius.unbind_as_texture();
output.unbind_as_image();
return output;
}
static void vertical_pass(Context &context,
Result &original_input,
Result &horizontal_pass_result,
Result &output,
Result &radius,
int filter_type,
int weights_resolution)
{
GPUShader *shader = context.get_shader(get_blur_shader(original_input.type()));
GPU_shader_bind(shader);
GPU_shader_uniform_1b(shader, "is_vertical_pass", true);
horizontal_pass_result.bind_as_texture(shader, "input_tx");
const SymmetricSeparableBlurWeights &weights =
context.cache_manager().symmetric_separable_blur_weights.get(
context, filter_type, weights_resolution);
weights.bind_as_texture(shader, "weights_tx");
radius.bind_as_texture(shader, "radius_tx");
Domain domain = original_input.domain();
output.allocate_texture(domain);
output.bind_as_image(shader, "output_img");
/* Notice that the domain is transposed, see the note on the horizontal pass method for more
* information on the reasoning behind this. */
compute_dispatch_threads_at_least(shader, int2(domain.size.y, domain.size.x));
GPU_shader_unbind();
horizontal_pass_result.unbind_as_texture();
output.unbind_as_image();
weights.unbind_as_texture();
radius.unbind_as_texture();
}
void symmetric_separable_blur_variable_size(Context &context,
Result &input,
Result &output,
Result &radius,
int filter_type,
int weights_resolution)
{
Result horizontal_pass_result = horizontal_pass(
context, input, radius, filter_type, weights_resolution);
vertical_pass(
context, input, horizontal_pass_result, output, radius, filter_type, weights_resolution);
horizontal_pass_result.release();
}
} // namespace blender::realtime_compositor

View File

@ -84,6 +84,8 @@ SymmetricSeparableBlurWeights::SymmetricSeparableBlurWeights(Context &context,
Result::texture_format(ResultType::Float, context.get_precision()),
GPU_TEXTURE_USAGE_GENERAL,
weights.data());
GPU_texture_filter_mode(texture_, true);
GPU_texture_extend_mode(texture_, GPU_SAMPLER_EXTEND_MODE_EXTEND);
}
SymmetricSeparableBlurWeights::~SymmetricSeparableBlurWeights()

View File

@ -2,12 +2,7 @@
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/* Fill the inpainting region by sampling the color of the nearest boundary pixel if it is not
* further than the user supplied distance. Additionally, apply a lateral blur in the tangential
* path to the inpainting boundary to smooth out the inpainted region. */
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_compositor_jump_flooding_lib.glsl)
void main()
{
@ -15,55 +10,23 @@ void main()
vec4 color = texture_load(input_tx, texel);
/* An opaque pixel, no inpainting needed. */
/* An opaque pixel, not part of the inpainting region, write the original color. */
if (color.a == 1.0) {
imageStore(output_img, texel, color);
return;
}
ivec2 closest_boundary_texel = texture_load(flooded_boundary_tx, texel).xy;
float distance_to_boundary = distance(vec2(texel), vec2(closest_boundary_texel));
float distance_to_boundary = texture_load(distance_to_boundary_tx, texel).x;
/* Further than the user supplied distance, write a transparent color. */
/* Further than the inpainting distance, not part of the inpainting region, write the original
* color. */
if (distance_to_boundary > max_distance) {
imageStore(output_img, texel, vec4(0.0));
imageStore(output_img, texel, color);
return;
}
/* We set the blur radius to be proportional to the distance to the boundary. */
int blur_radius = int(ceil(distance_to_boundary));
/* Laterally blur by accumulating the boundary pixels nearest to the pixels along the tangential
* path in both directions starting from the current pixel, noting that the weights texture only
* stores the weights for the left half, but since the Gaussian is symmetric, the same weight is
* used for the right half and we add both of their contributions. */
vec2 left_texel = vec2(texel);
vec2 right_texel = vec2(texel);
float accumulated_weight = 0.0;
vec4 accumulated_color = vec4(0.0);
for (int i = 0; i < blur_radius; i++) {
float weight = texture(gaussian_weights_tx, float(i / (blur_radius - 1))).x;
{
ivec2 boundary_texel = texture_load(flooded_boundary_tx, ivec2(left_texel)).xy;
accumulated_color += texture_load(input_tx, boundary_texel) * weight;
accumulated_weight += weight;
/* Move the left texel one pixel in the clockwise tangent to the boundary. */
left_texel += normalize((left_texel - vec2(boundary_texel)).yx * vec2(-1.0, 1.0));
}
/* When i is zero, we are accumulating the center pixel, which was already accumulated as the
* left texel above, so no need to accumulate it again. */
if (i != 0) {
ivec2 boundary_texel = texture_load(flooded_boundary_tx, ivec2(right_texel)).xy;
accumulated_color += texture_load(input_tx, boundary_texel) * weight;
accumulated_weight += weight;
/* Move the right texel one pixel in the anti-clockwise tangent to the boundary. */
right_texel += normalize((right_texel - vec2(boundary_texel)).yx * vec2(1.0, -1.0));
}
}
imageStore(output_img, texel, accumulated_color / accumulated_weight);
/* Mix the inpainted color with the original color using its alpha because semi-transparent areas
* are considered to be partially inpainted. */
vec4 inpainted_color = texture_load(inpainted_region_tx, texel);
imageStore(output_img, texel, vec4(mix(inpainted_color.rgb, color.rgb, color.a), 1.0));
}

View File

@ -0,0 +1,48 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/* Fill the inpainting region by sampling the color of the nearest boundary pixel. Additionally,
* compute some information about the inpainting region, like the distance to the boundary, as well
* as the blur radius to use to smooth out that region. */
#pragma BLENDER_REQUIRE(gpu_shader_math_base_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_compositor_jump_flooding_lib.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
vec4 color = texture_load(input_tx, texel);
/* An opaque pixel, not part of the inpainting region. */
if (color.a == 1.0) {
imageStore(filled_region_img, texel, color);
imageStore(smoothing_radius_img, texel, vec4(0.0));
imageStore(distance_to_boundary_img, texel, vec4(0.0));
return;
}
ivec2 closest_boundary_texel = texture_load(flooded_boundary_tx, texel).xy;
float distance_to_boundary = distance(vec2(texel), vec2(closest_boundary_texel));
imageStore(distance_to_boundary_img, texel, vec4(distance_to_boundary));
/* This shader is followed by a blur shader that smooths out the inpainting region, where the blur
 * window is inscribed in the circle that touches the boundary, and thus the blur radius is the
 * distance to the boundary divided by the square root of two. As a performance optimization, we
 * limit the blurring to areas that will affect the inpainting region, that is, those whose
 * distance to the boundary is less than double the inpainting distance. Additionally, we clamp
 * the distance to the inpainting distance, since areas outside of the clamp range only indirectly
 * affect the inpainting region due to blurring and thus needn't use higher blur radii. */
float blur_window_size = min(float(max_distance), distance_to_boundary) / M_SQRT2;
bool skip_smoothing = distance_to_boundary > (max_distance * 2.0);
float smoothing_radius = skip_smoothing ? 0.0 : blur_window_size;
imageStore(smoothing_radius_img, texel, vec4(smoothing_radius));
/* Mix the boundary color with the original color using its alpha because semi-transparent areas
* are considered to be partially inpainted. */
vec4 boundary_color = texture_load(input_tx, closest_boundary_texel);
imageStore(filled_region_img, texel, mix(boundary_color, color, color.a));
}
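
The divided-by-square-root-of-two step above follows from inscribing the square blur window in the circle of radius $d$ that touches the boundary; as a short check:

\[
\frac{s}{2}\sqrt{2} = d
\quad\Longrightarrow\quad
\text{blur radius} = \frac{s}{2} = \frac{d}{\sqrt{2}},
\]

where $s$ is the side length of the blur window, so half its diagonal equals the circle radius $d$.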

View File

@ -0,0 +1,41 @@
/* SPDX-FileCopyrightText: 2022 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma BLENDER_REQUIRE(gpu_shader_compositor_blur_common.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
float accumulated_weight = 0.0;
vec4 accumulated_color = vec4(0.0);
/* First, compute the contribution of the center pixel. */
vec4 center_color = texture_load(input_tx, texel);
float center_weight = texture_load(weights_tx, 0).x;
accumulated_color += center_color * center_weight;
accumulated_weight += center_weight;
/* The dispatch domain is transposed in the vertical pass, so make sure to transpose the texel
 * coordinates back when loading the radius. See the horizontal_pass function in the
* symmetric_separable_blur_variable_size.cc file for more information. */
int radius = int(texture_load(radius_tx, is_vertical_pass ? texel.yx : texel).x);
/* Then, compute the contributions of the pixel to the right and left, noting that the
* weights texture only stores the weights for the positive half, but since the filter is
* symmetric, the same weight is used for the negative half and we add both of their
* contributions. */
for (int i = 1; i <= radius; i++) {
/* Add 0.5 to evaluate at the center of the pixels. */
float weight = texture(weights_tx, (float(i) + 0.5) / float(radius + 1)).x;
accumulated_color += texture_load(input_tx, texel + ivec2(i, 0)) * weight;
accumulated_color += texture_load(input_tx, texel + ivec2(-i, 0)) * weight;
accumulated_weight += weight * 2.0;
}
/* Write the color using the transposed texel. See the horizontal_pass function mentioned above
* for more information on the rationale behind this. */
imageStore(output_img, texel.yx, accumulated_color / accumulated_weight);
}
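
A CPU sketch of the same accumulation for a single row, assuming a precomputed positive-half weights table and approximating the shader's filtered texture() lookup with a clamped linear interpolation (the helper and data are hypothetical):

#include <algorithm>
#include <cstdio>
#include <vector>

/* Linearly sample the half-filter weights table at a normalized coordinate. */
static float sample_weight(const std::vector<float> &weights, float coord)
{
  const float x = std::clamp(coord, 0.0f, 1.0f) * float(weights.size() - 1);
  const int i0 = int(x);
  const int i1 = std::min(i0 + 1, int(weights.size()) - 1);
  const float t = x - float(i0);
  return weights[i0] * (1.0f - t) + weights[i1] * t;
}

int main()
{
  const std::vector<float> weights = {1.0f, 0.6f, 0.2f}; /* Center plus falloff. */
  const std::vector<float> row = {0, 0, 1, 0, 0};        /* Impulse input. */
  const int radius = 2, center = 2;
  float accumulated_color = row[center] * weights[0];
  float accumulated_weight = weights[0];
  for (int i = 1; i <= radius; i++) {
    /* Same coordinate as the shader: evaluate at pixel centers. */
    const float w = sample_weight(weights, (float(i) + 0.5f) / float(radius + 1));
    accumulated_color += (row[center + i] + row[center - i]) * w;
    accumulated_weight += w * 2.0f;
  }
  printf("%f\n", accumulated_color / accumulated_weight);
}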

View File

@ -11,12 +11,23 @@ GPU_SHADER_CREATE_INFO(compositor_inpaint_compute_boundary)
.compute_source("compositor_inpaint_compute_boundary.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_inpaint_compute_region)
GPU_SHADER_CREATE_INFO(compositor_inpaint_fill_region)
.local_group_size(16, 16)
.push_constant(Type::INT, "max_distance")
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.sampler(1, ImageType::INT_2D, "flooded_boundary_tx")
.sampler(2, ImageType::INT_1D, "gaussian_weights_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "filled_region_img")
.image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "distance_to_boundary_img")
.image(2, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "smoothing_radius_img")
.compute_source("compositor_inpaint_fill_region.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_inpaint_compute_region)
.local_group_size(16, 16)
.push_constant(Type::INT, "max_distance")
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.sampler(1, ImageType::FLOAT_2D, "inpainted_region_tx")
.sampler(2, ImageType::FLOAT_2D, "distance_to_boundary_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_inpaint_compute_region.glsl")
.do_static_compilation(true);

View File

@ -0,0 +1,28 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include "gpu_shader_create_info.hh"
GPU_SHADER_CREATE_INFO(compositor_symmetric_separable_blur_variable_size_shared)
.local_group_size(16, 16)
.push_constant(Type::BOOL, "is_vertical_pass")
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.sampler(1, ImageType::FLOAT_1D, "weights_tx")
.sampler(2, ImageType::FLOAT_2D, "radius_tx")
.compute_source("compositor_symmetric_separable_blur_variable_size.glsl");
GPU_SHADER_CREATE_INFO(compositor_symmetric_separable_blur_variable_size_float)
.additional_info("compositor_symmetric_separable_blur_variable_size_shared")
.image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_symmetric_separable_blur_variable_size_float2)
.additional_info("compositor_symmetric_separable_blur_variable_size_shared")
.image(0, GPU_RG16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_symmetric_separable_blur_variable_size_float4)
.additional_info("compositor_symmetric_separable_blur_variable_size_shared")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.do_static_compilation(true);

View File

@ -510,9 +510,12 @@ set(GLSL_SRC
engines/eevee_next/shaders/eevee_geom_world_vert.glsl
engines/eevee_next/shaders/eevee_hiz_debug_frag.glsl
engines/eevee_next/shaders/eevee_hiz_update_comp.glsl
engines/eevee_next/shaders/eevee_horizon_denoise_comp.glsl
engines/eevee_next/shaders/eevee_horizon_scan_eval_lib.glsl
engines/eevee_next/shaders/eevee_horizon_scan_comp.glsl
engines/eevee_next/shaders/eevee_horizon_scan_lib.glsl
engines/eevee_next/shaders/eevee_horizon_scan_test.glsl
engines/eevee_next/shaders/eevee_horizon_setup_comp.glsl
engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl
engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl
engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl

View File

@ -103,7 +103,6 @@
/* Keep this as a define to avoid shader variations. */
#define RAYTRACE_RADIANCE_FORMAT GPU_R11F_G11F_B10F
#define RAYTRACE_RAYTIME_FORMAT GPU_R32F
#define RAYTRACE_HORIZON_FORMAT GPU_R32UI
#define RAYTRACE_VARIANCE_FORMAT GPU_R16F
#define RAYTRACE_TILEMASK_FORMAT GPU_R8UI

View File

@ -295,11 +295,13 @@ void Film::init(const int2 &extent, const rcti *output_rect)
EEVEE_RENDER_PASS_ENVIRONMENT |
EEVEE_RENDER_PASS_MIST |
EEVEE_RENDER_PASS_SHADOW | EEVEE_RENDER_PASS_AO;
const eViewLayerEEVEEPassType color_passes_3 = EEVEE_RENDER_PASS_TRANSPARENT;
data_.exposure_scale = pow2f(scene.view_settings.exposure);
data_.has_data = (enabled_passes_ & data_passes) != 0;
data_.any_render_pass_1 = (enabled_passes_ & color_passes_1) != 0;
data_.any_render_pass_2 = (enabled_passes_ & color_passes_2) != 0;
data_.any_render_pass_3 = (enabled_passes_ & color_passes_3) != 0;
}
{
/* Set pass offsets. */
@ -341,6 +343,7 @@ void Film::init(const int2 &extent, const rcti *output_rect)
data_.environment_id = pass_index_get(EEVEE_RENDER_PASS_ENVIRONMENT);
data_.shadow_id = pass_index_get(EEVEE_RENDER_PASS_SHADOW);
data_.ambient_occlusion_id = pass_index_get(EEVEE_RENDER_PASS_AO);
data_.transparent_id = pass_index_get(EEVEE_RENDER_PASS_TRANSPARENT);
data_.aov_color_id = data_.color_len;
data_.aov_value_id = data_.value_len;
@ -629,7 +632,7 @@ void Film::update_sample_table()
}
}
void Film::accumulate(const DRWView *view, GPUTexture *combined_final_tx)
void Film::accumulate(View &view, GPUTexture *combined_final_tx)
{
if (inst_.is_viewport()) {
DefaultFramebufferList *dfbl = DRW_viewport_framebuffer_list_get();
@ -650,9 +653,7 @@ void Film::accumulate(const DRWView *view, GPUTexture *combined_final_tx)
data_.display_only = false;
inst_.push_uniform_data();
draw::View drw_view("MainView", view);
inst_.manager->submit(accumulate_ps_, drw_view);
inst_.manager->submit(accumulate_ps_, view);
combined_tx_.swap();
weight_tx_.swap();

View File

@ -90,7 +90,7 @@ class Film {
void end_sync();
/** Accumulate the newly rendered sample contained in #RenderBuffers and blit to display. */
void accumulate(const DRWView *view, GPUTexture *combined_final_tx);
void accumulate(View &view, GPUTexture *combined_final_tx);
/** Sort and normalize cryptomatte samples. */
void cryptomatte_sort();
@ -180,6 +180,8 @@ class Film {
return data_.shadow_id;
case EEVEE_RENDER_PASS_AO:
return data_.ambient_occlusion_id;
case EEVEE_RENDER_PASS_TRANSPARENT:
return data_.transparent_id;
case EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT:
return data_.cryptomatte_object_id;
case EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET:

View File

@ -95,6 +95,8 @@ void HiZBuffer::update()
else {
inst_.manager->submit(hiz_update_layer_ps_);
}
is_dirty_ = false;
}
void HiZBuffer::debug_draw(View &view, GPUFrameBuffer *view_fb)

View File

@ -113,23 +113,21 @@ bNodeTree *LookdevWorldNodeTree::nodetree_get(const LookdevParameters &parameter
LookdevModule::~LookdevModule()
{
GPU_material_free(&gpu_materials_);
gpu_material_ = nullptr;
}
bool LookdevModule::sync_world()
{
/* Check based on the v3d if the world is overridden. */
LookdevParameters new_parameters(inst_.v3d);
bool parameters_changed = parameters_ != new_parameters;
if (parameters_changed) {
if (parameters_.gpu_parameters_changed(new_parameters)) {
GPU_material_free(&gpu_materials_);
gpu_material_ = nullptr;
}
const bool parameters_changed = parameters_ != new_parameters;
const bool gpu_parameters_changed = parameters_.gpu_parameters_changed(new_parameters);
if (gpu_parameters_changed) {
GPU_material_free(&gpu_materials_);
}
if (parameters_changed) {
parameters_ = new_parameters;
inst_.sampling.reset();
gpu_status_ = GPU_MAT_CREATED;
}
if (parameters_.show_scene_world) {
@ -137,25 +135,15 @@ bool LookdevModule::sync_world()
}
::bNodeTree *node_tree = world_override_tree.nodetree_get(parameters_);
gpu_material_ = inst_.shaders.material_shader_get("EEVEE Lookdev Background",
gpu_materials_,
node_tree,
MAT_PIPE_DEFERRED,
MAT_GEOM_WORLD,
true);
GPUMaterial *gpu_material = inst_.shaders.material_shader_get(
"EEVEE Lookdev Background", gpu_materials_, node_tree, MAT_PIPE_DEFERRED, MAT_GEOM_WORLD);
if (assign_if_different(gpu_status_, GPU_material_status(gpu_material_)) &&
gpu_status_ == GPU_MAT_SUCCESS)
{
if (gpu_parameters_changed) {
inst_.reflection_probes.sync_world_lookdev();
}
else if (gpu_status_ == GPU_MAT_QUEUED) {
inst_.sampling.reset();
DRW_viewport_request_redraw();
}
inst_.pipelines.world.sync(gpu_material_);
inst_.pipelines.background.sync(gpu_material_, parameters_.background_opacity);
inst_.pipelines.world.sync(gpu_material);
inst_.pipelines.background.sync(gpu_material, parameters_.background_opacity);
return true;
}

View File

@ -79,8 +79,6 @@ class LookdevModule {
LookdevParameters parameters_;
ListBase gpu_materials_ = {nullptr, nullptr};
GPUMaterial *gpu_material_ = nullptr;
eGPUMaterialStatus gpu_status_ = GPU_MAT_CREATED;
public:
LookdevModule(Instance &inst) : inst_(inst){};

View File

@ -61,6 +61,24 @@ static inline bool geometry_type_has_surface(eMaterialGeometry geometry_type)
return geometry_type < MAT_GEOM_VOLUME;
}
enum eMaterialDisplacement {
MAT_DISPLACEMENT_BUMP = 0,
MAT_DISPLACEMENT_VERTEX_WITH_BUMP,
};
static inline eMaterialDisplacement to_displacement_type(int displacement_method)
{
switch (displacement_method) {
case MA_DISPLACEMENT_DISPLACE:
/* Currently unsupported. Revert to vertex displacement + bump. */
ATTR_FALLTHROUGH;
case MA_DISPLACEMENT_BOTH:
return MAT_DISPLACEMENT_VERTEX_WITH_BUMP;
default:
return MAT_DISPLACEMENT_BUMP;
}
}
enum eMaterialProbe {
MAT_PROBE_NONE = 0,
MAT_PROBE_REFLECTION,
@ -70,23 +88,30 @@ enum eMaterialProbe {
static inline void material_type_from_shader_uuid(uint64_t shader_uuid,
eMaterialPipeline &pipeline_type,
eMaterialGeometry &geometry_type,
eMaterialDisplacement &displacement_type,
bool &transparent_shadows)
{
const uint64_t geometry_mask = ((1u << 4u) - 1u);
const uint64_t pipeline_mask = ((1u << 4u) - 1u);
const uint64_t displacement_mask = ((1u << 2u) - 1u);
geometry_type = static_cast<eMaterialGeometry>(shader_uuid & geometry_mask);
pipeline_type = static_cast<eMaterialPipeline>((shader_uuid >> 4u) & pipeline_mask);
transparent_shadows = (shader_uuid >> 8u) & 1u;
displacement_type = static_cast<eMaterialDisplacement>((shader_uuid >> 8u) & displacement_mask);
transparent_shadows = (shader_uuid >> 10u) & 1u;
}
static inline uint64_t shader_uuid_from_material_type(eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type,
char blend_flags)
static inline uint64_t shader_uuid_from_material_type(
eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type,
eMaterialDisplacement displacement_type = MAT_DISPLACEMENT_BUMP,
char blend_flags = 0)
{
BLI_assert(displacement_type < (1 << 2));
BLI_assert(geometry_type < (1 << 4));
BLI_assert(pipeline_type < (1 << 4));
uchar transparent_shadows = blend_flags & MA_BL_TRANSPARENT_SHADOW ? 1 : 0;
return geometry_type | (pipeline_type << 4) | (transparent_shadows << 8);
uint64_t transparent_shadows = blend_flags & MA_BL_TRANSPARENT_SHADOW ? 1 : 0;
return geometry_type | (pipeline_type << 4) | (displacement_type << 8) |
(transparent_shadows << 10);
}
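For reference, the widened key now occupies 11 bits: geometry in bits 0-3, pipeline in bits 4-7, displacement in bits 8-9, and transparent shadows in bit 10, matching the unpack order above. A standalone round-trip check of that layout:

#include <cassert>
#include <cstdint>

int main()
{
  /* Hypothetical field values, within the asserted ranges. */
  const uint64_t geometry = 3;     /* 4 bits. */
  const uint64_t pipeline = 5;     /* 4 bits. */
  const uint64_t displacement = 1; /* 2 bits. */
  const uint64_t transparent = 1;  /* 1 bit. */

  const uint64_t uuid = geometry | (pipeline << 4u) | (displacement << 8u) |
                        (transparent << 10u);

  /* Unpacking mirrors material_type_from_shader_uuid(). */
  assert((uuid & ((1u << 4u) - 1u)) == geometry);
  assert(((uuid >> 4u) & ((1u << 4u) - 1u)) == pipeline);
  assert(((uuid >> 8u) & ((1u << 2u) - 1u)) == displacement);
  assert(((uuid >> 10u) & 1u) == transparent);
  return 0;
}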
ENUM_OPERATORS(eClosureBits, CLOSURE_AMBIENT_OCCLUSION)
@ -147,18 +172,21 @@ struct MaterialKey {
MaterialKey(::Material *mat_, eMaterialGeometry geometry, eMaterialPipeline pipeline) : mat(mat_)
{
options = shader_uuid_from_material_type(pipeline, geometry, mat_->blend_flag);
options = shader_uuid_from_material_type(
pipeline, geometry, to_displacement_type(mat_->displacement_method), mat_->blend_flag);
}
uint64_t hash() const
{
BLI_assert(options < sizeof(*mat));
return uint64_t(mat) + options;
}
bool operator<(const MaterialKey &k) const
{
return (mat < k.mat) || (options < k.options);
if (mat == k.mat) {
return options < k.options;
}
return mat < k.mat;
}
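Two notes on MaterialKey. The hash is collision-free because the assert guarantees options < sizeof(Material), so adding options to the material's address can never land inside another live Material object. And the previous operator<, (mat < k.mat) || (options < k.options), was not a strict weak ordering: for a = {mat: 2, options: 1} and b = {mat: 1, options: 2}, both a < b and b < a hold, which ordered containers must never see. The corrected version is a plain lexicographic comparison, equivalently written with std::tie (sketch with simplified types):

#include <cstdint>
#include <tuple>

struct Key {
  const void *mat;
  uint64_t options;
};

/* Lexicographic: `options` only breaks ties on `mat`, as in the fix above. */
static bool key_less(const Key &a, const Key &b)
{
  return std::tie(a.mat, a.options) < std::tie(b.mat, b.options);
}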
bool operator==(const MaterialKey &k) const


@ -371,7 +371,7 @@ PassMain::Sub *ForwardPipeline::material_transparent_add(const Object *ob,
::Material *blender_mat,
GPUMaterial *gpumat)
{
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM | DRW_STATE_DEPTH_LESS_EQUAL;
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_LESS_EQUAL;
if (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) {
state |= DRW_STATE_CULL_BACK;
}
@ -382,24 +382,14 @@ PassMain::Sub *ForwardPipeline::material_transparent_add(const Object *ob,
return pass;
}
void ForwardPipeline::render(View &view,
Framebuffer &prepass_fb,
Framebuffer &combined_fb,
GPUTexture * /*combined_tx*/)
void ForwardPipeline::render(View &view, Framebuffer &prepass_fb, Framebuffer &combined_fb)
{
DRW_stats_group_start("Forward.Opaque");
prepass_fb.bind();
inst_.manager->submit(prepass_ps_, view);
// if (!DRW_pass_is_empty(prepass_ps_)) {
inst_.hiz_buffer.set_dirty();
// }
// if (inst_.raytracing.enabled()) {
// rt_buffer.radiance_copy(combined_tx);
// inst_.hiz_buffer.update();
// }
inst_.shadows.set_view(view);
inst_.irradiance_cache.set_view(view);
@ -413,10 +403,6 @@ void ForwardPipeline::render(View &view,
combined_fb.bind();
inst_.manager->submit(transparent_ps_, view);
// if (inst_.raytracing.enabled()) {
// gbuffer.ray_radiance_tx.release();
// }
}
/** \} */
@ -483,8 +469,8 @@ void DeferredLayer::begin_sync()
inst_.cryptomatte.bind_resources(gbuffer_ps_);
}
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM | DRW_STATE_DEPTH_EQUAL |
DRW_STATE_WRITE_STENCIL | DRW_STATE_STENCIL_ALWAYS;
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_EQUAL | DRW_STATE_WRITE_STENCIL |
DRW_STATE_STENCIL_ALWAYS;
gbuffer_double_sided_ps_ = &gbuffer_ps_.sub("DoubleSided");
gbuffer_double_sided_ps_->state_set(state);
@ -1032,8 +1018,8 @@ void DeferredProbeLayer::begin_sync()
inst_.cryptomatte.bind_resources(gbuffer_ps_);
}
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM | DRW_STATE_DEPTH_EQUAL |
DRW_STATE_WRITE_STENCIL | DRW_STATE_STENCIL_ALWAYS;
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_EQUAL | DRW_STATE_WRITE_STENCIL |
DRW_STATE_STENCIL_ALWAYS;
gbuffer_double_sided_ps_ = &gbuffer_ps_.sub("DoubleSided");
gbuffer_double_sided_ps_->state_set(state);
@ -1191,7 +1177,7 @@ void PlanarProbePipeline::begin_sync()
gbuffer_ps_.bind_image(RBUFS_VALUE_SLOT, &inst_.render_buffers.rp_value_tx);
inst_.cryptomatte.bind_resources(gbuffer_ps_);
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM | DRW_STATE_DEPTH_EQUAL;
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_EQUAL;
gbuffer_double_sided_ps_ = &gbuffer_ps_.sub("DoubleSided");
gbuffer_double_sided_ps_->state_set(state);


@ -148,8 +148,6 @@ class ForwardPipeline {
PassSortable transparent_ps_ = {"Forward.Transparent"};
float3 camera_forward_;
// GPUTexture *input_screen_radiance_tx_ = nullptr;
public:
ForwardPipeline(Instance &inst) : inst_(inst){};
@ -165,10 +163,7 @@ class ForwardPipeline {
::Material *blender_mat,
GPUMaterial *gpumat);
void render(View &view,
Framebuffer &prepass_fb,
Framebuffer &combined_fb,
GPUTexture *combined_tx);
void render(View &view, Framebuffer &prepass_fb, Framebuffer &combined_fb);
};
/** \} */


@ -60,7 +60,9 @@ void RayTraceModule::sync()
pass.shader_set(inst_.shaders.static_shader_get(RAY_TILE_CLASSIFY));
pass.bind_image("tile_mask_img", &tile_mask_tx_);
pass.bind_ssbo("ray_dispatch_buf", &ray_dispatch_buf_);
pass.bind_ssbo("denoise_dispatch_buf", &denoise_dispatch_buf_);
pass.bind_ssbo("ray_denoise_dispatch_buf", &ray_denoise_dispatch_buf_);
pass.bind_ssbo("horizon_dispatch_buf", &horizon_dispatch_buf_);
pass.bind_ssbo("horizon_denoise_dispatch_buf", &horizon_denoise_dispatch_buf_);
inst_.bind_uniform_data(&pass);
inst_.gbuffer.bind_resources(pass);
pass.dispatch(&tile_classify_dispatch_size_);
@ -72,9 +74,13 @@ void RayTraceModule::sync()
pass.shader_set(inst_.shaders.static_shader_get(RAY_TILE_COMPACT));
pass.bind_image("tile_mask_img", &tile_mask_tx_);
pass.bind_ssbo("ray_dispatch_buf", &ray_dispatch_buf_);
pass.bind_ssbo("denoise_dispatch_buf", &denoise_dispatch_buf_);
pass.bind_ssbo("ray_denoise_dispatch_buf", &ray_denoise_dispatch_buf_);
pass.bind_ssbo("ray_tiles_buf", &ray_tiles_buf_);
pass.bind_ssbo("denoise_tiles_buf", &denoise_tiles_buf_);
pass.bind_ssbo("ray_denoise_tiles_buf", &ray_denoise_tiles_buf_);
pass.bind_ssbo("horizon_dispatch_buf", &horizon_dispatch_buf_);
pass.bind_ssbo("horizon_denoise_dispatch_buf", &horizon_denoise_dispatch_buf_);
pass.bind_ssbo("horizon_tiles_buf", &horizon_tiles_buf_);
pass.bind_ssbo("horizon_denoise_tiles_buf", &horizon_denoise_tiles_buf_);
inst_.bind_uniform_data(&pass);
pass.dispatch(&tile_compact_dispatch_size_);
pass.barrier(GPU_BARRIER_SHADER_STORAGE);
@ -148,7 +154,7 @@ void RayTraceModule::sync()
PassSimple &pass = PASS_VARIATION(denoise_spatial_, type, _ps_);
pass.init();
pass.shader_set(inst_.shaders.static_shader_get(SHADER_VARIATION(RAY_DENOISE_SPATIAL_, type)));
pass.bind_ssbo("tiles_coord_buf", &denoise_tiles_buf_);
pass.bind_ssbo("tiles_coord_buf", &ray_denoise_tiles_buf_);
pass.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
pass.bind_texture("depth_tx", &depth_tx);
pass.bind_image("ray_data_img", &ray_data_tx_);
@ -161,7 +167,7 @@ void RayTraceModule::sync()
inst_.bind_uniform_data(&pass);
inst_.sampling.bind_resources(pass);
inst_.gbuffer.bind_resources(pass);
pass.dispatch(denoise_dispatch_buf_);
pass.dispatch(ray_denoise_dispatch_buf_);
pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
}
{
@ -178,9 +184,9 @@ void RayTraceModule::sync()
pass.bind_image("out_radiance_img", &denoised_temporal_tx_);
pass.bind_image("in_variance_img", &hit_variance_tx_);
pass.bind_image("out_variance_img", &denoise_variance_tx_);
pass.bind_ssbo("tiles_coord_buf", &denoise_tiles_buf_);
pass.bind_ssbo("tiles_coord_buf", &ray_denoise_tiles_buf_);
inst_.sampling.bind_resources(pass);
pass.dispatch(denoise_dispatch_buf_);
pass.dispatch(ray_denoise_dispatch_buf_);
pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
}
for (auto type : IndexRange(3)) {
@ -193,14 +199,63 @@ void RayTraceModule::sync()
pass.bind_image("out_radiance_img", &denoised_bilateral_tx_);
pass.bind_image("in_variance_img", &denoise_variance_tx_);
pass.bind_image("tile_mask_img", &tile_mask_tx_);
pass.bind_ssbo("tiles_coord_buf", &denoise_tiles_buf_);
pass.bind_ssbo("tiles_coord_buf", &ray_denoise_tiles_buf_);
inst_.bind_uniform_data(&pass);
inst_.sampling.bind_resources(pass);
inst_.gbuffer.bind_resources(pass);
pass.dispatch(denoise_dispatch_buf_);
pass.dispatch(ray_denoise_dispatch_buf_);
pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
}
{
PassSimple &pass = horizon_setup_ps_;
pass.init();
pass.shader_set(inst_.shaders.static_shader_get(HORIZON_SETUP));
inst_.bind_uniform_data(&pass);
pass.bind_texture("depth_tx", &depth_tx);
pass.bind_texture("in_radiance_tx", &screen_radiance_tx_, GPUSamplerState::default_sampler());
pass.bind_image("out_radiance_img", &downsampled_in_radiance_tx_);
pass.bind_image("out_normal_img", &downsampled_in_normal_tx_);
inst_.bind_uniform_data(&pass);
inst_.gbuffer.bind_resources(pass);
pass.dispatch(&tracing_dispatch_size_);
pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
}
for (auto type : IndexRange(3)) {
PassSimple &pass = PASS_VARIATION(horizon_scan_, type, _ps_);
pass.init();
pass.shader_set(inst_.shaders.static_shader_get(SHADER_VARIATION(HORIZON_SCAN_, type)));
pass.bind_image("horizon_radiance_img", &horizon_radiance_tx_);
pass.bind_image("horizon_occlusion_img", &horizon_occlusion_tx_);
pass.bind_ssbo("tiles_coord_buf", &horizon_tiles_buf_);
pass.bind_texture("screen_radiance_tx", &downsampled_in_radiance_tx_);
pass.bind_texture("screen_normal_tx", &downsampled_in_normal_tx_);
pass.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
inst_.bind_uniform_data(&pass);
inst_.hiz_buffer.bind_resources(pass);
inst_.sampling.bind_resources(pass);
inst_.gbuffer.bind_resources(pass);
pass.dispatch(horizon_dispatch_buf_);
pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
}
{
PassSimple &pass = horizon_denoise_ps_;
pass.init();
pass.shader_set(inst_.shaders.static_shader_get(HORIZON_DENOISE));
inst_.bind_uniform_data(&pass);
pass.bind_texture("depth_tx", &depth_tx);
pass.bind_image("horizon_radiance_img", &horizon_radiance_tx_);
pass.bind_image("horizon_occlusion_img", &horizon_occlusion_tx_);
pass.bind_image("radiance_img", &horizon_scan_output_tx_);
pass.bind_image("tile_mask_img", &tile_mask_tx_);
pass.bind_ssbo("tiles_coord_buf", &horizon_denoise_tiles_buf_);
inst_.bind_uniform_data(&pass);
inst_.sampling.bind_resources(pass);
inst_.gbuffer.bind_resources(pass);
inst_.irradiance_cache.bind_resources(pass);
inst_.reflection_probes.bind_resources(pass);
pass.dispatch(horizon_denoise_dispatch_buf_);
pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
}
#undef SHADER_VARIATION
#undef PASS_VARIATION
}
@ -236,6 +291,7 @@ RayTraceResult RayTraceModule::trace(RayTraceBuffer &rt_buffer,
PassSimple *trace_ray_ps = nullptr;
PassSimple *denoise_spatial_ps = nullptr;
PassSimple *denoise_bilateral_ps = nullptr;
PassSimple *horizon_scan_ps = nullptr;
RayTraceBuffer::DenoiseBuffer *denoise_buf = nullptr;
if (raytrace_closure == CLOSURE_DIFFUSE) {
@ -245,6 +301,7 @@ RayTraceResult RayTraceModule::trace(RayTraceBuffer &rt_buffer,
denoise_spatial_ps = &denoise_spatial_diffuse_ps_;
denoise_bilateral_ps = &denoise_bilateral_diffuse_ps_;
denoise_buf = &rt_buffer.diffuse;
horizon_scan_ps = &horizon_scan_diffuse_ps_;
}
else if (raytrace_closure == CLOSURE_REFLECTION) {
options = reflection_options_;
@ -253,6 +310,7 @@ RayTraceResult RayTraceModule::trace(RayTraceBuffer &rt_buffer,
denoise_spatial_ps = &denoise_spatial_reflect_ps_;
denoise_bilateral_ps = &denoise_bilateral_reflect_ps_;
denoise_buf = &rt_buffer.reflection;
horizon_scan_ps = &horizon_scan_reflect_ps_;
}
else if (raytrace_closure == CLOSURE_REFRACTION) {
options = refraction_options_;
@ -261,6 +319,7 @@ RayTraceResult RayTraceModule::trace(RayTraceBuffer &rt_buffer,
denoise_spatial_ps = &denoise_spatial_refract_ps_;
denoise_bilateral_ps = &denoise_bilateral_refract_ps_;
denoise_buf = &rt_buffer.refraction;
horizon_scan_ps = &horizon_scan_refract_ps_;
}
if ((active_closures & raytrace_closure) == 0) {
@ -278,6 +337,8 @@ RayTraceResult RayTraceModule::trace(RayTraceBuffer &rt_buffer,
const int2 tracing_res = math::divide_ceil(extent, int2(resolution_scale));
const int2 dummy_extent(1, 1);
tracing_dispatch_size_ = int3(math::divide_ceil(tracing_res, int2(RAYTRACE_GROUP_SIZE)), 1);
tile_classify_dispatch_size_ = int3(math::divide_ceil(extent, int2(RAYTRACE_GROUP_SIZE)), 1);
const int denoise_tile_count = tile_classify_dispatch_size_.x * tile_classify_dispatch_size_.y;
const int2 tile_mask_extent = tile_classify_dispatch_size_.xy();
@ -289,20 +350,25 @@ RayTraceResult RayTraceModule::trace(RayTraceBuffer &rt_buffer,
renderbuf_stencil_view_ = inst_.render_buffers.depth_tx.stencil_view();
renderbuf_depth_view_ = inst_.render_buffers.depth_tx;
bool use_denoise = (options.flag & RAYTRACE_EEVEE_USE_DENOISE);
bool use_spatial_denoise = (options.denoise_stages & RAYTRACE_EEVEE_DENOISE_SPATIAL) &&
use_denoise;
bool use_temporal_denoise = (options.denoise_stages & RAYTRACE_EEVEE_DENOISE_TEMPORAL) &&
use_spatial_denoise;
bool use_bilateral_denoise = (options.denoise_stages & RAYTRACE_EEVEE_DENOISE_BILATERAL) &&
use_temporal_denoise;
const bool use_denoise = (options.flag & RAYTRACE_EEVEE_USE_DENOISE);
const bool use_spatial_denoise = (options.denoise_stages & RAYTRACE_EEVEE_DENOISE_SPATIAL) &&
use_denoise;
const bool use_temporal_denoise = (options.denoise_stages & RAYTRACE_EEVEE_DENOISE_TEMPORAL) &&
use_spatial_denoise;
const bool use_bilateral_denoise = (options.denoise_stages & RAYTRACE_EEVEE_DENOISE_BILATERAL) &&
use_temporal_denoise;
const bool use_horizon_scan = true;
DRW_stats_group_start("Raytracing");
data_.thickness = options.screen_trace_thickness;
data_.quality = 1.0f - 0.95f * options.screen_trace_quality;
data_.brightness_clamp = (options.sample_clamp > 0.0) ? options.sample_clamp : 1e20;
data_.max_trace_roughness = 1.0f;
float roughness_mask_start = options.screen_trace_max_roughness;
float roughness_mask_fade = 0.2f;
data_.roughness_mask_scale = 1.0 / roughness_mask_fade;
data_.roughness_mask_bias = data_.roughness_mask_scale * roughness_mask_start;
data_.resolution_scale = resolution_scale;
data_.closure_active = raytrace_closure;
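A worked note on the roughness mask: with scale = 1 / fade and bias = start / fade, the shader-side factor saturate(roughness * scale - bias) is simply (roughness - start) / fade clamped to [0, 1], a linear ramp from pure ray tracing to pure horizon scan across the fade range. Sketch:

#include <algorithm>

/* Mirrors data_.roughness_mask_scale / data_.roughness_mask_bias above. */
static float horizon_mix_factor(float roughness, float mask_start, float mask_fade)
{
  const float scale = 1.0f / mask_fade;
  const float bias = scale * mask_start;
  return std::clamp(roughness * scale - bias, 0.0f, 1.0f);
}
/* With mask_start = 0.4 and mask_fade = 0.2: roughness 0.4 -> 0.0 (ray trace
 * only), 0.5 -> 0.5 (blend), 0.6 and up -> 1.0 (horizon scan only). */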
@ -315,8 +381,10 @@ RayTraceResult RayTraceModule::trace(RayTraceBuffer &rt_buffer,
inst_.push_uniform_data();
tile_mask_tx_.acquire(tile_mask_extent, RAYTRACE_TILEMASK_FORMAT);
denoise_tiles_buf_.resize(ceil_to_multiple_u(denoise_tile_count, 512));
horizon_tiles_buf_.resize(ceil_to_multiple_u(ray_tile_count, 512));
horizon_denoise_tiles_buf_.resize(ceil_to_multiple_u(denoise_tile_count, 512));
ray_tiles_buf_.resize(ceil_to_multiple_u(ray_tile_count, 512));
ray_denoise_tiles_buf_.resize(ceil_to_multiple_u(denoise_tile_count, 512));
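The rounding helpers used here are plain round-up arithmetic; a minimal sketch of assumed-equivalent scalar implementations (the Blender originals also handle vector types):

#include <cstdint>

static uint32_t ceil_to_multiple_u(uint32_t x, uint32_t m)
{
  return ((x + m - 1u) / m) * m; /* ceil_to_multiple_u(1000, 512) == 1024 */
}

static int divide_ceil_int(int x, int m)
{
  return (x + m - 1) / m; /* divide_ceil_int(1920, 8) == 240 work-groups */
}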
/* Ray setup. */
inst_.manager->submit(tile_classify_ps_);
@ -372,8 +440,6 @@ RayTraceResult RayTraceModule::trace(RayTraceBuffer &rt_buffer,
inst_.manager->submit(denoise_temporal_ps_, render_view);
/* Swap after last use. */
TextureFromPool::swap(tile_mask_tx_, denoise_buf->tilemask_history_tx);
/* Save view-projection matrix for next reprojection. */
denoise_buf->history_persmat = main_view.persmat();
/* Radiance will be swapped with history in #RayTraceResult::release().
@ -393,13 +459,10 @@ RayTraceResult RayTraceModule::trace(RayTraceBuffer &rt_buffer,
if (use_bilateral_denoise) {
denoise_buf->denoised_bilateral_tx.acquire(extent, RAYTRACE_RADIANCE_FORMAT);
denoised_bilateral_tx_ = denoise_buf->denoised_bilateral_tx;
/* Swap back for one last use. */
TextureFromPool::swap(tile_mask_tx_, denoise_buf->tilemask_history_tx);
inst_.manager->submit(*denoise_bilateral_ps, render_view);
/* Swap after last use. */
TextureFromPool::swap(tile_mask_tx_, denoise_buf->tilemask_history_tx);
TextureFromPool::swap(denoise_buf->denoised_temporal_tx, denoise_buf->radiance_history_tx);
TextureFromPool::swap(denoise_variance_tx_, denoise_buf->variance_history_tx);
@ -408,9 +471,37 @@ RayTraceResult RayTraceModule::trace(RayTraceBuffer &rt_buffer,
denoise_buf->denoised_temporal_tx.release();
}
tile_mask_tx_.release();
denoise_variance_tx_.release();
if (use_horizon_scan) {
downsampled_in_radiance_tx_.acquire(tracing_res, RAYTRACE_RADIANCE_FORMAT);
downsampled_in_normal_tx_.acquire(tracing_res, GPU_RGBA8);
inst_.manager->submit(horizon_setup_ps_, render_view);
horizon_occlusion_tx_.acquire(tracing_res, GPU_R8);
horizon_radiance_tx_.acquire(tracing_res, RAYTRACE_RADIANCE_FORMAT);
inst_.manager->submit(*horizon_scan_ps, render_view);
downsampled_in_radiance_tx_.release();
downsampled_in_normal_tx_.release();
horizon_scan_output_tx_ = result.get();
inst_.manager->submit(horizon_denoise_ps_, render_view);
horizon_occlusion_tx_.release();
horizon_radiance_tx_.release();
}
tile_mask_tx_.release();
if (tile_mask_tx_.is_valid()) {
/* Swap after last use. */
TextureFromPool::swap(tile_mask_tx_, denoise_buf->tilemask_history_tx);
}
DRW_stats_group_end();
return result;


@ -117,26 +117,49 @@ class RayTraceModule {
draw::PassSimple denoise_bilateral_diffuse_ps_ = {"DenoiseBilateral.Diffuse"};
draw::PassSimple denoise_bilateral_reflect_ps_ = {"DenoiseBilateral.Reflection"};
draw::PassSimple denoise_bilateral_refract_ps_ = {"DenoiseBilateral.Refraction"};
draw::PassSimple horizon_setup_ps_ = {"HorizonScan.Setup"};
draw::PassSimple horizon_scan_diffuse_ps_ = {"HorizonScan.Diffuse"};
draw::PassSimple horizon_scan_reflect_ps_ = {"HorizonScan.Reflection"};
draw::PassSimple horizon_scan_refract_ps_ = {"HorizonScan.Refraction"};
draw::PassSimple horizon_denoise_ps_ = {"HorizonScan.Denoise"};
/** Dispatch with enough tiles for the whole screen. */
int3 tile_classify_dispatch_size_ = int3(1);
/** Dispatch with enough tiles for the tile mask. */
int3 tile_compact_dispatch_size_ = int3(1);
/** Dispatch with enough tiles for the tracing resolution. */
int3 tracing_dispatch_size_ = int3(1);
/** 2D tile mask to check which unused adjacent tile we need to clear. */
TextureFromPool tile_mask_tx_ = {"tile_mask_tx"};
/** Indirect dispatch rays. Avoid dispatching work-groups that will not trace anything. */
DispatchIndirectBuf ray_dispatch_buf_ = {"ray_dispatch_buf_"};
/** Indirect dispatch denoise full-resolution tiles. */
DispatchIndirectBuf denoise_dispatch_buf_ = {"denoise_dispatch_buf_"};
DispatchIndirectBuf ray_denoise_dispatch_buf_ = {"ray_denoise_dispatch_buf_"};
/** Indirect dispatch horizon scan. Avoid dispatching work-groups that will not scan anything. */
DispatchIndirectBuf horizon_dispatch_buf_ = {"horizon_dispatch_buf_"};
/** Indirect dispatch denoise full-resolution tiles. */
DispatchIndirectBuf horizon_denoise_dispatch_buf_ = {"horizon_denoise_dispatch_buf_"};
/** Pointer to the texture to store the result of horizon scan in. */
GPUTexture *horizon_scan_output_tx_ = nullptr;
/** Tile buffer that contains tile coordinates. */
RayTraceTileBuf ray_tiles_buf_ = {"ray_tiles_buf_"};
RayTraceTileBuf denoise_tiles_buf_ = {"denoise_tiles_buf_"};
RayTraceTileBuf ray_denoise_tiles_buf_ = {"ray_denoise_tiles_buf_"};
RayTraceTileBuf horizon_tiles_buf_ = {"horizon_tiles_buf_"};
RayTraceTileBuf horizon_denoise_tiles_buf_ = {"horizon_denoise_tiles_buf_"};
/** Texture containing the ray direction and PDF. */
TextureFromPool ray_data_tx_ = {"ray_data_tx"};
/** Texture containing the ray hit time. */
TextureFromPool ray_time_tx_ = {"ray_time_tx"};
/** Texture containing the ray hit radiance (tracing-res). */
TextureFromPool ray_radiance_tx_ = {"ray_radiance_tx"};
/** Texture containing the horizon visibility mask. */
TextureFromPool horizon_occlusion_tx_ = {"horizon_occlusion_tx_"};
/** Texture containing the horizon local radiance. */
TextureFromPool horizon_radiance_tx_ = {"horizon_radiance_tx_"};
/** Texture containing the input screen radiance but re-projected. */
TextureFromPool downsampled_in_radiance_tx_ = {"downsampled_in_radiance_tx_"};
/** Texture containing the view space normal. The BSDF normal is arbitrarily chosen. */
TextureFromPool downsampled_in_normal_tx_ = {"downsampled_in_normal_tx_"};
/** Textures containing the ray hit radiance denoised (full-res). One of them is result_tx. */
GPUTexture *denoised_spatial_tx_ = nullptr;
GPUTexture *denoised_temporal_tx_ = nullptr;


@ -50,6 +50,7 @@ void RenderBuffers::sync()
data.environment_id = pass_index_get(EEVEE_RENDER_PASS_ENVIRONMENT);
data.shadow_id = pass_index_get(EEVEE_RENDER_PASS_SHADOW);
data.ambient_occlusion_id = pass_index_get(EEVEE_RENDER_PASS_AO);
data.transparent_id = pass_index_get(EEVEE_RENDER_PASS_TRANSPARENT);
data.aovs = inst_.film.aovs_info;
}


@ -106,6 +106,16 @@ const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_
return "eevee_hiz_update";
case HIZ_UPDATE_LAYER:
return "eevee_hiz_update_layer";
case HORIZON_DENOISE:
return "eevee_horizon_denoise";
case HORIZON_SCAN_DIFFUSE:
return "eevee_horizon_scan_diffuse";
case HORIZON_SCAN_REFLECT:
return "eevee_horizon_scan_reflect";
case HORIZON_SCAN_REFRACT:
return "eevee_horizon_scan_refract";
case HORIZON_SETUP:
return "eevee_horizon_setup";
case MOTION_BLUR_GATHER:
return "eevee_motion_blur_gather";
case MOTION_BLUR_TILE_DILATE:
@ -311,8 +321,10 @@ void ShaderModule::material_create_info_ammend(GPUMaterial *gpumat, GPUCodegenOu
eMaterialPipeline pipeline_type;
eMaterialGeometry geometry_type;
eMaterialDisplacement displacement_type;
bool transparent_shadows;
material_type_from_shader_uuid(shader_uuid, pipeline_type, geometry_type, transparent_shadows);
material_type_from_shader_uuid(
shader_uuid, pipeline_type, geometry_type, displacement_type, transparent_shadows);
GPUCodegenOutput &codegen = *codegen_;
ShaderCreateInfo &info = *reinterpret_cast<ShaderCreateInfo *>(codegen.create_info);
@ -491,17 +503,18 @@ void ShaderModule::material_create_info_ammend(GPUMaterial *gpumat, GPUCodegenOu
}
if (!is_compute) {
if (!ELEM(geometry_type,
MAT_GEOM_WORLD,
MAT_GEOM_VOLUME_WORLD,
MAT_GEOM_VOLUME_OBJECT,
MAT_GEOM_VOLUME))
{
vert_gen << "vec3 nodetree_displacement()\n";
vert_gen << "{\n";
vert_gen << ((codegen.displacement) ? codegen.displacement : "return vec3(0);\n");
vert_gen << "}\n\n";
}
const bool use_vertex_displacement = (codegen.displacement != nullptr) &&
(displacement_type != MAT_DISPLACEMENT_BUMP) &&
(!ELEM(geometry_type,
MAT_GEOM_WORLD,
MAT_GEOM_VOLUME_WORLD,
MAT_GEOM_VOLUME_OBJECT,
MAT_GEOM_VOLUME));
vert_gen << "vec3 nodetree_displacement()\n";
vert_gen << "{\n";
vert_gen << ((use_vertex_displacement) ? codegen.displacement : "return vec3(0);\n");
vert_gen << "}\n\n";
info.vertex_source_generated = vert_gen.str();
}
@ -644,8 +657,10 @@ GPUMaterial *ShaderModule::material_shader_get(::Material *blender_mat,
{
bool is_volume = ELEM(pipeline_type, MAT_PIPE_VOLUME_MATERIAL, MAT_PIPE_VOLUME_OCCUPANCY);
eMaterialDisplacement displacement_type = to_displacement_type(blender_mat->displacement_method);
uint64_t shader_uuid = shader_uuid_from_material_type(
pipeline_type, geometry_type, blender_mat->blend_flag);
pipeline_type, geometry_type, displacement_type, blender_mat->blend_flag);
return DRW_shader_from_material(
blender_mat, nodetree, shader_uuid, is_volume, deferred_compilation, codegen_callback, this);
@ -660,7 +675,7 @@ GPUMaterial *ShaderModule::world_shader_get(::World *blender_world,
eMaterialGeometry geometry_type = is_volume ? MAT_GEOM_VOLUME_WORLD : MAT_GEOM_WORLD;
uint64_t shader_uuid = shader_uuid_from_material_type(pipeline_type, geometry_type, 0);
uint64_t shader_uuid = shader_uuid_from_material_type(pipeline_type, geometry_type);
return DRW_shader_from_world(
blender_world, nodetree, shader_uuid, is_volume, defer_compilation, codegen_callback, this);
@ -672,10 +687,9 @@ GPUMaterial *ShaderModule::material_shader_get(const char *name,
ListBase &materials,
bNodeTree *nodetree,
eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type,
bool is_lookdev)
eMaterialGeometry geometry_type)
{
uint64_t shader_uuid = shader_uuid_from_material_type(pipeline_type, geometry_type, 0);
uint64_t shader_uuid = shader_uuid_from_material_type(pipeline_type, geometry_type);
bool is_volume = ELEM(pipeline_type, MAT_PIPE_VOLUME_MATERIAL, MAT_PIPE_VOLUME_OCCUPANCY);
@ -686,10 +700,10 @@ GPUMaterial *ShaderModule::material_shader_get(const char *name,
name,
shader_uuid,
is_volume,
is_lookdev,
false,
codegen_callback,
this);
GPU_material_status_set(gpumat, GPU_MAT_QUEUED);
GPU_material_status_set(gpumat, GPU_MAT_CREATED);
GPU_material_compile(gpumat);
/* Queue deferred material optimization. */
DRW_shader_queue_optimize_material(gpumat);


@ -66,6 +66,12 @@ enum eShaderType {
HIZ_UPDATE_LAYER,
HIZ_DEBUG,
HORIZON_DENOISE,
HORIZON_SCAN_DIFFUSE,
HORIZON_SCAN_REFLECT,
HORIZON_SCAN_REFRACT,
HORIZON_SETUP,
LIGHT_CULLING_DEBUG,
LIGHT_CULLING_SELECT,
LIGHT_CULLING_SORT,
@ -167,8 +173,7 @@ class ShaderModule {
ListBase &materials,
bNodeTree *nodetree,
eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type,
bool is_lookdev);
eMaterialGeometry geometry_type);
void material_create_info_ammend(GPUMaterial *mat, GPUCodegenOutput *codegen);


@ -268,9 +268,9 @@ struct FilmData {
/** Is true if accumulation of filtered passes is needed. */
bool1 any_render_pass_1;
bool1 any_render_pass_2;
bool1 any_render_pass_3;
/** Controlled by user in lookdev mode or by render settings. */
float background_opacity;
float _pad0, _pad1;
/** Output counts per type. */
int color_len, value_len;
/** Index in color_accum_img or value_accum_img of each pass. -1 if pass is not enabled. */
@ -287,6 +287,7 @@ struct FilmData {
int environment_id;
int shadow_id;
int ambient_occlusion_id;
int transparent_id;
/** Not indexed but still not -1 if enabled. */
int depth_id;
int combined_id;
@ -376,11 +377,12 @@ struct RenderBuffersInfoData {
int volume_light_id;
int emission_id;
int environment_id;
int transparent_id;
/* Value */
int value_len;
int shadow_id;
int ambient_occlusion_id;
int _pad0, _pad1, _pad2;
int _pad0, _pad1;
};
BLI_STATIC_ASSERT_ALIGN(RenderBuffersInfoData, 16)
@ -1200,14 +1202,14 @@ struct RayTraceData {
/** Maximum brightness during lighting evaluation. */
float brightness_clamp;
/** Maximum roughness for which we will trace a ray. */
float max_trace_roughness;
float roughness_mask_scale;
float roughness_mask_bias;
/** If set to true will bypass spatial denoising. */
bool1 skip_denoise;
/** Closure being ray-traced. */
eClosureBits closure_active;
int _pad0;
int _pad1;
int _pad2;
};
BLI_STATIC_ASSERT_ALIGN(RayTraceData, 16)


@ -28,11 +28,7 @@ namespace blender::eevee {
/** \name ShadingView
* \{ */
void ShadingView::init()
{
// dof_.init();
// mb_.init();
}
void ShadingView::init() {}
void ShadingView::sync()
{
@ -59,31 +55,21 @@ void ShadingView::sync()
const CameraData &cam = inst_.camera.data_get();
float4x4 viewmat, winmat;
const float(*viewmat_p)[4] = viewmat.ptr(), (*winmat_p)[4] = winmat.ptr();
if (false /* inst_.camera.is_panoramic() */) {
/* TODO(@fclem) Over-scans. */
/* For now a mandatory 5% over-scan for DoF. */
float side = cam.clip_near * 1.05f;
float near = cam.clip_near;
float far = cam.clip_far;
perspective_m4(winmat.ptr(), -side, side, -side, side, near, far);
winmat = math::projection::perspective(-side, side, -side, side, near, far);
viewmat = face_matrix_ * cam.viewmat;
}
else {
viewmat_p = cam.viewmat.ptr();
winmat_p = cam.winmat.ptr();
viewmat = cam.viewmat;
winmat = cam.winmat;
}
main_view_ = DRW_view_create(viewmat_p, winmat_p, nullptr, nullptr, nullptr);
sub_view_ = DRW_view_create_sub(main_view_, viewmat_p, winmat_p);
render_view_ = DRW_view_create_sub(main_view_, viewmat_p, winmat_p);
// dof_.sync(winmat_p, extent_);
// rt_buffer_opaque_.sync(extent_);
// rt_buffer_refract_.sync(extent_);
// inst_.hiz_back.view_sync(extent_);
// inst_.hiz_front.view_sync(extent_);
// inst_.gbuffer.view_sync(extent_);
main_view_.sync(viewmat, winmat);
}
void ShadingView::render()
@ -103,9 +89,8 @@ void ShadingView::render()
update_view();
DRW_stats_group_start(name_);
DRW_view_set_active(render_view_);
inst_.planar_probes.set_view(render_view_new_, extent_);
inst_.planar_probes.set_view(render_view_, extent_);
/* If camera has any motion, compute motion vector in the film pass. Otherwise, we avoid float
* precision issues by setting the motion of all static geometry to 0. */
@ -121,42 +106,42 @@ void ShadingView::render()
inst_.hiz_buffer.set_source(&inst_.render_buffers.depth_tx);
inst_.hiz_buffer.set_dirty();
inst_.pipelines.background.render(render_view_new_);
inst_.pipelines.background.render(render_view_);
/* TODO(fclem): Move it after the first prepass (and hiz update) once pipeline is stabilized. */
inst_.lights.set_view(render_view_new_, extent_);
inst_.reflection_probes.set_view(render_view_new_);
inst_.lights.set_view(render_view_, extent_);
inst_.reflection_probes.set_view(render_view_);
inst_.volume.draw_prepass(render_view_new_);
inst_.volume.draw_prepass(render_view_);
/* TODO: cleanup. */
View main_view_new("MainView", main_view_);
/* TODO(Miguel Pozo): Deferred and forward prepass should happen before the GBuffer pass. */
inst_.pipelines.deferred.render(main_view_new,
render_view_new_,
inst_.pipelines.deferred.render(main_view_,
render_view_,
prepass_fb_,
combined_fb_,
extent_,
rt_buffer_opaque_,
rt_buffer_refract_);
inst_.volume.draw_compute(render_view_new_);
inst_.volume.draw_compute(render_view_);
// inst_.lookdev.render_overlay(view_fb_);
inst_.pipelines.forward.render(render_view_new_, prepass_fb_, combined_fb_, rbufs.combined_tx);
inst_.pipelines.forward.render(render_view_, prepass_fb_, combined_fb_);
inst_.lights.debug_draw(render_view_new_, combined_fb_);
inst_.hiz_buffer.debug_draw(render_view_new_, combined_fb_);
inst_.shadows.debug_draw(render_view_new_, combined_fb_);
inst_.irradiance_cache.viewport_draw(render_view_new_, combined_fb_);
inst_.reflection_probes.viewport_draw(render_view_new_, combined_fb_);
inst_.planar_probes.viewport_draw(render_view_new_, combined_fb_);
render_transparent_pass(rbufs);
inst_.ambient_occlusion.render_pass(render_view_new_);
inst_.lights.debug_draw(render_view_, combined_fb_);
inst_.hiz_buffer.debug_draw(render_view_, combined_fb_);
inst_.shadows.debug_draw(render_view_, combined_fb_);
inst_.irradiance_cache.viewport_draw(render_view_, combined_fb_);
inst_.reflection_probes.viewport_draw(render_view_, combined_fb_);
inst_.planar_probes.viewport_draw(render_view_, combined_fb_);
inst_.ambient_occlusion.render_pass(render_view_);
GPUTexture *combined_final_tx = render_postfx(rbufs.combined_tx);
inst_.film.accumulate(sub_view_, combined_final_tx);
inst_.film.accumulate(jitter_view_, combined_final_tx);
rbufs.release();
postfx_tx_.release();
@ -164,6 +149,20 @@ void ShadingView::render()
DRW_stats_group_end();
}
void ShadingView::render_transparent_pass(RenderBuffers &rbufs)
{
if (rbufs.data.transparent_id != -1) {
transparent_fb_.ensure(
GPU_ATTACHMENT_TEXTURE(rbufs.depth_tx),
GPU_ATTACHMENT_TEXTURE_LAYER(rbufs.rp_color_tx, rbufs.data.transparent_id));
/* Alpha stores transmittance. So start at 1. */
float4 clear_color = {0.0f, 0.0f, 0.0f, 1.0f};
GPU_framebuffer_bind(transparent_fb_);
GPU_framebuffer_clear_color(transparent_fb_, clear_color);
inst_.pipelines.forward.render(render_view_, prepass_fb_, transparent_fb_);
}
}
GPUTexture *ShadingView::render_postfx(GPUTexture *input_tx)
{
if (!inst_.depth_of_field.postfx_enabled() && !inst_.motion_blur.postfx_enabled()) {
@ -174,17 +173,16 @@ GPUTexture *ShadingView::render_postfx(GPUTexture *input_tx)
GPUTexture *output_tx = postfx_tx_;
/* Swapping is done internally. Actual output is set to the next input. */
inst_.depth_of_field.render(render_view_new_, &input_tx, &output_tx, dof_buffer_);
inst_.motion_blur.render(render_view_new_, &input_tx, &output_tx);
inst_.depth_of_field.render(render_view_, &input_tx, &output_tx, dof_buffer_);
inst_.motion_blur.render(render_view_, &input_tx, &output_tx);
return input_tx;
}
void ShadingView::update_view()
{
float4x4 viewmat, winmat;
DRW_view_viewmat_get(main_view_, viewmat.ptr(), false);
DRW_view_winmat_get(main_view_, winmat.ptr(), false);
float4x4 viewmat = main_view_.viewmat();
float4x4 winmat = main_view_.winmat();
/* TODO(fclem): Mixed-resolution rendering: We need to make sure we render with exactly the same
* distances between pixels to line up render samples and target pixels.
@ -197,14 +195,12 @@ void ShadingView::update_view()
jitter *= 2.0f;
window_translate_m4(winmat.ptr(), winmat.ptr(), UNPACK2(jitter));
DRW_view_update_sub(sub_view_, viewmat.ptr(), winmat.ptr());
jitter_view_.sync(winmat, winmat);
/* FIXME(fclem): The offset may be noticeably large and the culling might make objects pop
* out of the blurring radius. To fix this, use custom enlarged culling matrix. */
inst_.depth_of_field.jitter_apply(winmat, viewmat);
DRW_view_update_sub(render_view_, viewmat.ptr(), winmat.ptr());
render_view_new_.sync(viewmat, winmat);
render_view_.sync(viewmat, winmat);
}
/** \} */


@ -48,16 +48,16 @@ class ShadingView {
Framebuffer prepass_fb_;
Framebuffer combined_fb_;
Framebuffer transparent_fb_ = {"transparent"};
TextureFromPool postfx_tx_;
/** Main view is created from the camera (or from the viewport). It is not jittered. */
DRWView *main_view_ = nullptr;
View main_view_ = {"main_view"};
/** Sub views are jittered versions of the main view. This allows jitter updates without trashing
* the visibility culling cache. */
DRWView *sub_view_ = nullptr;
/** Same as sub_view_ but has Depth Of Field jitter applied. */
DRWView *render_view_ = nullptr;
View render_view_new_;
View jitter_view_ = {"jitter_view"};
/** Same as jitter_view_ but has Depth Of Field jitter applied. */
View render_view_;
/** Render size of the view. Can change between scene sample eval. */
int2 extent_ = {-1, -1};
@ -66,7 +66,7 @@ class ShadingView {
public:
ShadingView(Instance &inst, const char *name, const float4x4 &face_matrix)
: inst_(inst), name_(name), face_matrix_(face_matrix), render_view_new_(name){};
: inst_(inst), name_(name), face_matrix_(face_matrix), render_view_(name){};
~ShadingView(){};
@ -76,9 +76,11 @@ class ShadingView {
void render();
private:
void render_transparent_pass(RenderBuffers &rbufs);
GPUTexture *render_postfx(GPUTexture *input_tx);
private:
void update_view();
};


@ -29,21 +29,24 @@ void main()
vec2 noise;
noise.x = interlieved_gradient_noise(vec2(texel), 3.0, 0.0);
noise.y = utility_tx_fetch(utility_tx, texel, UTIL_BLUE_NOISE_LAYER).r;
noise.y = utility_tx_fetch(utility_tx, vec2(texel), UTIL_BLUE_NOISE_LAYER).r;
noise = fract(noise + sampling_rng_2D_get(SAMPLING_AO_U));
vec3 ambient_occlusion = horizon_scan_eval(vP,
vN,
hiz_tx,
noise,
uniform_buf.ao.pixel_size,
uniform_buf.ao.distance,
uniform_buf.ao.thickness,
uniform_buf.ao.angle_bias,
10);
ClosureOcclusion occlusion;
occlusion.N = vN;
/* We can have some float imprecision because of the weighted accumulation. */
ambient_occlusion = saturate(ambient_occlusion * 1.02);
HorizonScanContext ctx;
ctx.occlusion = occlusion;
imageStore(out_ao_img, ivec3(texel, out_ao_img_layer_index), saturate(ambient_occlusion.rrrr));
horizon_scan_eval(vP,
ctx,
noise,
uniform_buf.ao.pixel_size,
uniform_buf.ao.distance,
uniform_buf.ao.thickness,
uniform_buf.ao.angle_bias,
10);
imageStore(
out_ao_img, ivec3(texel, out_ao_img_layer_index), vec4(saturate(ctx.occlusion_result.r)));
}


@ -759,6 +759,23 @@ void film_process_data(ivec2 texel_film, out vec4 out_color, out float out_depth
film_store_value(dst, uniform_buf.film.mist_id, mist_accum, out_color);
}
if (uniform_buf.film.any_render_pass_3) {
vec4 transparent_accum = vec4(0.0);
for (int i = 0; i < uniform_buf.film.samples_len; i++) {
FilmSample src = film_sample_get(i, texel_film);
film_sample_accum(src,
uniform_buf.film.transparent_id,
uniform_buf.render_pass.transparent_id,
rp_color_tx,
transparent_accum);
}
/* Alpha stores transmittance for transparent pass. */
transparent_accum.a = weight_accum - transparent_accum.a;
film_store_color(dst, uniform_buf.film.transparent_id, transparent_accum, out_color);
}
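A worked check of the conversion above, assuming sample weights summing to 1: with two samples of weight 0.5 and transmittances 1.0 (nothing hit) and 0.2 (dense surface), the loop accumulates transparent_accum.a = 0.5 * 1.0 + 0.5 * 0.2 = 0.6, so the stored alpha is weight_accum - 0.6 = 0.4. That is sum(w_i * (1 - t_i)), i.e. 40% coverage.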
for (int aov = 0; aov < uniform_buf.film.aov_color_len; aov++) {
vec4 aov_accum = vec4(0.0);


@ -0,0 +1,188 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma BLENDER_REQUIRE(draw_view_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_utildefines_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_math_vector_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_gbuffer_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_lightprobe_eval_lib.glsl)
float bilateral_depth_weight(vec3 center_N, vec3 center_P, vec3 sample_P)
{
vec4 center_plane_eq = vec4(center_N, -dot(center_N, center_P));
/* Only compare distance to the center plane formed by the normal. */
float depth_delta = dot(center_plane_eq, vec4(sample_P, 1.0));
/* TODO(fclem): Scene parameter. This is dependent on scene scale. */
const float scale = 10000.0;
float weight = exp2(-scale * square(depth_delta));
return weight;
}
float bilateral_spatial_weight(float sigma, vec2 offset_from_center)
{
/* From https://github.com/tranvansang/bilateral-filter/blob/master/fshader.frag */
float fac = -1.0 / square(sigma);
/* Take two standard deviation. */
fac *= 2.0;
float weight = exp2(fac * length_squared(offset_from_center));
return weight;
}
float bilateral_normal_weight(vec3 center_N, vec3 sample_N)
{
float facing_ratio = dot(center_N, sample_N);
float weight = saturate(pow8f(facing_ratio));
return weight;
}
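The three weights multiply into a single rejection kernel; a scalar sketch using the constants from above (plane-distance scale 10000, sigma 1.5, facing^8):

#include <cmath>

static float bilateral_weight(float plane_dist, float pixel_dist, float facing)
{
  const float depth_w = std::exp2(-10000.0f * plane_dist * plane_dist);
  const float spatial_w = std::exp2((-2.0f / (1.5f * 1.5f)) * pixel_dist * pixel_dist);
  const float f2 = facing * facing, f4 = f2 * f2;
  const float normal_w = f4 * f4; /* pow8f(facing) */
  /* Any term near zero rejects the neighbor sample. */
  return depth_w * spatial_w * normal_w;
}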
/* In order to remove some more fireflies, "tone-map" the color samples during the accumulation. */
vec3 to_accumulation_space(vec3 color)
{
/* This 4 factor is to avoid killing too much energy. */
/* TODO(fclem): Parameter? */
color /= 4.0;
color = color / (1.0 + reduce_add(color));
return color;
}
vec3 from_accumulation_space(vec3 color)
{
color = color / (1.0 - reduce_add(color));
color *= 4.0;
return color;
}
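This pair is an invertible Reinhard-style remapping (the 1/4 pre-scale keeps more energy before compression); a quick round-trip check:

#include <cassert>
#include <cmath>

struct Vec3 {
  float x, y, z;
};
static float reduce_add(Vec3 c)
{
  return c.x + c.y + c.z;
}

static Vec3 to_accum(Vec3 c)
{
  c = {c.x / 4.0f, c.y / 4.0f, c.z / 4.0f};
  const float f = 1.0f / (1.0f + reduce_add(c));
  return {c.x * f, c.y * f, c.z * f};
}
static Vec3 from_accum(Vec3 c)
{
  const float f = 1.0f / (1.0f - reduce_add(c));
  c = {c.x * f, c.y * f, c.z * f};
  return {c.x * 4.0f, c.y * 4.0f, c.z * 4.0f};
}

int main()
{
  const Vec3 hdr = {8.0f, 2.0f, 0.5f};
  const Vec3 back = from_accum(to_accum(hdr));
  assert(std::fabs(back.x - hdr.x) < 1e-3f); /* Exact inverse up to float precision. */
  return 0;
}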
vec3 load_normal(ivec2 texel)
{
GBufferData gbuf = gbuffer_read(gbuf_header_tx, gbuf_closure_tx, gbuf_color_tx, texel);
/* TODO(fclem): Load preprocessed Normal. */
vec3 N = vec3(0.0);
if (gbuf.has_diffuse) {
N = gbuf.diffuse.N;
}
if (gbuf.has_reflection) {
N = gbuf.reflection.N;
}
if (gbuf.has_refraction) {
N = gbuf.refraction.N;
}
return N;
}
void main()
{
const uint tile_size = RAYTRACE_GROUP_SIZE;
uvec2 tile_coord = unpackUvec2x16(tiles_coord_buf[gl_WorkGroupID.x]);
ivec2 texel_fullres = ivec2(gl_LocalInvocationID.xy + tile_coord * tile_size);
ivec2 texel = (texel_fullres) / uniform_buf.raytrace.resolution_scale;
ivec2 extent = textureSize(gbuf_header_tx, 0).xy;
if (any(greaterThanEqual(texel_fullres, extent))) {
return;
}
vec2 center_uv = (vec2(texel_fullres) + 0.5) * uniform_buf.raytrace.full_resolution_inv;
float center_depth = texelFetch(depth_tx, texel_fullres, 0).r;
vec3 center_P = drw_point_screen_to_world(vec3(center_uv, center_depth));
if (center_depth == 1.0) {
/* Do not trace for background */
return;
}
GBufferData gbuf = gbuffer_read(gbuf_header_tx, gbuf_closure_tx, gbuf_color_tx, texel_fullres);
uint closure_bits = texelFetch(gbuf_header_tx, texel_fullres, 0).r;
if (!flag_test(closure_bits, uniform_buf.raytrace.closure_active)) {
return;
}
vec3 center_N = gbuf.diffuse.N;
float roughness = 1.0;
if (uniform_buf.raytrace.closure_active == eClosureBits(CLOSURE_REFLECTION)) {
roughness = gbuf.reflection.roughness;
center_N = gbuf.reflection.N;
}
if (uniform_buf.raytrace.closure_active == eClosureBits(CLOSURE_REFRACTION)) {
roughness = 1.0; /* TODO(fclem): Apparent roughness. */
center_N = gbuf.refraction.N;
}
float mix_fac = saturate(roughness * uniform_buf.raytrace.roughness_mask_scale -
uniform_buf.raytrace.roughness_mask_bias);
bool use_raytrace = mix_fac < 1.0;
bool use_horizon = mix_fac > 0.0;
if (use_horizon == false) {
return;
}
vec3 accum_radiance = vec3(0.0);
float accum_occlusion = 0.0;
float accum_weight = 0.0;
for (int x = -1; x <= 1; x++) {
for (int y = -1; y <= 1; y++) {
ivec2 offset = ivec2(x, y);
ivec2 sample_texel = texel + ivec2(x, y);
ivec2 sample_texel_fullres = sample_texel * uniform_buf.raytrace.resolution_scale +
uniform_buf.raytrace.resolution_bias;
ivec2 sample_tile = sample_texel_fullres / RAYTRACE_GROUP_SIZE;
/* Make sure the sample has been processed and does not contain garbage data. */
uint tile_mask = imageLoad(tile_mask_img, sample_tile).r;
bool unprocessed_tile = !flag_test(tile_mask, 1u << 1u);
if (unprocessed_tile) {
continue;
}
float sample_depth = texelFetch(depth_tx, sample_texel_fullres, 0).r;
vec2 sample_uv = (vec2(sample_texel_fullres) + 0.5) *
uniform_buf.raytrace.full_resolution_inv;
vec3 sample_P = drw_point_screen_to_world(vec3(sample_uv, sample_depth));
/* Background case. */
if (sample_depth == 0.0) {
continue;
}
vec3 sample_N = load_normal(sample_texel_fullres);
float depth_weight = bilateral_depth_weight(center_N, center_P, sample_P);
float spatial_weight = bilateral_spatial_weight(1.5, vec2(offset));
float normal_weight = bilateral_normal_weight(center_N, sample_N);
float weight = depth_weight * spatial_weight * normal_weight;
vec3 radiance = imageLoad(horizon_radiance_img, sample_texel).rgb;
/* Do not gather unprocessed pixels. */
if (all(equal(radiance, FLT_11_11_10_MAX))) {
continue;
}
float occlusion = imageLoad(horizon_occlusion_img, sample_texel).r;
accum_radiance += to_accumulation_space(radiance) * weight;
accum_occlusion += occlusion * weight;
accum_weight += weight;
}
}
float occlusion = accum_occlusion * safe_rcp(accum_weight);
vec3 radiance = from_accumulation_space(accum_radiance * safe_rcp(accum_weight));
vec3 P = center_P;
vec3 N = center_N;
vec3 Ng = center_N;
vec3 V = drw_world_incident_vector(P);
/* Fallback to nearest light-probe. */
LightProbeSample samp = lightprobe_load(P, Ng, V);
vec3 radiance_probe = spherical_harmonics_evaluate_lambert(N, samp.volume_irradiance);
/* Apply missing distant lighting. */
radiance += occlusion * radiance_probe;
vec4 radiance_horizon = vec4(radiance, 0.0);
vec4 radiance_raytrace = use_raytrace ? imageLoad(radiance_img, texel_fullres) : vec4(0.0);
vec4 radiance_mixed = mix(radiance_raytrace, radiance_horizon, mix_fac);
imageStore(radiance_img, texel_fullres, radiance_mixed);
}


@ -0,0 +1,98 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma BLENDER_REQUIRE(draw_view_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_utildefines_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_math_vector_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_gbuffer_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_horizon_scan_eval_lib.glsl)
void main()
{
const uint tile_size = RAYTRACE_GROUP_SIZE;
uvec2 tile_coord = unpackUvec2x16(tiles_coord_buf[gl_WorkGroupID.x]);
ivec2 texel = ivec2(gl_LocalInvocationID.xy + tile_coord * tile_size);
ivec2 texel_fullres = texel * uniform_buf.raytrace.resolution_scale +
uniform_buf.raytrace.resolution_bias;
ivec2 extent = textureSize(gbuf_header_tx, 0).xy;
if (any(greaterThanEqual(texel_fullres, extent))) {
return;
}
vec2 uv = (vec2(texel_fullres) + 0.5) * uniform_buf.raytrace.full_resolution_inv;
float depth = texelFetch(hiz_tx, texel_fullres, 0).r;
if (depth == 1.0) {
/* Do not trace for background */
imageStore(horizon_radiance_img, texel, vec4(FLT_11_11_10_MAX, 0.0));
return;
}
GBufferData gbuf = gbuffer_read(gbuf_header_tx, gbuf_closure_tx, gbuf_color_tx, texel_fullres);
HorizonScanContext ctx;
#ifdef HORIZON_DIFFUSE
if (gbuf.has_diffuse == false) {
imageStore(horizon_radiance_img, texel, vec4(0.0));
return;
}
vec3 Ng = gbuf.diffuse.N;
ctx.diffuse = gbuf.diffuse;
ctx.diffuse.N = drw_normal_world_to_view(ctx.diffuse.N);
#endif
#ifdef HORIZON_REFLECT
if (gbuf.has_reflection == false) {
imageStore(horizon_radiance_img, texel, vec4(0.0));
return;
}
vec3 Ng = gbuf.reflection.N;
ctx.reflection = gbuf.reflection;
ctx.reflection.roughness = max(ctx.reflection.roughness, 0.1);
ctx.reflection.N = drw_normal_world_to_view(ctx.reflection.N);
#endif
#ifdef HORIZON_REFRACT
if (gbuf.has_refraction == false) {
imageStore(horizon_radiance_img, texel, vec4(0.0));
return;
}
vec3 Ng = gbuf.refraction.N;
ctx.refraction = gbuf.refraction;
ctx.refraction.N = drw_normal_world_to_view(ctx.refraction.N);
#endif
vec3 vP = drw_point_screen_to_view(vec3(uv, depth));
vec2 noise = utility_tx_fetch(utility_tx, vec2(texel), UTIL_BLUE_NOISE_LAYER).rg;
noise = fract(noise + sampling_rng_2D_get(SAMPLING_AO_U));
horizon_scan_eval(vP,
ctx,
noise,
uniform_buf.ao.pixel_size,
1.0e16,
uniform_buf.ao.thickness,
uniform_buf.ao.angle_bias,
8);
float occlusion = 0.0;
vec4 radiance = vec4(1.0, 0.0, 1.0, 1.0);
#ifdef HORIZON_DIFFUSE
radiance.rgb = ctx.diffuse_result.rgb;
occlusion = ctx.diffuse_result.a;
#endif
#ifdef HORIZON_REFLECT
radiance.rgb = ctx.reflection_result.rgb;
occlusion = ctx.reflection_result.a;
#endif
#ifdef HORIZON_REFRACT
radiance.rgb = ctx.refraction_result.rgb;
occlusion = ctx.refraction_result.a;
#endif
imageStore(horizon_radiance_img, texel, radiance);
imageStore(horizon_occlusion_img, texel, vec4(occlusion));
}


@ -8,12 +8,239 @@
* This mostly follows the paper:
* "Screen Space Indirect Lighting with Visibility Bitmask"
* by Olivier Therrien, Yannick Levesque, Guillaume Gilet
*
* Expects `screen_radiance_tx` and `screen_normal_tx` to be bound if `HORIZON_OCCLUSION` is not
* defined.
*/
#pragma BLENDER_REQUIRE(common_shape_lib.glsl)
#pragma BLENDER_REQUIRE(draw_view_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_horizon_scan_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_ray_types_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_codegen_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_bxdf_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_spherical_harmonics_lib.glsl)
#ifdef RAYTRACE_DIFFUSE
# define HORIZON_DIFFUSE
#endif
#ifdef RAYTRACE_REFLECT
# define HORIZON_REFLECT
#endif
#ifdef RAYTRACE_REFRACT
# define HORIZON_REFRACT
#endif
vec3 horizon_scan_sample_radiance(vec2 uv)
{
#ifndef HORIZON_OCCLUSION
return texture(screen_radiance_tx, uv).rgb;
#else
return vec3(0.0);
#endif
}
vec3 horizon_scan_sample_normal(vec2 uv)
{
#ifndef HORIZON_OCCLUSION
return texture(screen_normal_tx, uv).rgb * 2.0 - 1.0;
#else
return vec3(0.0);
#endif
}
/* Note: Expects all normals to be in view-space. */
struct HorizonScanContextCommon {
float N_angle;
float N_length;
uint bitmask;
float weight_slice;
float weight_accum;
vec3 light_slice;
vec4 light_accum;
};
struct HorizonScanContext {
#ifdef HORIZON_OCCLUSION
ClosureOcclusion occlusion;
HorizonScanContextCommon occlusion_common;
vec4 occlusion_result;
#endif
#ifdef HORIZON_DIFFUSE
ClosureDiffuse diffuse;
HorizonScanContextCommon diffuse_common;
vec4 diffuse_result;
#endif
#ifdef HORIZON_REFLECT
ClosureReflection reflection;
HorizonScanContextCommon reflection_common;
vec4 reflection_result;
#endif
#ifdef HORIZON_REFRACT
ClosureRefraction refraction;
HorizonScanContextCommon refraction_common;
vec4 refraction_result;
#endif
};
void horizon_scan_context_accumulation_reset(inout HorizonScanContext context)
{
#ifdef HORIZON_OCCLUSION
context.occlusion_common.light_accum = vec4(0.0);
context.occlusion_common.weight_accum = 0.0;
#endif
#ifdef HORIZON_DIFFUSE
context.diffuse_common.light_accum = vec4(0.0);
context.diffuse_common.weight_accum = 0.0;
#endif
#ifdef HORIZON_REFLECT
context.reflection_common.light_accum = vec4(0.0);
context.reflection_common.weight_accum = 0.0;
#endif
#ifdef HORIZON_REFRACT
context.refraction_common.light_accum = vec4(0.0);
context.refraction_common.weight_accum = 0.0;
#endif
}
void horizon_scan_context_slice_start(
inout HorizonScanContextCommon context, vec3 vN, vec3 vV, vec3 vT, vec3 vB)
{
context.bitmask = 0u;
context.weight_slice = 0.0;
context.light_slice = vec3(0.0);
horizon_scan_projected_normal_to_plane_angle_and_length(
vN, vV, vT, vB, context.N_length, context.N_angle);
}
void horizon_scan_context_slice_start(inout HorizonScanContext context, vec3 vV, vec3 vT, vec3 vB)
{
#ifdef HORIZON_OCCLUSION
horizon_scan_context_slice_start(context.occlusion_common, context.occlusion.N, vV, vT, vB);
#endif
#ifdef HORIZON_DIFFUSE
horizon_scan_context_slice_start(context.diffuse_common, context.diffuse.N, vV, vT, vB);
#endif
#ifdef HORIZON_REFLECT
horizon_scan_context_slice_start(context.reflection_common, context.reflection.N, vV, vT, vB);
#endif
#ifdef HORIZON_REFRACT
horizon_scan_context_slice_start(context.refraction_common, context.refraction.N, vV, vT, vB);
#endif
}
void horizon_scan_context_sample_finish(inout HorizonScanContextCommon context,
vec3 sample_radiance,
float sample_weight,
vec2 sample_theta,
float angle_bias)
{
/* Angular bias shrinks the visibility bitmask around the projected normal. */
sample_theta = (sample_theta - context.N_angle) * angle_bias;
uint sample_bitmask = horizon_scan_angles_to_bitmask(sample_theta);
sample_weight *= horizon_scan_bitmask_to_visibility_uniform(sample_bitmask & ~context.bitmask);
context.weight_slice += sample_weight;
context.light_slice += sample_radiance * sample_weight;
context.bitmask |= sample_bitmask;
}
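The bitmask helpers referenced here are outside this diff; assuming a 32-bit visibility mask whose bits cover equal angular sectors of the slice, the uniform-visibility weighting reduces to a set-bit ratio. A hypothetical sketch (bit width and mapping assumed):

#include <bit>
#include <cstdint>

/* Assumed: each of the 32 bits covers an equal sector, so uniform visibility
 * is the fraction of sectors passed in. */
static float visibility_uniform(uint32_t bits)
{
  return float(std::popcount(bits)) / 32.0f;
}

/* As in the sample weighting above: only sectors not already occluded by
 * previously accumulated samples contribute. */
static float sample_visibility(uint32_t sample_bitmask, uint32_t accum_bitmask)
{
  return visibility_uniform(sample_bitmask & ~accum_bitmask);
}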
float bxdf_eval(ClosureDiffuse closure, vec3 L, vec3 V)
{
return bsdf_lambert(closure.N, L);
}
float bxdf_eval(ClosureReflection closure, vec3 L, vec3 V)
{
return bsdf_ggx(closure.N, L, V, closure.roughness);
}
float bxdf_eval(ClosureRefraction closure, vec3 L, vec3 V)
{
return btdf_ggx(closure.N, L, V, closure.roughness, closure.ior);
}
void horizon_scan_context_sample_finish(
inout HorizonScanContext ctx, vec3 L, vec3 V, vec2 sample_uv, vec2 theta, float bias)
{
vec3 sample_radiance = horizon_scan_sample_radiance(sample_uv);
/* Take emitter surface normal into consideration. */
vec3 sample_normal = horizon_scan_sample_normal(sample_uv);
/* Discard backfacing samples.
* The paper suggests a smooth test which is not physically correct since we
* already consider the sample reflected radiance.
* Set the weight to allow energy conservation. If we modulate the radiance, we lose energy. */
float weight = step(dot(sample_normal, -L), 0.0);
#ifdef HORIZON_OCCLUSION
horizon_scan_context_sample_finish(ctx.occlusion_common, sample_radiance, 1.0, theta, bias);
#endif
#ifdef HORIZON_DIFFUSE
weight = bxdf_eval(ctx.diffuse, L, V);
horizon_scan_context_sample_finish(ctx.diffuse_common, sample_radiance, weight, theta, bias);
#endif
#ifdef HORIZON_REFLECT
weight = bxdf_eval(ctx.reflection, L, V);
horizon_scan_context_sample_finish(ctx.reflection_common, sample_radiance, weight, theta, bias);
#endif
#ifdef HORIZON_REFRACT
/* TODO(fclem): Broken: Black. */
weight = bxdf_eval(ctx.refraction, L, V);
horizon_scan_context_sample_finish(ctx.refraction_common, sample_radiance, weight, theta, bias);
#endif
}
void horizon_scan_context_slice_finish(inout HorizonScanContextCommon context)
{
/* Use uniform visibility since this is what we use for near field lighting.
* Also, the lighting we are going to mask already contains the cosine lobe. */
float slice_occlusion = horizon_scan_bitmask_to_visibility_uniform(~context.bitmask);
/* Normalize radiance since BxDF is applied when merging direct and indirect light. */
context.light_slice *= safe_rcp(context.weight_slice) * (1.0 - slice_occlusion);
/* Correct normal not on plane (Eq. 8 of GTAO paper). */
context.light_accum += vec4(context.light_slice, slice_occlusion) * context.N_length;
context.weight_accum += context.N_length;
}
void horizon_scan_context_slice_finish(inout HorizonScanContext context)
{
#ifdef HORIZON_OCCLUSION
float occlusion = horizon_scan_bitmask_to_occlusion_cosine(context.occlusion_common.bitmask);
context.occlusion_common.light_accum += vec4(occlusion) * context.occlusion_common.N_length;
context.occlusion_common.weight_accum += context.occlusion_common.N_length;
#endif
#ifdef HORIZON_DIFFUSE
horizon_scan_context_slice_finish(context.diffuse_common);
#endif
#ifdef HORIZON_REFLECT
horizon_scan_context_slice_finish(context.reflection_common);
#endif
#ifdef HORIZON_REFRACT
horizon_scan_context_slice_finish(context.refraction_common);
#endif
}
void horizon_scan_context_accumulation_finish(HorizonScanContextCommon context, out vec4 result)
{
result = context.light_accum * safe_rcp(context.weight_accum);
}
void horizon_scan_context_accumulation_finish(inout HorizonScanContext context)
{
#ifdef HORIZON_OCCLUSION
horizon_scan_context_accumulation_finish(context.occlusion_common, context.occlusion_result);
#endif
#ifdef HORIZON_DIFFUSE
horizon_scan_context_accumulation_finish(context.diffuse_common, context.diffuse_result);
#endif
#ifdef HORIZON_REFLECT
horizon_scan_context_accumulation_finish(context.reflection_common, context.reflection_result);
#endif
#ifdef HORIZON_REFRACT
horizon_scan_context_accumulation_finish(context.refraction_common, context.refraction_result);
#endif
}
/**
* Returns the start and end point of a ray clipped to its intersection
@ -47,11 +274,10 @@ void horizon_scan_occluder_intersection_ray_sphere_clip(Ray ray,
/**
* Scans the horizon in many directions and returns the indirect lighting radiance.
* Returned lighting depends on configuration.
* Returned lighting is stored inside the context in `_accum` members already normalized.
*/
vec3 horizon_scan_eval(vec3 vP,
vec3 vN,
sampler2D depth_tx,
void horizon_scan_eval(vec3 vP,
inout HorizonScanContext context,
vec2 noise,
vec2 pixel_size,
float search_distance,
@ -61,29 +287,21 @@ vec3 horizon_scan_eval(vec3 vP,
{
vec3 vV = drw_view_incident_vector(vP);
/* Only a quarter of a turn because we integrate using 2 slices.
* We use this instead of using full circle noise to improve cache hits
* since all tracing directions will be in the same quadrant. */
vec2 v_dir = sample_circle(noise.x * 0.25);
const int slice_len = 2;
vec2 v_dir = sample_circle(noise.x * (0.5 / float(slice_len)));
vec3 accum_light = vec3(0.0);
float accum_weight = 0.0;
horizon_scan_context_accumulation_reset(context);
for (int slice = 0; slice < slice_len; slice++) {
#if 0 /* For debug purpose. For when slice_len is greater than 2. */
vec2 v_dir = sample_circle(((float(slice) + noise.x) / float(slice_len)));
#endif
for (int i = 0; i < 2; i++) {
/* Setup integration domain around V. */
vec3 vB = normalize(cross(vV, vec3(v_dir, 0.0)));
vec3 vT = cross(vB, vV);
/* Projected view normal onto the integration plane. */
float vN_proj_len;
vec3 vN_proj = normalize_and_get_length(vN - vB * dot(vN, vB), vN_proj_len);
float vN_sin = dot(vN_proj, vT);
float vN_cos = saturate(dot(vN_proj, vV));
/* Angle between normalized projected normal and view vector. */
float vN_angle = sign(vN_sin) * acos_fast(vN_cos);
vec3 slice_light = vec3(0.0);
uint slice_bitmask = 0u;
horizon_scan_context_slice_start(context, vV, vT, vB);
/* For both sides of the view vector. */
for (int side = 0; side < 2; side++) {
@ -100,18 +318,18 @@ vec3 horizon_scan_eval(vec3 vP,
/* Always cross at least one pixel. */
float time = 1.0 + square((float(j) + noise.y) / float(sample_count)) * ssray.max_time;
float lod = float(j >> 2) / (1.0 + uniform_buf.ao.quality);
float lod = 1.0 + (float(j >> 2) / (1.0 + uniform_buf.ao.quality));
vec2 sample_uv = ssray.origin.xy + ssray.direction.xy * time;
float sample_depth =
textureLod(depth_tx, sample_uv * uniform_buf.hiz.uv_scale, floor(lod)).r;
float sample_depth = textureLod(hiz_tx, sample_uv * uniform_buf.hiz.uv_scale, lod).r;
if (sample_depth == 1.0) {
/* Skip background. Avoids making shadow on the geometry near the far plane. */
continue;
}
bool front_facing = vN.z > 0.0;
/* TODO(fclem): Re-introduce bias. But this is difficult to do per closure. */
bool front_facing = true; // vN.z > 0.0;
/* Bias depth a bit to avoid self shadowing issues. */
const float bias = 2.0 * 2.4e-7;
@ -137,35 +355,16 @@ vec3 horizon_scan_eval(vec3 vP,
vec2 theta = acos_fast(vec2(dot(vL_front, vV), dot(vL_back, vV)));
/* If we are tracing backward, the angles are negative. Swizzle to keep correct order. */
theta = (side == 0) ? theta.xy : -theta.yx;
theta -= vN_angle;
/* Angular bias. Shrink the visibility bitmask around the projected normal. */
theta *= angle_bias;
uint sample_bitmask = horizon_scan_angles_to_bitmask(theta);
#ifdef USE_RADIANCE_ACCUMULATION
float sample_visibility = horizon_scan_bitmask_to_visibility_uniform(sample_bitmask &
~slice_bitmask);
if (sample_visibility > 0.0) {
vec3 sample_radiance = horizon_scan_sample_radiance(sample_uv);
# ifdef USE_NORMAL_MASKING
vec3 sample_normal = horizon_scan_sample_normal(sample_uv);
sample_visibility *= dot(sample_normal, -vL_front);
# endif
slice_light += sample_radiance * (bsdf_eval(vN, vL_front) * sample_visibility);
}
#endif
slice_bitmask |= sample_bitmask;
horizon_scan_context_sample_finish(context, vL_front, vV, sample_uv, theta, angle_bias);
}
}
/* Add distant lighting. */
slice_light = vec3(horizon_scan_bitmask_to_occlusion_cosine(slice_bitmask));
/* Correct normal not on plane (Eq. 8 of GTAO paper). */
accum_light += slice_light * vN_proj_len;
accum_weight += vN_proj_len;
horizon_scan_context_slice_finish(context);
/* Rotate 90 degrees. */
v_dir = orthogonal(v_dir);
}
return accum_light * safe_rcp(accum_weight);
horizon_scan_context_accumulation_finish(context);
}
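To make the direction scheme concrete, a sketch of the slice directions; sample_circle and orthogonal are assumed to behave like their GLSL counterparts:

#include <cmath>

struct Vec2 {
  float x, y;
};

static Vec2 sample_circle(float t) /* t in [0..1) maps to a full turn. */
{
  const float a = 6.2831853f * t;
  return {std::cos(a), std::sin(a)};
}
static Vec2 orthogonal(Vec2 v) /* 90 degree rotation. */
{
  return {-v.y, v.x};
}

/* noise_x in [0..1) only spans the first quadrant of start angles; the second
 * slice is the 90 degree rotation, and the two signed sides of each slice
 * cover the opposite directions, so the full circle is still integrated. */
static void slice_directions(float noise_x, Vec2 r_dir[2])
{
  r_dir[0] = sample_circle(noise_x * 0.25f);
  r_dir[1] = orthogonal(r_dir[0]);
}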


@ -12,6 +12,7 @@
#pragma BLENDER_REQUIRE(gpu_shader_utildefines_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_math_vector_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_math_fast_lib.glsl)
/**
* Returns the bitmask for a given ordered pair of angle in [-pi/2..pi/2] range.
@ -77,7 +78,24 @@ float horizon_scan_bitmask_to_occlusion_cosine(uint bitmask)
#endif
}
float bsdf_eval(vec3 N, vec3 L)
float bsdf_eval(vec3 N, vec3 L, vec3 V)
{
return dot(N, L);
}
/**
* Projects the normal `N` onto a plane defined by `V` and `T`.
* V, T, B forms an orthonormal basis around V.
* Returns the angle of the normal projected normal with `V` and its length.
*/
void horizon_scan_projected_normal_to_plane_angle_and_length(
vec3 N, vec3 V, vec3 T, vec3 B, out float N_proj_len, out float N_angle)
{
/* Projected view normal onto the integration plane. */
vec3 N_proj = normalize_and_get_length(N - B * dot(N, B), N_proj_len);
float N_sin = dot(N_proj, T);
float N_cos = dot(N_proj, V);
/* Angle between normalized projected normal and view vector. */
N_angle = sign(N_sin) * acos_fast(N_cos);
}
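
This helper factors out the per-slice setup that horizon_scan_eval() performs for each direction. A hypothetical call site follows; the basis construction here is an assumption for illustration, not a quote of the actual caller.

/* Hypothetical usage of the helper above. The exact basis construction in the
 * caller may differ; here vT/vB are simply built around the view vector vV
 * from a screen-space slice direction v_dir. */
void example_slice_setup(vec3 vN, vec3 vV, vec2 v_dir)
{
  vec3 vB = normalize(cross(vV, vec3(v_dir, 0.0)));
  vec3 vT = cross(vB, vV);
  float vN_proj_len;
  float vN_angle;
  horizon_scan_projected_normal_to_plane_angle_and_length(
      vN, vV, vT, vB, vN_proj_len, vN_angle);
  /* vN_angle recenters the horizon angles on the surface normal, and
   * vN_proj_len weights the slice contribution (Eq. 8 of the GTAO paper). */
}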

View File

@ -0,0 +1,47 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/**
* This pass reprojects the input radiance if needed, downsamples it and outputs the matching normal.
*
* Dispatched as one thread for each trace resolution pixel.
*/
#pragma BLENDER_REQUIRE(draw_view_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_gbuffer_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_math_matrix_lib.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
ivec2 texel_fullres = texel * uniform_buf.raytrace.resolution_scale +
uniform_buf.raytrace.resolution_bias;
/* Load Gbuffer. */
GBufferData gbuf = gbuffer_read(gbuf_header_tx, gbuf_closure_tx, gbuf_color_tx, texel_fullres);
/* Export normal. */
/* TODO(fclem): Export the most visible normal. */
vec3 N = gbuf.has_diffuse ? gbuf.diffuse.N : gbuf.reflection.N;
if (is_zero(N)) {
/* Avoid NaN. The zero normal should be fixed upstream in any case. */
N = vec3(1.0, 0.0, 0.0);
}
vec3 vN = drw_normal_world_to_view(N);
imageStore(out_normal_img, texel, vec4(vN * 0.5 + 0.5, 0.0));
/* Re-project radiance. */
vec2 uv = (vec2(texel_fullres) + 0.5) / vec2(textureSize(depth_tx, 0).xy);
float depth = texelFetch(depth_tx, texel_fullres, 0).r;
vec3 P = drw_point_screen_to_world(vec3(uv, depth));
vec3 ssP_prev = drw_ndc_to_screen(project_point(uniform_buf.raytrace.radiance_persmat, P));
vec4 radiance = texture(in_radiance_tx, ssP_prev.xy);
float luma = max(1e-8, reduce_max(radiance.rgb));
radiance *= 1.0 - max(0.0, luma - uniform_buf.raytrace.brightness_clamp) / luma;
imageStore(out_radiance_img, texel, radiance);
}
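
The last two lines implement a hue-preserving firefly clamp: the color is rescaled so that its brightest channel never exceeds brightness_clamp. A standalone restatement with a worked example:

/* Restatement of the clamp above: rescale so the brightest channel does not
 * exceed clamp_value while preserving hue. For rgb = (8, 2, 1) and
 * clamp_value = 4: luma = 8, scale = 1 - (8 - 4) / 8 = 0.5, result (4, 1, 0.5). */
vec3 clamp_brightness(vec3 color, float clamp_value)
{
  /* reduce_max() in the pass above is the maximum component. */
  float luma = max(1e-8, max(color.r, max(color.g, color.b)));
  float scale = 1.0 - max(0.0, luma - clamp_value) / luma;
  return color * scale;
}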

View File

@ -53,11 +53,11 @@ float bilateral_normal_weight(vec3 center_N, vec3 sample_N)
/* To remove some more fireflies, "tone-map" the color samples during accumulation. */
vec3 to_accumulation_space(vec3 color)
{
return color / (1.0 + dot(color, vec3(1.0)));
return color / (1.0 + reduce_add(color));
}
vec3 from_accumulation_space(vec3 color)
{
return color / (1.0 - dot(color, vec3(1.0)));
return color / (1.0 - reduce_add(color));
}
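
This remap is exactly invertible: the component sum of the compressed color is always below 1, so the division in from_accumulation_space() recovers the original. A round-trip sketch of how a weighted filter would use the pair, assuming reduce_add(c) is c.r + c.g + c.b:

/* Sketch: average two samples in accumulation space so a single very bright
 * sample cannot dominate the filter footprint, then map back. */
vec3 filter_two_samples_sketch(vec3 c0, float w0, vec3 c1, float w1)
{
  vec3 a0 = c0 / (1.0 + (c0.r + c0.g + c0.b));
  vec3 a1 = c1 / (1.0 + (c1.r + c1.g + c1.b));
  vec3 avg = (a0 * w0 + a1 * w1) / (w0 + w1);
  /* Inverse transform: the component sum of `avg` is strictly below 1. */
  return avg / (1.0 - (avg.r + avg.g + avg.b));
}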
void gbuffer_load_closure_data(sampler2DArray gbuf_closure_tx,
@ -101,7 +101,7 @@ void main()
const uint tile_size = RAYTRACE_GROUP_SIZE;
uvec2 tile_coord = unpackUvec2x16(tiles_coord_buf[gl_WorkGroupID.x]);
ivec2 texel_fullres = ivec2(gl_LocalInvocationID.xy + tile_coord * tile_size);
vec2 center_uv = vec2(texel_fullres) * uniform_buf.raytrace.full_resolution_inv;
vec2 center_uv = (vec2(texel_fullres) + 0.5) * uniform_buf.raytrace.full_resolution_inv;
float center_depth = texelFetch(depth_tx, texel_fullres, 0).r;
vec3 center_P = drw_point_screen_to_world(vec3(center_uv, center_depth));
@ -157,13 +157,14 @@ void main()
ivec2 sample_texel = texel_fullres + offset;
ivec2 sample_tile = sample_texel / RAYTRACE_GROUP_SIZE;
/* Make sure the sample has been processed and does not contain garbage data. */
bool unprocessed_tile = imageLoad(tile_mask_img, sample_tile).r == 0;
uint tile_mask = imageLoad(tile_mask_img, sample_tile).r;
bool unprocessed_tile = !flag_test(tile_mask, 1u << 0u);
if (unprocessed_tile) {
continue;
}
float sample_depth = texelFetch(depth_tx, sample_texel, 0).r;
vec2 sample_uv = vec2(sample_texel) * uniform_buf.raytrace.full_resolution_inv;
vec2 sample_uv = (vec2(sample_texel) + 0.5) * uniform_buf.raytrace.full_resolution_inv;
vec3 sample_P = drw_point_screen_to_world(vec3(sample_uv, sample_depth));
/* Background case. */
@ -181,7 +182,7 @@ void main()
vec3 radiance = imageLoad(in_radiance_img, sample_texel).rgb;
/* Do not gather unprocessed pixels. */
if (all(equal(in_radiance, FLT_11_11_10_MAX))) {
if (all(equal(radiance, FLT_11_11_10_MAX))) {
continue;
}
accum_radiance += to_accumulation_space(radiance) * weight;

View File

@ -63,7 +63,8 @@ void main()
continue;
}
bool tile_is_unused = imageLoad(tile_mask_img, tile_coord_neighbor).r == 0;
uint tile_mask = imageLoad(tile_mask_img, tile_coord_neighbor).r;
bool tile_is_unused = !flag_test(tile_mask, 1u << 0u);
if (tile_is_unused) {
ivec2 texel_fullres_neighbor = texel_fullres + ivec2(x, y) * int(tile_size);

View File

@ -12,13 +12,13 @@
#pragma BLENDER_REQUIRE(gpu_shader_codegen_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_gbuffer_lib.glsl)
shared uint tile_contains_glossy_rays;
shared uint tile_contains_ray_tracing;
shared uint tile_contains_horizon_scan;
/* Returns a blend factor between different irradiance fetching methods for reflections. */
float ray_glossy_factor(float roughness)
/* Returns a blend factor between different tracing methods. */
float ray_roughness_factor(RayTraceData raytrace, float roughness)
{
/* TODO */
return 1.0;
return saturate(roughness * raytrace.roughness_mask_scale - raytrace.roughness_mask_bias);
}
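
The factor ramps from 0 (pure ray tracing) to 1 (pure horizon scan) across a roughness window encoded by the scale/bias pair. A sketch of how such a pair could be derived from a user-facing range; the fade_start/fade_end names are assumptions, not the actual parameters:

/* Sketch: derive roughness_mask_scale / roughness_mask_bias from a roughness
 * range [fade_start, fade_end] (hypothetical names). Inside the range the
 * factor ramps 0 -> 1, selecting horizon scan over ray tracing for rough
 * closures. */
void roughness_mask_from_range(float fade_start, float fade_end,
                               out float mask_scale, out float mask_bias)
{
  mask_scale = 1.0 / max(1e-6, fade_end - fade_start);
  mask_bias = fade_start * mask_scale;
  /* saturate(r * mask_scale - mask_bias)
   *   == saturate((r - fade_start) / (fade_end - fade_start)). */
}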
void main()
@ -27,15 +27,22 @@ void main()
/* Clear num_groups_x to 0 so that we can use it as a counter in the compaction phase.
* Note that these writes are subject to a race condition, but we write the same value
* from all work-groups. */
denoise_dispatch_buf.num_groups_x = 0u;
denoise_dispatch_buf.num_groups_y = 1u;
denoise_dispatch_buf.num_groups_z = 1u;
ray_denoise_dispatch_buf.num_groups_x = 0u;
ray_denoise_dispatch_buf.num_groups_y = 1u;
ray_denoise_dispatch_buf.num_groups_z = 1u;
ray_dispatch_buf.num_groups_x = 0u;
ray_dispatch_buf.num_groups_y = 1u;
ray_dispatch_buf.num_groups_z = 1u;
horizon_dispatch_buf.num_groups_x = 0u;
horizon_dispatch_buf.num_groups_y = 1u;
horizon_dispatch_buf.num_groups_z = 1u;
horizon_denoise_dispatch_buf.num_groups_x = 0u;
horizon_denoise_dispatch_buf.num_groups_y = 1u;
horizon_denoise_dispatch_buf.num_groups_z = 1u;
/* Init shared variables. */
tile_contains_glossy_rays = 0;
tile_contains_ray_tracing = 0;
tile_contains_horizon_scan = 0;
}
barrier();
@ -48,13 +55,22 @@ void main()
if (flag_test(closure_bits, uniform_buf.raytrace.closure_active)) {
GBufferData gbuf = gbuffer_read(gbuf_header_tx, gbuf_closure_tx, gbuf_color_tx, texel);
float roughness = (uniform_buf.raytrace.closure_active == CLOSURE_REFRACTION) ?
gbuf.refraction.roughness :
gbuf.reflection.roughness;
float roughness = 1.0;
if (uniform_buf.raytrace.closure_active == eClosureBits(CLOSURE_REFLECTION)) {
roughness = gbuf.reflection.roughness;
}
if (uniform_buf.raytrace.closure_active == eClosureBits(CLOSURE_REFRACTION)) {
roughness = 0.0; /* TODO(fclem): Apparent roughness. For now, always raytrace. */
}
if (ray_glossy_factor(roughness) > 0.0) {
float ray_roughness_fac = ray_roughness_factor(uniform_buf.raytrace, roughness);
if (ray_roughness_fac > 0.0) {
/* We don't care about race condition here. */
tile_contains_glossy_rays = 1;
tile_contains_horizon_scan = 1;
}
if (ray_roughness_fac < 1.0) {
/* We don't care about race condition here. */
tile_contains_ray_tracing = 1;
}
}
@ -64,8 +80,11 @@ void main()
ivec2 tile_co = ivec2(gl_WorkGroupID.xy);
uint tile_mask = 0u;
if (tile_contains_glossy_rays > 0) {
tile_mask = 1u;
if (tile_contains_ray_tracing > 0) {
tile_mask |= 1u << 0u;
}
if (tile_contains_horizon_scan > 0) {
tile_mask |= 1u << 1u;
}
imageStore(tile_mask_img, tile_co, uvec4(tile_mask));
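
For readers following the mask across passes: bit 0 marks tiles that need ray tracing and bit 1 marks tiles that need horizon scan, matching the flag_test() calls in the denoise and dispatch shaders above. A small restatement of the convention; the #define names are illustrative and not part of the patch:

/* Tile mask bit convention used across these passes (names are illustrative). */
#define TILE_MASK_RAY_TRACING (1u << 0u)
#define TILE_MASK_HORIZON_SCAN (1u << 1u)

bool tile_needs_ray_tracing(uint tile_mask)
{
  return (tile_mask & TILE_MASK_RAY_TRACING) != 0u;
}

bool tile_needs_horizon_scan(uint tile_mask)
{
  return (tile_mask & TILE_MASK_HORIZON_SCAN) != 0u;
}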

View File

@ -19,9 +19,13 @@ void main()
ivec2 tile = ivec2(gl_GlobalInvocationID.xy);
/* True if an adjacent tile is tracing and will need this tile's data for denoising. */
bool tile_is_sampled = false;
bool tile_is_ray_sampled = false;
/* True if this tile is shooting and tracing rays. */
bool tile_is_tracing = false;
bool tile_is_ray_tracing = false;
/* True if this tile is using horizon scan. */
bool tile_is_horizon_tracing = false;
/* True if an adjacent tile is tracing and will need this tile's data for denoising (horizon). */
bool tile_is_horizon_sampled = false;
/* Could be optimized if that becomes an issue. */
for (int x_tile = -1; x_tile <= 1; x_tile++) {
for (int y_tile = -1; y_tile <= 1; y_tile++) {
@ -32,17 +36,32 @@ void main()
if (any(greaterThanEqual(full_res_tile, imageSize(tile_mask_img)))) {
continue;
}
bool denoise_tile_is_used = imageLoad(tile_mask_img, full_res_tile).r != 0u;
if (denoise_tile_is_used) {
uint tile_mask = imageLoad(tile_mask_img, full_res_tile).r;
bool tile_uses_ray_tracing = flag_test(tile_mask, 1u << 0u);
bool tile_uses_horizon_scan = flag_test(tile_mask, 1u << 1u);
if (tile_uses_ray_tracing) {
if (x_tile == 0 && y_tile == 0) {
/* Dispatch full resolution denoise tile. */
uint tile_index = atomicAdd(denoise_dispatch_buf.num_groups_x, 1u);
denoise_tiles_buf[tile_index] = packUvec2x16(uvec2(full_res_tile));
tile_is_tracing = true;
uint tile_index = atomicAdd(ray_denoise_dispatch_buf.num_groups_x, 1u);
ray_denoise_tiles_buf[tile_index] = packUvec2x16(uvec2(full_res_tile));
tile_is_ray_tracing = true;
}
else {
/* This denoise tile will sample the target tracing tile. Make sure it is cleared. */
tile_is_sampled = true;
tile_is_ray_sampled = true;
}
}
if (tile_uses_horizon_scan) {
if (x_tile == 0 && y_tile == 0) {
/* Dispatch full resolution horizon scan. */
uint tile_horizon_index = atomicAdd(horizon_denoise_dispatch_buf.num_groups_x, 1u);
horizon_denoise_tiles_buf[tile_horizon_index] = packUvec2x16(uvec2(full_res_tile));
tile_is_horizon_tracing = true;
}
else {
/* This denoise tile will sample the target tracing tile. Make sure it is cleared. */
tile_is_horizon_sampled = true;
}
}
}
@ -51,9 +70,16 @@ void main()
}
/* TODO(fclem): we might want to dispatch another type of shader only for clearing. */
if (tile_is_tracing || tile_is_sampled) {
if (tile_is_ray_tracing || tile_is_ray_sampled) {
/* Dispatch trace resolution tracing tile. */
uint tile_index = atomicAdd(ray_dispatch_buf.num_groups_x, 1u);
ray_tiles_buf[tile_index] = packUvec2x16(uvec2(tile));
}
/* TODO(fclem): we might want to dispatch another type of shader only for clearing. */
if (tile_is_horizon_tracing || tile_is_horizon_sampled) {
/* Dispatch trace resolution horizon scan tile. */
uint tile_index = atomicAdd(horizon_dispatch_buf.num_groups_x, 1u);
horizon_tiles_buf[tile_index] = packUvec2x16(uvec2(tile));
}
}
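
Both branches use the same stream-compaction idiom: atomicAdd() on num_groups_x reserves a slot in the tile list and simultaneously grows the indirect dispatch by one work-group. A generic self-contained sketch of the pattern; buffer names, bindings and the packing layout are assumptions:

/* Hypothetical buffers mirroring the pattern above. */
layout(std430, binding = 0) buffer DispatchBuf {
  uint num_groups_x, num_groups_y, num_groups_z;
} dispatch_buf;

layout(std430, binding = 1) buffer TilesBuf {
  uint tiles_buf[];
};

void append_tile(uvec2 tile)
{
  /* Reserve a slot; this also grows the indirect dispatch by one work-group. */
  uint index = atomicAdd(dispatch_buf.num_groups_x, 1u);
  /* One possible packing, analogous to packUvec2x16(): two 16-bit
   * coordinates in one uint. */
  tiles_buf[index] = (tile.y << 16u) | (tile.x & 0xFFFFu);
}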

View File

@ -3,6 +3,7 @@
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma BLENDER_REQUIRE(draw_view_lib.glsl)
#pragma BLENDER_REQUIRE(draw_math_geom_lib.glsl)
/**
* General purpose 3D ray.

View File

@ -90,6 +90,12 @@ void main(void)
vec2 sample_scale = vec2(ProjectionMatrix[0][0], ProjectionMatrix[1][1]) *
(0.5 * max_radius / homcoord);
float pixel_footprint = sample_scale.x * textureSize(depth_tx, 0).x;
if (pixel_footprint <= 1.0) {
/* Early out, avoid divisions by zero. */
return;
}
/* Avoid too small radii that have float imprecision. */
vec3 clamped_sss_radius = max(vec3(1e-4), gbuf.diffuse.sss_radius / max_radius) * max_radius;
/* Scale albedo because we can have HDR value caused by BSDF sampling. */

View File

@ -144,7 +144,5 @@ void main()
/* Only output emission during the gbuffer pass. */
out_radiance = vec4(g_emission, 0.0);
out_radiance.rgb *= 1.0 - g_holdout;
out_transmittance.rgb = g_transmittance;
out_transmittance.a = saturate(average(g_transmittance));
out_radiance.a = g_holdout;
}

Some files were not shown because too many files have changed in this diff.