Cycles: Added quality parameter for OIDN #115265

Merged
Stefan Werner merged 2 commits from Stefan_Werner/blender:oidn_quality into main 2023-11-23 12:35:38 +01:00
198 changed files with 5601 additions and 2245 deletions
Showing only changes of commit f1800b2516

View File

@ -599,7 +599,7 @@ doc_dna: .FORCE
@echo "docs written into: '$(BLENDER_DIR)/doc/blender_file_format/dna.html'"
doc_man: .FORCE
@$(PYTHON) doc/manpage/blender.1.py --blender="$(BLENDER_BIN)" --output=blender.1 --verbose
@$(BLENDER_BIN) --background --python doc/manpage/blender.1.py -- --output=blender.1 --verbose
help_features: .FORCE
@$(PYTHON) "$(BLENDER_DIR)/build_files/cmake/cmake_print_build_options.py" $(BLENDER_DIR)"/CMakeLists.txt"

View File

@ -303,7 +303,7 @@ DEPS_MANDATORY_SUBPACKAGES = (
},
),
Package(name="FreeType Library",
distro_package_names={DISTRO_ID_DEBIAN: "libfreetype6-dev",
distro_package_names={DISTRO_ID_DEBIAN: "libfreetype-dev",
DISTRO_ID_FEDORA: "freetype-devel",
DISTRO_ID_SUSE: "freetype2-devel",
DISTRO_ID_ARCH: "freetype2",
@ -505,6 +505,13 @@ DEPS_OPTIONAL_SUBPACKAGES = (
DISTRO_ID_ARCH: ...,
},
),
Package(name="Deflate Library",
distro_package_names={DISTRO_ID_DEBIAN: "libdeflate-dev",
DISTRO_ID_FEDORA: "libdeflate-devel",
DISTRO_ID_SUSE: "libdeflate-devel",
DISTRO_ID_ARCH: "libdeflate",
},
),
)
@ -548,7 +555,7 @@ PYTHON_SUBPACKAGES = (
DISTRO_ID_ARCH: "python-urllib3",
},
),
Package(name="Certifi", version="2021.10.08", version_short="2021.10", version_min="2021.0", version_mex="2023.0",
Package(name="Certifi", version="2021.10.08", version_short="2021.10", version_min="2021.0", version_mex="2025.0",
distro_package_names={DISTRO_ID_DEBIAN: "python3-certifi",
DISTRO_ID_FEDORA: "python3-certifi",
DISTRO_ID_SUSE: suse_pypackages_name_gen("certifi"),
@ -569,14 +576,14 @@ PYTHON_SUBPACKAGES = (
DISTRO_ID_ARCH: "python-zstandard",
},
),
Package(name="NumPy", version="1.23.5", version_short="1.23", version_min="1.14", version_mex="2.0",
Package(name="NumPy", version="1.24.3", version_short="1.24", version_min="1.14", version_mex="2.0",
distro_package_names={DISTRO_ID_DEBIAN: "python3-numpy",
DISTRO_ID_FEDORA: "python3-numpy",
DISTRO_ID_SUSE: suse_pypackages_name_gen("numpy"),
DISTRO_ID_ARCH: "python-numpy",
},
),
Package(name="NumPy Devel", version="1.23.5", version_short="1.23", version_min="1.14", version_mex="2.0",
Package(name="NumPy Devel", version="1.24.3", version_short="1.24", version_min="1.14", version_mex="2.0",
distro_package_names={DISTRO_ID_DEBIAN: ...,
DISTRO_ID_FEDORA: ...,
DISTRO_ID_SUSE: suse_pypackages_name_gen("numpy-devel"),
@ -706,7 +713,7 @@ PACKAGES_ALL = (
DISTRO_ID_ARCH: "clang", # clang-format is part of the main clang package.
},
),
Package(name="Python", is_mandatory=True, version="3.10.12", version_short="3.10", version_min="3.10", version_mex="3.12",
Package(name="Python", is_mandatory=True, version="3.11.6", version_short="3.11", version_min="3.11", version_mex="3.13",
sub_packages=PYTHON_SUBPACKAGES,
distro_package_names={DISTRO_ID_DEBIAN: "python3-dev",
DISTRO_ID_FEDORA: "python3-devel",
@ -714,7 +721,7 @@ PACKAGES_ALL = (
DISTRO_ID_ARCH: "python",
},
),
Package(name="Boost Libraries", is_mandatory=True, version="1.80.0", version_short="1.80", version_min="1.49", version_mex="2.0",
Package(name="Boost Libraries", is_mandatory=True, version="1.82.0", version_short="1.82", version_min="1.49", version_mex="2.0",
sub_packages=BOOST_SUBPACKAGES,
distro_package_names={DISTRO_ID_DEBIAN: "libboost-dev",
DISTRO_ID_FEDORA: "boost-devel",
@ -730,7 +737,7 @@ PACKAGES_ALL = (
DISTRO_ID_ARCH: "intel-oneapi-tbb",
},
),
Package(name="OpenColorIO Library", is_mandatory=False, version="2.2.0", version_short="2.2", version_min="2.0", version_mex="3.0",
Package(name="OpenColorIO Library", is_mandatory=False, version="2.3.0", version_short="2.3", version_min="2.0", version_mex="3.0",
sub_packages=(),
distro_package_names={DISTRO_ID_DEBIAN: "libopencolorio-dev",
DISTRO_ID_FEDORA: "OpenColorIO-devel",
@ -738,7 +745,7 @@ PACKAGES_ALL = (
DISTRO_ID_ARCH: "opencolorio",
},
),
Package(name="IMath Library", is_mandatory=False, version="3.1.7", version_short="3.1", version_min="3.0", version_mex="4.0",
Package(name="IMath Library", is_mandatory=False, version="3.2.1", version_short="3.2", version_min="3.0", version_mex="4.0",
sub_packages=(),
distro_package_names={DISTRO_ID_DEBIAN: "libimath-dev",
DISTRO_ID_FEDORA: "imath-devel",
@ -746,7 +753,7 @@ PACKAGES_ALL = (
DISTRO_ID_ARCH: "imath",
},
),
Package(name="OpenEXR Library", is_mandatory=False, version="3.1.7", version_short="3.1", version_min="3.0", version_mex="4.0",
Package(name="OpenEXR Library", is_mandatory=False, version="3.2.1", version_short="3.2", version_min="3.0", version_mex="4.0",
sub_packages=(),
distro_package_names={DISTRO_ID_DEBIAN: "libopenexr-dev",
DISTRO_ID_FEDORA: "openexr-devel",
@ -801,7 +808,7 @@ PACKAGES_ALL = (
DISTRO_ID_ARCH: "openshadinglanguage",
},
),
Package(name="OpenSubDiv Library", is_mandatory=False, version="3.5.0", version_short="3.5", version_min="3.5", version_mex="4.0",
Package(name="OpenSubDiv Library", is_mandatory=False, version="3.6.0", version_short="3.6", version_min="3.5", version_mex="4.0",
sub_packages=(),
distro_package_names={DISTRO_ID_DEBIAN: "libosd-dev",
DISTRO_ID_FEDORA: "opensubdiv-devel",
@ -809,7 +816,7 @@ PACKAGES_ALL = (
DISTRO_ID_ARCH: "opensubdiv",
},
),
Package(name="OpenVDB Library", is_mandatory=False, version="10.0.0", version_short="10.0", version_min="10.0", version_mex="11.0",
Package(name="OpenVDB Library", is_mandatory=False, version="11.0.0", version_short="11.0", version_min="10.0", version_mex="12.0",
sub_packages=(
# Assume packaged versions of the dependencies are compatible with OpenVDB package.
Package(name="OpenVDB Dependencies", is_mandatory=False, is_group=True,
@ -845,7 +852,7 @@ PACKAGES_ALL = (
DISTRO_ID_ARCH: "alembic",
},
),
Package(name="MaterialX Library", is_mandatory=False, version="1.38.6", version_short="1.38", version_min="1.38", version_mex="1.40",
Package(name="MaterialX Library", is_mandatory=False, version="1.38.8", version_short="1.38", version_min="1.38", version_mex="1.40",
sub_packages=(),
distro_package_names={DISTRO_ID_DEBIAN: None,
DISTRO_ID_FEDORA: None,
@ -876,7 +883,7 @@ PACKAGES_ALL = (
DISTRO_ID_ARCH: "embree",
},
),
Package(name="OpenImageDenoiser Library", is_mandatory=False, version="1.4.3", version_short="1.4", version_min="1.4.0", version_mex="1.5",
Package(name="OpenImageDenoiser Library", is_mandatory=False, version="2.1.0", version_short="2.1", version_min="2.0.0", version_mex="3.0",
sub_packages=(),
distro_package_names={DISTRO_ID_DEBIAN: None,
DISTRO_ID_FEDORA: "oidn-devel",

View File

@ -41,7 +41,7 @@ def blender_extract_info() -> Dict[str, str]:
# Happens when built without WITH_BUILD_INFO e.g.
blender_date = time.strftime("%B %d, %Y", time.gmtime(int(os.environ.get('SOURCE_DATE_EPOCH', time.time()))))
else:
blender_date = time.strftime("%B %d, %Y", time.strptime(blender_build_date_text, "%Y-%m-%d"))
blender_date = time.strftime("%B %d, %Y", time.strptime(blender_build_date_text.decode(), "%Y-%m-%d"))
return {
"help": blender_help_text,

View File

@ -1,7 +1,7 @@
Project: {fmt}
URL: https://github.com/fmtlib/fmt
License: MIT
Upstream version: 10.0.0 (a0b8a92, 2023 May 10)
Upstream version: 10.1.1 (f5e5435, 2023 Aug 28)
Local modifications:
- Took only files needed for Blender:

View File

@ -22,6 +22,9 @@
:alt: Ask questions at StackOverflow with the tag fmt
:target: https://stackoverflow.com/questions/tagged/fmt
.. image:: https://api.securityscorecards.dev/projects/github.com/fmtlib/fmt/badge
:target: https://securityscorecards.dev/viewer/?uri=github.com/fmtlib/fmt
**{fmt}** is an open-source formatting library providing a fast and safe
alternative to C stdio and C++ iostreams.
@ -49,6 +52,7 @@ Features
* Fast IEEE 754 floating-point formatter with correct rounding, shortness and
round-trip guarantees using the `Dragonbox <https://github.com/jk-jeon/dragonbox>`_
algorithm
* Portable Unicode support
* Safe `printf implementation
<https://fmt.dev/latest/api.html#printf-formatting>`_ including the POSIX
extension for positional arguments
@ -65,7 +69,7 @@ Features
<https://github.com/fmtlib/fmt/tree/master/test>`_ and is `continuously fuzzed
<https://bugs.chromium.org/p/oss-fuzz/issues/list?colspec=ID%20Type%20
Component%20Status%20Proj%20Reported%20Owner%20Summary&q=proj%3Dfmt&can=1>`_
* Safety: the library is fully type safe, errors in format strings can be
* Safety: the library is fully type-safe, errors in format strings can be
reported at compile time, automatic memory management prevents buffer overflow
errors
* Ease of use: small self-contained code base, no external dependencies,
@ -75,7 +79,7 @@ Features
consistent output across platforms and support for older compilers
* Clean warning-free codebase even on high warning levels such as
``-Wall -Wextra -pedantic``
* Locale-independence by default
* Locale independence by default
* Optional header-only configuration enabled with the ``FMT_HEADER_ONLY`` macro
See the `documentation <https://fmt.dev>`_ for more details.
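A minimal sketch of the header-only configuration mentioned above (standard {fmt} usage, not part of this diff):

// Define FMT_HEADER_ONLY before the first include; no linking against
// the fmt library is then required.
#define FMT_HEADER_ONLY
#include <fmt/format.h>

int main() { fmt::print("pi is roughly {:.2f}\n", 3.14159); }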
@ -225,7 +229,7 @@ The script `bloat-test.py
from `format-benchmark <https://github.com/fmtlib/format-benchmark>`_
tests compile time and code bloat for nontrivial projects.
It generates 100 translation units and uses ``printf()`` or its alternative
five times in each to simulate a medium sized project. The resulting
five times in each to simulate a medium-sized project. The resulting
executable size and compile time (Apple LLVM version 8.1.0 (clang-802.0.42),
macOS Sierra, best of three) is shown in the following tables.
@ -246,7 +250,7 @@ As you can see, {fmt} has 60% less overhead in terms of resulting binary code
size compared to iostreams and comes pretty close to ``printf``. Boost Format
and Folly Format have the largest overheads.
``printf+string`` is the same as ``printf`` but with extra ``<string>``
``printf+string`` is the same as ``printf`` but with an extra ``<string>``
include to measure the overhead of the latter.
**Non-optimized build**
@ -262,14 +266,14 @@ Boost Format 54.1 365 303
Folly Format 79.9 445 430
============= =============== ==================== ==================
``libc``, ``lib(std)c++`` and ``libfmt`` are all linked as shared libraries to
``libc``, ``lib(std)c++``, and ``libfmt`` are all linked as shared libraries to
compare formatting function overhead only. Boost Format is a
header-only library so it doesn't provide any linkage options.
Running the tests
~~~~~~~~~~~~~~~~~
Please refer to `Building the library`__ for the instructions on how to build
Please refer to `Building the library`__ for instructions on how to build
the library and run the unit tests.
__ https://fmt.dev/latest/usage.html#building-the-library
@ -294,9 +298,12 @@ or the bloat test::
Migrating code
--------------
`clang-tidy-fmt <https://github.com/mikecrowe/clang-tidy-fmt>`_ provides clang
tidy checks for converting occurrences of ``printf`` and ``fprintf`` to
``fmt::print``.
`clang-tidy <https://clang.llvm.org/extra/clang-tidy/>`_ v17 (not yet
released) provides the `modernize-use-std-print
<https://clang.llvm.org/extra/clang-tidy/checks/modernize/use-std-print.html>`_
check that is capable of converting occurrences of ``printf`` and
``fprintf`` to ``fmt::print`` if configured to do so. (By default it
converts to ``std::print``.)
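For illustration, the kind of mechanical rewrite these checks perform (hypothetical snippet, not taken from either tool):

#include <cstdio>
#include <fmt/core.h>

// Before: classic printf with a C format string.
void report_old(int n, double secs) {
  std::printf("loaded %d files in %.1f s\n", n, secs);
}

// After conversion (configured to emit fmt::print): the format string
// is rewritten to {fmt}'s replacement-field syntax as well.
void report_new(int n, double secs) {
  fmt::print("loaded {} files in {:.1f} s\n", n, secs);
}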
Projects using this library
---------------------------
@ -304,8 +311,6 @@ Projects using this library
* `0 A.D. <https://play0ad.com/>`_: a free, open-source, cross-platform
real-time strategy game
* `2GIS <https://2gis.ru/>`_: free business listings with a city map
* `AMPL/MP <https://github.com/ampl/mp>`_:
an open-source library for mathematical programming
@ -396,7 +401,7 @@ Projects using this library
proxy
* `redpanda <https://vectorized.io/redpanda>`_: a 10x faster Kafka® replacement
for mission critical systems written in C++
for mission-critical systems written in C++
* `rpclib <http://rpclib.net/>`_: a modern C++ msgpack-RPC server and client
library
@ -480,7 +485,7 @@ error handling is awkward.
Boost Format
~~~~~~~~~~~~
This is a very powerful library which supports both ``printf``-like format
This is a very powerful library that supports both ``printf``-like format
strings and positional arguments. Its main drawback is performance. According to
various benchmarks, it is much slower than other methods considered here. Boost
Format also has excessive build times and severe code bloat issues (see
@ -489,7 +494,7 @@ Format also has excessive build times and severe code bloat issues (see
FastFormat
~~~~~~~~~~
This is an interesting library which is fast, safe and has positional arguments.
This is an interesting library that is fast, safe, and has positional arguments.
However, it has significant limitations, citing its author:
Three features that have no hope of being accommodated within the
@ -505,7 +510,7 @@ restrictive for using it in some projects.
Boost Spirit.Karma
~~~~~~~~~~~~~~~~~~
This is not really a formatting library but I decided to include it here for
This is not a formatting library but I decided to include it here for
completeness. As iostreams, it suffers from the problem of mixing verbatim text
with arguments. The library is pretty fast, but slower on integer formatting
than ``fmt::format_to`` with format string compilation on Karma's own benchmark,
@ -524,7 +529,7 @@ Documentation License
The `Format String Syntax <https://fmt.dev/latest/syntax.html>`_
section in the documentation is based on the one from Python `string module
documentation <https://docs.python.org/3/library/string.html#module-string>`_.
For this reason the documentation is distributed under the Python Software
For this reason, the documentation is distributed under the Python Software
Foundation license available in `doc/python-license.txt
<https://raw.github.com/fmtlib/fmt/master/doc/python-license.txt>`_.
It only applies if you distribute the documentation of {fmt}.

View File

@ -13,11 +13,12 @@
#include <cstring> // std::strlen
#include <iterator>
#include <limits>
#include <memory> // std::addressof
#include <string>
#include <type_traits>
// The fmt library version in the form major * 10000 + minor * 100 + patch.
#define FMT_VERSION 100000
#define FMT_VERSION 100101
#if defined(__clang__) && !defined(__ibmxl__)
# define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__)
@ -92,7 +93,7 @@
#ifndef FMT_USE_CONSTEXPR
# if (FMT_HAS_FEATURE(cxx_relaxed_constexpr) || FMT_MSC_VERSION >= 1912 || \
(FMT_GCC_VERSION >= 600 && FMT_CPLUSPLUS >= 201402L)) && \
!FMT_ICC_VERSION && !defined(__NVCC__)
!FMT_ICC_VERSION && (!defined(__NVCC__) || FMT_CPLUSPLUS >= 202002L)
# define FMT_USE_CONSTEXPR 1
# else
# define FMT_USE_CONSTEXPR 0
@ -162,9 +163,6 @@
# endif
#endif
// An inline std::forward replacement.
#define FMT_FORWARD(...) static_cast<decltype(__VA_ARGS__)&&>(__VA_ARGS__)
#ifdef _MSC_VER
# define FMT_UNCHECKED_ITERATOR(It) \
using _Unchecked_type = It // Mark iterator as checked.
@ -181,8 +179,8 @@
}
#endif
#ifndef FMT_MODULE_EXPORT
# define FMT_MODULE_EXPORT
#ifndef FMT_EXPORT
# define FMT_EXPORT
# define FMT_BEGIN_EXPORT
# define FMT_END_EXPORT
#endif
@ -244,12 +242,6 @@
# endif
#endif
#if defined __cpp_inline_variables && __cpp_inline_variables >= 201606L
# define FMT_INLINE_VARIABLE inline
#else
# define FMT_INLINE_VARIABLE
#endif
// Enable minimal optimizations for more compact code in debug mode.
FMT_GCC_PRAGMA("GCC push_options")
#if !defined(__OPTIMIZE__) && !defined(__NVCOMPILER) && !defined(__LCC__) && \
@ -276,6 +268,11 @@ template <typename T> using type_identity_t = typename type_identity<T>::type;
template <typename T>
using underlying_t = typename std::underlying_type<T>::type;
// Checks whether T is a container with contiguous storage.
template <typename T> struct is_contiguous : std::false_type {};
template <typename Char>
struct is_contiguous<std::basic_string<Char>> : std::true_type {};
struct monostate {
constexpr monostate() {}
};
@ -289,8 +286,11 @@ struct monostate {
# define FMT_ENABLE_IF(...) fmt::enable_if_t<(__VA_ARGS__), int> = 0
#endif
// This is defined in core.h instead of format.h to avoid injecting in std.
// It is a template to avoid undesirable implicit conversions to std::byte.
#ifdef __cpp_lib_byte
inline auto format_as(std::byte b) -> unsigned char {
template <typename T, FMT_ENABLE_IF(std::is_same<T, std::byte>::value)>
inline auto format_as(T b) -> unsigned char {
return static_cast<unsigned char>(b);
}
#endif
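A usage sketch for the overload above (assumes a C++17 standard library providing std::byte):

#include <cstddef>
#include <fmt/core.h>

int main() {
  fmt::print("{}\n", std::byte{42});  // mapped through format_as, prints "42"
}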
@ -394,7 +394,7 @@ FMT_CONSTEXPR inline auto is_utf8() -> bool {
compiled with a different ``-std`` option than the client code (which is not
recommended).
*/
FMT_MODULE_EXPORT
FMT_EXPORT
template <typename Char> class basic_string_view {
private:
const Char* data_;
@ -497,11 +497,11 @@ template <typename Char> class basic_string_view {
}
};
FMT_MODULE_EXPORT
FMT_EXPORT
using string_view = basic_string_view<char>;
/** Specifies if ``T`` is a character type. Can be specialized by users. */
FMT_MODULE_EXPORT
FMT_EXPORT
template <typename T> struct is_char : std::false_type {};
template <> struct is_char<char> : std::true_type {};
@ -639,6 +639,9 @@ struct error_handler {
};
} // namespace detail
/** Throws ``format_error`` with a given message. */
using detail::throw_format_error;
/** String's character type. */
template <typename S> using char_t = typename detail::char_t_impl<S>::type;
@ -649,7 +652,7 @@ template <typename S> using char_t = typename detail::char_t_impl<S>::type;
You can use the ``format_parse_context`` type alias for ``char`` instead.
\endrst
*/
FMT_MODULE_EXPORT
FMT_EXPORT
template <typename Char> class basic_format_parse_context {
private:
basic_string_view<Char> format_str_;
@ -715,7 +718,7 @@ template <typename Char> class basic_format_parse_context {
FMT_CONSTEXPR void check_dynamic_spec(int arg_id);
};
FMT_MODULE_EXPORT
FMT_EXPORT
using format_parse_context = basic_format_parse_context<char>;
namespace detail {
@ -756,72 +759,6 @@ class compile_parse_context : public basic_format_parse_context<Char> {
#endif
}
};
} // namespace detail
template <typename Char>
FMT_CONSTEXPR void basic_format_parse_context<Char>::do_check_arg_id(int id) {
// Argument id is only checked at compile-time during parsing because
// formatting has its own validation.
if (detail::is_constant_evaluated() &&
(!FMT_GCC_VERSION || FMT_GCC_VERSION >= 1200)) {
using context = detail::compile_parse_context<Char>;
if (id >= static_cast<context*>(this)->num_args())
detail::throw_format_error("argument not found");
}
}
template <typename Char>
FMT_CONSTEXPR void basic_format_parse_context<Char>::check_dynamic_spec(
int arg_id) {
if (detail::is_constant_evaluated() &&
(!FMT_GCC_VERSION || FMT_GCC_VERSION >= 1200)) {
using context = detail::compile_parse_context<Char>;
static_cast<context*>(this)->check_dynamic_spec(arg_id);
}
}
FMT_MODULE_EXPORT template <typename Context> class basic_format_arg;
FMT_MODULE_EXPORT template <typename Context> class basic_format_args;
FMT_MODULE_EXPORT template <typename Context> class dynamic_format_arg_store;
// A formatter for objects of type T.
FMT_MODULE_EXPORT
template <typename T, typename Char = char, typename Enable = void>
struct formatter {
// A deleted default constructor indicates a disabled formatter.
formatter() = delete;
};
// Specifies if T has an enabled formatter specialization. A type can be
// formattable even if it doesn't have a formatter e.g. via a conversion.
template <typename T, typename Context>
using has_formatter =
std::is_constructible<typename Context::template formatter_type<T>>;
// Checks whether T is a container with contiguous storage.
template <typename T> struct is_contiguous : std::false_type {};
template <typename Char>
struct is_contiguous<std::basic_string<Char>> : std::true_type {};
class appender;
namespace detail {
template <typename Context, typename T>
constexpr auto has_const_formatter_impl(T*)
-> decltype(typename Context::template formatter_type<T>().format(
std::declval<const T&>(), std::declval<Context&>()),
true) {
return true;
}
template <typename Context>
constexpr auto has_const_formatter_impl(...) -> bool {
return false;
}
template <typename T, typename Context>
constexpr auto has_const_formatter() -> bool {
return has_const_formatter_impl<Context>(static_cast<T*>(nullptr));
}
// Extracts a reference to the container from back_insert_iterator.
template <typename Container>
@ -903,10 +840,8 @@ template <typename T> class buffer {
/** Returns the capacity of this buffer. */
constexpr auto capacity() const noexcept -> size_t { return capacity_; }
/** Returns a pointer to the buffer data. */
/** Returns a pointer to the buffer data (not null-terminated). */
FMT_CONSTEXPR auto data() noexcept -> T* { return ptr_; }
/** Returns a pointer to the buffer data. */
FMT_CONSTEXPR auto data() const noexcept -> const T* { return ptr_; }
/** Clears this buffer. */
@ -1099,6 +1034,79 @@ template <typename T = char> class counting_buffer final : public buffer<T> {
auto count() -> size_t { return count_ + this->size(); }
};
} // namespace detail
template <typename Char>
FMT_CONSTEXPR void basic_format_parse_context<Char>::do_check_arg_id(int id) {
// Argument id is only checked at compile-time during parsing because
// formatting has its own validation.
if (detail::is_constant_evaluated() &&
(!FMT_GCC_VERSION || FMT_GCC_VERSION >= 1200)) {
using context = detail::compile_parse_context<Char>;
if (id >= static_cast<context*>(this)->num_args())
detail::throw_format_error("argument not found");
}
}
template <typename Char>
FMT_CONSTEXPR void basic_format_parse_context<Char>::check_dynamic_spec(
int arg_id) {
if (detail::is_constant_evaluated() &&
(!FMT_GCC_VERSION || FMT_GCC_VERSION >= 1200)) {
using context = detail::compile_parse_context<Char>;
static_cast<context*>(this)->check_dynamic_spec(arg_id);
}
}
FMT_EXPORT template <typename Context> class basic_format_arg;
FMT_EXPORT template <typename Context> class basic_format_args;
FMT_EXPORT template <typename Context> class dynamic_format_arg_store;
// A formatter for objects of type T.
FMT_EXPORT
template <typename T, typename Char = char, typename Enable = void>
struct formatter {
// A deleted default constructor indicates a disabled formatter.
formatter() = delete;
};
// Specifies if T has an enabled formatter specialization. A type can be
// formattable even if it doesn't have a formatter e.g. via a conversion.
template <typename T, typename Context>
using has_formatter =
std::is_constructible<typename Context::template formatter_type<T>>;
// An output iterator that appends to a buffer.
// It is used to reduce symbol sizes for the common case.
class appender : public std::back_insert_iterator<detail::buffer<char>> {
using base = std::back_insert_iterator<detail::buffer<char>>;
public:
using std::back_insert_iterator<detail::buffer<char>>::back_insert_iterator;
appender(base it) noexcept : base(it) {}
FMT_UNCHECKED_ITERATOR(appender);
auto operator++() noexcept -> appender& { return *this; }
auto operator++(int) noexcept -> appender { return *this; }
};
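This iterator backs the common buffer-appending path; a minimal usage sketch through the public API:

#include <fmt/format.h>
#include <iterator>

int main() {
  auto buf = fmt::memory_buffer();
  fmt::format_to(std::back_inserter(buf), "{}-{}", 4, 2);
  // buf now holds "4-2"; fmt::to_string(buf) yields a std::string.
}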
namespace detail {
template <typename Context, typename T>
constexpr auto has_const_formatter_impl(T*)
-> decltype(typename Context::template formatter_type<T>().format(
std::declval<const T&>(), std::declval<Context&>()),
true) {
return true;
}
template <typename Context>
constexpr auto has_const_formatter_impl(...) -> bool {
return false;
}
template <typename T, typename Context>
constexpr auto has_const_formatter() -> bool {
return has_const_formatter_impl<Context>(static_cast<T*>(nullptr));
}
template <typename T>
using buffer_appender = conditional_t<std::is_same<T, char>::value, appender,
@ -1274,9 +1282,9 @@ template <typename Context> class value {
FMT_INLINE value(const named_arg_info<char_type>* args, size_t size)
: named_args{args, size} {}
template <typename T> FMT_CONSTEXPR FMT_INLINE value(T& val) {
using value_type = remove_cvref_t<T>;
custom.value = const_cast<value_type*>(&val);
template <typename T> FMT_CONSTEXPR20 FMT_INLINE value(T& val) {
using value_type = remove_const_t<T>;
custom.value = const_cast<value_type*>(std::addressof(val));
// Get the formatter type through the context to allow different contexts
// have different extension points, e.g. `formatter<T>` for `format` and
// `printf_formatter<T>` for `printf`.
@ -1301,9 +1309,6 @@ template <typename Context> class value {
}
};
template <typename Context, typename T>
FMT_CONSTEXPR auto make_arg(T&& value) -> basic_format_arg<Context>;
// To minimize the number of types we need to deal with, long is translated
// either to int or to long long depending on its size.
enum { long_short = sizeof(long) == sizeof(int) };
@ -1415,9 +1420,8 @@ template <typename Context> struct arg_mapper {
FMT_ENABLE_IF(
std::is_pointer<T>::value || std::is_member_pointer<T>::value ||
std::is_function<typename std::remove_pointer<T>::type>::value ||
(std::is_convertible<const T&, const void*>::value &&
!std::is_convertible<const T&, const char_type*>::value &&
!has_formatter<T, Context>::value))>
(std::is_array<T>::value &&
!std::is_convertible<T, const char_type*>::value))>
FMT_CONSTEXPR auto map(const T&) -> unformattable_pointer {
return {};
}
@ -1435,30 +1439,28 @@ template <typename Context> struct arg_mapper {
return map(format_as(val));
}
template <typename T, typename U = remove_cvref_t<T>>
struct formattable
: bool_constant<has_const_formatter<U, Context>() ||
(has_formatter<U, Context>::value &&
!std::is_const<remove_reference_t<T>>::value)> {};
template <typename T, typename U = remove_const_t<T>>
struct formattable : bool_constant<has_const_formatter<U, Context>() ||
(has_formatter<U, Context>::value &&
!std::is_const<T>::value)> {};
template <typename T, FMT_ENABLE_IF(formattable<T>::value)>
FMT_CONSTEXPR FMT_INLINE auto do_map(T&& val) -> T& {
FMT_CONSTEXPR FMT_INLINE auto do_map(T& val) -> T& {
return val;
}
template <typename T, FMT_ENABLE_IF(!formattable<T>::value)>
FMT_CONSTEXPR FMT_INLINE auto do_map(T&&) -> unformattable {
FMT_CONSTEXPR FMT_INLINE auto do_map(T&) -> unformattable {
return {};
}
template <typename T, typename U = remove_cvref_t<T>,
template <typename T, typename U = remove_const_t<T>,
FMT_ENABLE_IF((std::is_class<U>::value || std::is_enum<U>::value ||
std::is_union<U>::value) &&
!is_string<U>::value && !is_char<U>::value &&
!is_named_arg<U>::value &&
!std::is_arithmetic<format_as_t<U>>::value)>
FMT_CONSTEXPR FMT_INLINE auto map(T&& val)
-> decltype(this->do_map(std::forward<T>(val))) {
return do_map(std::forward<T>(val));
FMT_CONSTEXPR FMT_INLINE auto map(T& val) -> decltype(this->do_map(val)) {
return do_map(val);
}
template <typename T, FMT_ENABLE_IF(is_named_arg<T>::value)>
@ -1481,22 +1483,121 @@ enum { packed_arg_bits = 4 };
enum { max_packed_args = 62 / packed_arg_bits };
enum : unsigned long long { is_unpacked_bit = 1ULL << 63 };
enum : unsigned long long { has_named_args_bit = 1ULL << 62 };
} // namespace detail
// An output iterator that appends to a buffer.
// It is used to reduce symbol sizes for the common case.
class appender : public std::back_insert_iterator<detail::buffer<char>> {
using base = std::back_insert_iterator<detail::buffer<char>>;
template <typename Char, typename InputIt>
auto copy_str(InputIt begin, InputIt end, appender out) -> appender {
get_container(out).append(begin, end);
return out;
}
template <typename Char, typename InputIt>
auto copy_str(InputIt begin, InputIt end,
std::back_insert_iterator<std::string> out)
-> std::back_insert_iterator<std::string> {
get_container(out).append(begin, end);
return out;
}
template <typename Char, typename R, typename OutputIt>
FMT_CONSTEXPR auto copy_str(R&& rng, OutputIt out) -> OutputIt {
return detail::copy_str<Char>(rng.begin(), rng.end(), out);
}
#if FMT_GCC_VERSION && FMT_GCC_VERSION < 500
// A workaround for gcc 4.8 to make void_t work in a SFINAE context.
template <typename...> struct void_t_impl { using type = void; };
template <typename... T> using void_t = typename void_t_impl<T...>::type;
#else
template <typename...> using void_t = void;
#endif
template <typename It, typename T, typename Enable = void>
struct is_output_iterator : std::false_type {};
template <typename It, typename T>
struct is_output_iterator<
It, T,
void_t<typename std::iterator_traits<It>::iterator_category,
decltype(*std::declval<It>() = std::declval<T>())>>
: std::true_type {};
template <typename It> struct is_back_insert_iterator : std::false_type {};
template <typename Container>
struct is_back_insert_iterator<std::back_insert_iterator<Container>>
: std::true_type {};
// A type-erased reference to an std::locale to avoid a heavy <locale> include.
class locale_ref {
private:
const void* locale_; // A type-erased pointer to std::locale.
public:
using std::back_insert_iterator<detail::buffer<char>>::back_insert_iterator;
appender(base it) noexcept : base(it) {}
FMT_UNCHECKED_ITERATOR(appender);
constexpr FMT_INLINE locale_ref() : locale_(nullptr) {}
template <typename Locale> explicit locale_ref(const Locale& loc);
auto operator++() noexcept -> appender& { return *this; }
auto operator++(int) noexcept -> appender { return *this; }
explicit operator bool() const noexcept { return locale_ != nullptr; }
template <typename Locale> auto get() const -> Locale;
};
template <typename> constexpr auto encode_types() -> unsigned long long {
return 0;
}
template <typename Context, typename Arg, typename... Args>
constexpr auto encode_types() -> unsigned long long {
return static_cast<unsigned>(mapped_type_constant<Arg, Context>::value) |
(encode_types<Context, Args...>() << packed_arg_bits);
}
#if defined(__cpp_if_constexpr)
// This type is intentionally undefined, only used for errors
template <typename T, typename Char> struct type_is_unformattable_for;
#endif
template <bool PACKED, typename Context, typename T, FMT_ENABLE_IF(PACKED)>
FMT_CONSTEXPR FMT_INLINE auto make_arg(T& val) -> value<Context> {
using arg_type = remove_cvref_t<decltype(arg_mapper<Context>().map(val))>;
constexpr bool formattable_char =
!std::is_same<arg_type, unformattable_char>::value;
static_assert(formattable_char, "Mixing character types is disallowed.");
// Formatting of arbitrary pointers is disallowed. If you want to format a
// pointer cast it to `void*` or `const void*`. In particular, this forbids
// formatting of `[const] volatile char*` printed as bool by iostreams.
constexpr bool formattable_pointer =
!std::is_same<arg_type, unformattable_pointer>::value;
static_assert(formattable_pointer,
"Formatting of non-void pointers is disallowed.");
constexpr bool formattable = !std::is_same<arg_type, unformattable>::value;
#if defined(__cpp_if_constexpr)
if constexpr (!formattable) {
type_is_unformattable_for<T, typename Context::char_type> _;
}
#endif
static_assert(
formattable,
"Cannot format an argument. To make type T formattable provide a "
"formatter<T> specialization: https://fmt.dev/latest/api.html#udt");
return {arg_mapper<Context>().map(val)};
}
template <typename Context, typename T>
FMT_CONSTEXPR auto make_arg(T& val) -> basic_format_arg<Context> {
auto arg = basic_format_arg<Context>();
arg.type_ = mapped_type_constant<T, Context>::value;
arg.value_ = make_arg<true, Context>(val);
return arg;
}
template <bool PACKED, typename Context, typename T, FMT_ENABLE_IF(!PACKED)>
FMT_CONSTEXPR inline auto make_arg(T& val) -> basic_format_arg<Context> {
return make_arg<Context>(val);
}
} // namespace detail
FMT_BEGIN_EXPORT
// A formatting argument. It is a trivially copyable/constructible type to
// allow storage in basic_memory_buffer.
template <typename Context> class basic_format_arg {
@ -1505,7 +1606,7 @@ template <typename Context> class basic_format_arg {
detail::type type_;
template <typename ContextType, typename T>
friend FMT_CONSTEXPR auto detail::make_arg(T&& value)
friend FMT_CONSTEXPR auto detail::make_arg(T& value)
-> basic_format_arg<ContextType>;
template <typename Visitor, typename Ctx>
@ -1559,7 +1660,7 @@ template <typename Context> class basic_format_arg {
``vis(value)`` will be called with the value of type ``double``.
\endrst
*/
FMT_MODULE_EXPORT
// DEPRECATED!
template <typename Visitor, typename Context>
FMT_CONSTEXPR FMT_INLINE auto visit_format_arg(
Visitor&& vis, const basic_format_arg<Context>& arg) -> decltype(vis(0)) {
@ -1601,124 +1702,6 @@ FMT_CONSTEXPR FMT_INLINE auto visit_format_arg(
return vis(monostate());
}
namespace detail {
template <typename Char, typename InputIt>
auto copy_str(InputIt begin, InputIt end, appender out) -> appender {
get_container(out).append(begin, end);
return out;
}
template <typename Char, typename R, typename OutputIt>
FMT_CONSTEXPR auto copy_str(R&& rng, OutputIt out) -> OutputIt {
return detail::copy_str<Char>(rng.begin(), rng.end(), out);
}
#if FMT_GCC_VERSION && FMT_GCC_VERSION < 500
// A workaround for gcc 4.8 to make void_t work in a SFINAE context.
template <typename...> struct void_t_impl { using type = void; };
template <typename... T> using void_t = typename void_t_impl<T...>::type;
#else
template <typename...> using void_t = void;
#endif
template <typename It, typename T, typename Enable = void>
struct is_output_iterator : std::false_type {};
template <typename It, typename T>
struct is_output_iterator<
It, T,
void_t<typename std::iterator_traits<It>::iterator_category,
decltype(*std::declval<It>() = std::declval<T>())>>
: std::true_type {};
template <typename It> struct is_back_insert_iterator : std::false_type {};
template <typename Container>
struct is_back_insert_iterator<std::back_insert_iterator<Container>>
: std::true_type {};
template <typename It>
struct is_contiguous_back_insert_iterator : std::false_type {};
template <typename Container>
struct is_contiguous_back_insert_iterator<std::back_insert_iterator<Container>>
: is_contiguous<Container> {};
template <>
struct is_contiguous_back_insert_iterator<appender> : std::true_type {};
// A type-erased reference to an std::locale to avoid a heavy <locale> include.
class locale_ref {
private:
const void* locale_; // A type-erased pointer to std::locale.
public:
constexpr FMT_INLINE locale_ref() : locale_(nullptr) {}
template <typename Locale> explicit locale_ref(const Locale& loc);
explicit operator bool() const noexcept { return locale_ != nullptr; }
template <typename Locale> auto get() const -> Locale;
};
template <typename> constexpr auto encode_types() -> unsigned long long {
return 0;
}
template <typename Context, typename Arg, typename... Args>
constexpr auto encode_types() -> unsigned long long {
return static_cast<unsigned>(mapped_type_constant<Arg, Context>::value) |
(encode_types<Context, Args...>() << packed_arg_bits);
}
template <typename Context, typename T>
FMT_CONSTEXPR FMT_INLINE auto make_value(T&& val) -> value<Context> {
auto&& arg = arg_mapper<Context>().map(FMT_FORWARD(val));
using arg_type = remove_cvref_t<decltype(arg)>;
constexpr bool formattable_char =
!std::is_same<arg_type, unformattable_char>::value;
static_assert(formattable_char, "Mixing character types is disallowed.");
// Formatting of arbitrary pointers is disallowed. If you want to format a
// pointer cast it to `void*` or `const void*`. In particular, this forbids
// formatting of `[const] volatile char*` printed as bool by iostreams.
constexpr bool formattable_pointer =
!std::is_same<arg_type, unformattable_pointer>::value;
static_assert(formattable_pointer,
"Formatting of non-void pointers is disallowed.");
constexpr bool formattable = !std::is_same<arg_type, unformattable>::value;
static_assert(
formattable,
"Cannot format an argument. To make type T formattable provide a "
"formatter<T> specialization: https://fmt.dev/latest/api.html#udt");
return {arg};
}
template <typename Context, typename T>
FMT_CONSTEXPR auto make_arg(T&& value) -> basic_format_arg<Context> {
auto arg = basic_format_arg<Context>();
arg.type_ = mapped_type_constant<T, Context>::value;
arg.value_ = make_value<Context>(value);
return arg;
}
// The DEPRECATED type template parameter is there to avoid an ODR violation
// when using a fallback formatter in one translation unit and an implicit
// conversion in another (not recommended).
template <bool IS_PACKED, typename Context, type, typename T,
FMT_ENABLE_IF(IS_PACKED)>
FMT_CONSTEXPR FMT_INLINE auto make_arg(T&& val) -> value<Context> {
return make_value<Context>(val);
}
template <bool IS_PACKED, typename Context, type, typename T,
FMT_ENABLE_IF(!IS_PACKED)>
FMT_CONSTEXPR inline auto make_arg(T&& value) -> basic_format_arg<Context> {
return make_arg<Context>(value);
}
} // namespace detail
FMT_BEGIN_EXPORT
// Formatting context.
template <typename OutputIt, typename Char> class basic_format_context {
private:
@ -1778,7 +1761,7 @@ using format_context = buffer_context<char>;
template <typename T, typename Char = char>
using is_formattable = bool_constant<!std::is_base_of<
detail::unformattable, decltype(detail::arg_mapper<buffer_context<Char>>()
.map(std::declval<T>()))>::value>;
.map(std::declval<T&>()))>::value>;
/**
\rst
@ -1796,7 +1779,7 @@ class format_arg_store
{
private:
static const size_t num_args = sizeof...(Args);
static const size_t num_named_args = detail::count_named_args<Args...>();
static constexpr size_t num_named_args = detail::count_named_args<Args...>();
static const bool is_packed = num_args <= detail::max_packed_args;
using value_type = conditional_t<is_packed, detail::value<Context>,
@ -1817,16 +1800,14 @@ class format_arg_store
public:
template <typename... T>
FMT_CONSTEXPR FMT_INLINE format_arg_store(T&&... args)
FMT_CONSTEXPR FMT_INLINE format_arg_store(T&... args)
:
#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409
basic_format_args<Context>(*this),
#endif
data_{detail::make_arg<
is_packed, Context,
detail::mapped_type_constant<remove_cvref_t<T>, Context>::value>(
FMT_FORWARD(args))...} {
detail::init_named_args(data_.named_args(), 0, 0, args...);
data_{detail::make_arg<is_packed, Context>(args)...} {
if (detail::const_check(num_named_args != 0))
detail::init_named_args(data_.named_args(), 0, 0, args...);
}
};
@ -1834,14 +1815,15 @@ class format_arg_store
\rst
Constructs a `~fmt::format_arg_store` object that contains references to
arguments and can be implicitly converted to `~fmt::format_args`. `Context`
can be omitted in which case it defaults to `~fmt::context`.
can be omitted in which case it defaults to `~fmt::format_context`.
See `~fmt::arg` for lifetime considerations.
\endrst
*/
// Arguments are taken by lvalue references to avoid some lifetime issues.
template <typename Context = format_context, typename... T>
constexpr auto make_format_args(T&&... args)
constexpr auto make_format_args(T&... args)
-> format_arg_store<Context, remove_cvref_t<T>...> {
return {FMT_FORWARD(args)...};
return {args...};
}
/**
@ -1869,7 +1851,7 @@ FMT_END_EXPORT
``vformat``::
void vlog(string_view format_str, format_args args); // OK
format_args args = make_format_args(42); // Error: dangling reference
format_args args = make_format_args(); // Error: dangling reference
\endrst
*/
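The dangling-reference caveat above is why the documented pattern splits logging into a type-erased core plus a forwarding wrapper (sketch):

#include <fmt/core.h>

void vlog(fmt::string_view fmt, fmt::format_args args) {
  fmt::vprint(fmt, args);  // args are consumed before any temporary dies
}

template <typename... T>
void log(fmt::format_string<T...> fmt, T&&... args) {
  vlog(fmt, fmt::make_format_args(args...));  // OK: used within the full expression
}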
template <typename Context> class basic_format_args {
@ -1986,7 +1968,7 @@ template <typename Context> class basic_format_args {
/** An alias to ``basic_format_args<format_context>``. */
// A separate type would result in shorter symbols but break ABI compatibility
// between clang and gcc on ARM (#1919).
FMT_MODULE_EXPORT using format_args = basic_format_args<format_context>;
FMT_EXPORT using format_args = basic_format_args<format_context>;
// We cannot use enum classes as bit fields because of a gcc bug, so we put them
// in namespaces instead (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61414).
@ -2558,7 +2540,17 @@ FMT_CONSTEXPR auto parse_format_specs(ParseContext& ctx)
mapped_type_constant<T, context>::value != type::custom_type,
decltype(arg_mapper<context>().map(std::declval<const T&>())),
typename strip_named_arg<T>::type>;
#if defined(__cpp_if_constexpr)
if constexpr (std::is_default_constructible_v<
formatter<mapped_type, char_type>>) {
return formatter<mapped_type, char_type>().parse(ctx);
} else {
type_is_unformattable_for<T, char_type> _;
return ctx.begin();
}
#else
return formatter<mapped_type, char_type>().parse(ctx);
#endif
}
// Checks char specs and returns true iff the presentation type is char-like.
@ -2574,8 +2566,6 @@ FMT_CONSTEXPR auto check_char_specs(const format_specs<Char>& specs) -> bool {
return true;
}
constexpr FMT_INLINE_VARIABLE int invalid_arg_index = -1;
#if FMT_USE_NONTYPE_TEMPLATE_ARGS
template <int N, typename T, typename... Args, typename Char>
constexpr auto get_arg_index_by_name(basic_string_view<Char> name) -> int {
@ -2585,7 +2575,7 @@ constexpr auto get_arg_index_by_name(basic_string_view<Char> name) -> int {
if constexpr (sizeof...(Args) > 0)
return get_arg_index_by_name<N + 1, Args...>(name);
(void)name; // Workaround an MSVC bug about "unused" parameter.
return invalid_arg_index;
return -1;
}
#endif
@ -2596,7 +2586,7 @@ FMT_CONSTEXPR auto get_arg_index_by_name(basic_string_view<Char> name) -> int {
return get_arg_index_by_name<0, Args...>(name);
#endif
(void)name;
return invalid_arg_index;
return -1;
}
template <typename Char, typename... Args> class format_string_checker {
@ -2610,15 +2600,15 @@ template <typename Char, typename... Args> class format_string_checker {
// needed for compile-time checks: https://godbolt.org/z/GvWzcTjh1.
using parse_func = const Char* (*)(parse_context_type&);
type types_[num_args > 0 ? static_cast<size_t>(num_args) : 1];
parse_context_type context_;
parse_func parse_funcs_[num_args > 0 ? static_cast<size_t>(num_args) : 1];
type types_[num_args > 0 ? static_cast<size_t>(num_args) : 1];
public:
explicit FMT_CONSTEXPR format_string_checker(basic_string_view<Char> fmt)
: context_(fmt, num_args, types_),
parse_funcs_{&parse_format_specs<Args, parse_context_type>...},
types_{mapped_type_constant<Args, buffer_context<Char>>::value...} {}
: types_{mapped_type_constant<Args, buffer_context<Char>>::value...},
context_(fmt, num_args, types_),
parse_funcs_{&parse_format_specs<Args, parse_context_type>...} {}
FMT_CONSTEXPR void on_text(const Char*, const Char*) {}
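The member reordering above matters because data members are initialized in declaration order, not initializer-list order, and context_ is constructed from types_. A reduced sketch of the hazard (illustration only, not fmt code):

struct checker {
  int first_;   // declared first, so initialized first...
  int second_;
  checker() : second_(1), first_(second_) {}  // ...first_ reads uninitialized second_
};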
@ -2629,7 +2619,7 @@ template <typename Char, typename... Args> class format_string_checker {
FMT_CONSTEXPR auto on_arg_id(basic_string_view<Char> id) -> int {
#if FMT_USE_NONTYPE_TEMPLATE_ARGS
auto index = get_arg_index_by_name<Args...>(id);
if (index == invalid_arg_index) on_error("named argument is not found");
if (index < 0) on_error("named argument is not found");
return index;
#else
(void)id;
@ -2638,7 +2628,9 @@ template <typename Char, typename... Args> class format_string_checker {
#endif
}
FMT_CONSTEXPR void on_replacement_field(int, const Char*) {}
FMT_CONSTEXPR void on_replacement_field(int id, const Char* begin) {
on_format_specs(id, begin, begin); // Call parse() on empty specs.
}
FMT_CONSTEXPR auto on_format_specs(int id, const Char* begin, const Char*)
-> const Char* {
@ -2721,27 +2713,6 @@ struct formatter<T, Char,
-> decltype(ctx.out());
};
#define FMT_FORMAT_AS(Type, Base) \
template <typename Char> \
struct formatter<Type, Char> : formatter<Base, Char> { \
template <typename FormatContext> \
auto format(const Type& val, FormatContext& ctx) const \
-> decltype(ctx.out()) { \
return formatter<Base, Char>::format(static_cast<Base>(val), ctx); \
} \
}
FMT_FORMAT_AS(signed char, int);
FMT_FORMAT_AS(unsigned char, unsigned);
FMT_FORMAT_AS(short, int);
FMT_FORMAT_AS(unsigned short, unsigned);
FMT_FORMAT_AS(long, long long);
FMT_FORMAT_AS(unsigned long, unsigned long long);
FMT_FORMAT_AS(Char*, const Char*);
FMT_FORMAT_AS(std::basic_string<Char>, basic_string_view<Char>);
FMT_FORMAT_AS(std::nullptr_t, const void*);
FMT_FORMAT_AS(detail::std_string_view<Char>, basic_string_view<Char>);
template <typename Char = char> struct runtime_format_string {
basic_string_view<Char> str;
};

View File

@ -1128,16 +1128,12 @@ bool is_left_endpoint_integer_shorter_interval(int exponent) noexcept {
}
// Remove trailing zeros from n and return the number of zeros removed (float)
FMT_INLINE int remove_trailing_zeros(uint32_t& n) noexcept {
FMT_INLINE int remove_trailing_zeros(uint32_t& n, int s = 0) noexcept {
FMT_ASSERT(n != 0, "");
// Modular inverse of 5 (mod 2^32): (mod_inv_5 * 5) mod 2^32 = 1.
// See https://github.com/fmtlib/fmt/issues/3163 for more details.
const uint32_t mod_inv_5 = 0xcccccccd;
// Casts are needed to workaround a bug in MSVC 19.22 and older.
const uint32_t mod_inv_25 =
static_cast<uint32_t>(uint64_t(mod_inv_5) * mod_inv_5);
constexpr uint32_t mod_inv_5 = 0xcccccccd;
constexpr uint32_t mod_inv_25 = 0xc28f5c29; // = mod_inv_5 * mod_inv_5
int s = 0;
while (true) {
auto q = rotr(n * mod_inv_25, 2);
if (q > max_value<uint32_t>() / 100) break;
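The loop relies on a standard divisibility trick: for odd d, n is divisible by d exactly when n * inv_d (mod 2^32) is at most floor(UINT32_MAX / d), and the rotate by 2 folds in the factor of 4 in 100 = 4 * 25. A self-contained check of one case:

#include <cassert>
#include <cstdint>

static uint32_t rotr32(uint32_t x, unsigned r) {
  return (x >> r) | (x << (32 - r));
}

int main() {
  const uint32_t mod_inv_25 = 0xcccccccdu * 0xcccccccdu;  // inverse of 25 mod 2^32
  uint32_t n = 1234500;                                   // = 12345 * 100
  uint32_t q = rotr32(n * mod_inv_25, 2);
  assert(q == 12345 && q <= UINT32_MAX / 100);            // divisible by 100
}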
@ -1162,32 +1158,17 @@ FMT_INLINE int remove_trailing_zeros(uint64_t& n) noexcept {
// Is n is divisible by 10^8?
if ((nm.high() & ((1ull << (90 - 64)) - 1)) == 0 && nm.low() < magic_number) {
// If yes, work with the quotient.
// If yes, work with the quotient...
auto n32 = static_cast<uint32_t>(nm.high() >> (90 - 64));
const uint32_t mod_inv_5 = 0xcccccccd;
const uint32_t mod_inv_25 = mod_inv_5 * mod_inv_5;
int s = 8;
while (true) {
auto q = rotr(n32 * mod_inv_25, 2);
if (q > max_value<uint32_t>() / 100) break;
n32 = q;
s += 2;
}
auto q = rotr(n32 * mod_inv_5, 1);
if (q <= max_value<uint32_t>() / 10) {
n32 = q;
s |= 1;
}
// ... and use the 32 bit variant of the function
int s = remove_trailing_zeros(n32, 8);
n = n32;
return s;
}
// If n is not divisible by 10^8, work with n itself.
const uint64_t mod_inv_5 = 0xcccccccccccccccd;
const uint64_t mod_inv_25 = mod_inv_5 * mod_inv_5;
constexpr uint64_t mod_inv_5 = 0xcccccccccccccccd;
constexpr uint64_t mod_inv_25 = 0x8f5c28f5c28f5c29; // = mod_inv_5 * mod_inv_5
int s = 0;
while (true) {
@ -1458,7 +1439,7 @@ FMT_FUNC bool write_console(std::FILE* f, string_view text) {
auto u16 = utf8_to_utf16(text);
auto written = dword();
return WriteConsoleW(reinterpret_cast<void*>(_get_osfhandle(fd)), u16.c_str(),
static_cast<uint32_t>(u16.size()), &written, nullptr);
static_cast<uint32_t>(u16.size()), &written, nullptr) != 0;
}
// Print assuming legacy (non-Unicode) encoding.

View File

@ -48,9 +48,10 @@
#include "core.h"
#ifndef FMT_BEGIN_DETAIL_NAMESPACE
# define FMT_BEGIN_DETAIL_NAMESPACE namespace detail {
# define FMT_END_DETAIL_NAMESPACE }
#if defined __cpp_inline_variables && __cpp_inline_variables >= 201606L
# define FMT_INLINE_VARIABLE inline
#else
# define FMT_INLINE_VARIABLE
#endif
#if FMT_HAS_CPP17_ATTRIBUTE(fallthrough)
@ -78,16 +79,24 @@
# endif
#endif
#if FMT_GCC_VERSION
# define FMT_GCC_VISIBILITY_HIDDEN __attribute__((visibility("hidden")))
#else
# define FMT_GCC_VISIBILITY_HIDDEN
#ifndef FMT_NO_UNIQUE_ADDRESS
# if FMT_CPLUSPLUS >= 202002L
# if FMT_HAS_CPP_ATTRIBUTE(no_unique_address)
# define FMT_NO_UNIQUE_ADDRESS [[no_unique_address]]
// VS2019 v16.10 and later except clang-cl (https://reviews.llvm.org/D110485)
# elif (FMT_MSC_VERSION >= 1929) && !FMT_CLANG_VERSION
# define FMT_NO_UNIQUE_ADDRESS [[msvc::no_unique_address]]
# endif
# endif
#endif
#ifndef FMT_NO_UNIQUE_ADDRESS
# define FMT_NO_UNIQUE_ADDRESS
#endif
#ifdef __NVCC__
# define FMT_CUDA_VERSION (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__)
#if FMT_GCC_VERSION || defined(__clang__)
# define FMT_VISIBILITY(value) __attribute__((visibility(value)))
#else
# define FMT_CUDA_VERSION 0
# define FMT_VISIBILITY(value)
#endif
#ifdef __has_builtin
@ -120,10 +129,8 @@ FMT_END_NAMESPACE
# define FMT_THROW(x) throw x
# endif
# else
# define FMT_THROW(x) \
do { \
FMT_ASSERT(false, (x).what()); \
} while (false)
# define FMT_THROW(x) \
::fmt::detail::assert_fail(__FILE__, __LINE__, (x).what())
# endif
#endif
@ -362,8 +369,6 @@ class uint128_fallback {
private:
uint64_t lo_, hi_;
friend uint128_fallback umul128(uint64_t x, uint64_t y) noexcept;
public:
constexpr uint128_fallback(uint64_t hi, uint64_t lo) : lo_(lo), hi_(hi) {}
constexpr uint128_fallback(uint64_t value = 0) : lo_(value), hi_(0) {}
@ -536,6 +541,8 @@ FMT_INLINE void assume(bool condition) {
(void)condition;
#if FMT_HAS_BUILTIN(__builtin_assume) && !FMT_ICC_VERSION
__builtin_assume(condition);
#elif FMT_GCC_VERSION
if (!condition) __builtin_unreachable();
#endif
}
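The GCC branch added above emulates __builtin_assume by making violation of the condition unreachable, which lets the optimizer rely on it. Reduced sketch (GCC/Clang-specific builtin):

// With the hint, the compiler may treat n as non-negative throughout.
static int last_digit(int n) {
  if (!(n >= 0)) __builtin_unreachable();  // promise: n is non-negative
  return n % 10;                           // may compile without sign handling
}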
@ -554,20 +561,6 @@ inline auto get_data(Container& c) -> typename Container::value_type* {
return c.data();
}
#if defined(_SECURE_SCL) && _SECURE_SCL
// Make a checked iterator to avoid MSVC warnings.
template <typename T> using checked_ptr = stdext::checked_array_iterator<T*>;
template <typename T>
constexpr auto make_checked(T* p, size_t size) -> checked_ptr<T> {
return {p, size};
}
#else
template <typename T> using checked_ptr = T*;
template <typename T> constexpr auto make_checked(T* p, size_t) -> T* {
return p;
}
#endif
// Attempts to reserve space for n extra characters in the output range.
// Returns a pointer to the reserved range or a reference to it.
template <typename Container, FMT_ENABLE_IF(is_contiguous<Container>::value)>
@ -575,12 +568,12 @@ template <typename Container, FMT_ENABLE_IF(is_contiguous<Container>::value)>
__attribute__((no_sanitize("undefined")))
#endif
inline auto
reserve(std::back_insert_iterator<Container> it, size_t n)
-> checked_ptr<typename Container::value_type> {
reserve(std::back_insert_iterator<Container> it, size_t n) ->
typename Container::value_type* {
Container& c = get_container(it);
size_t size = c.size();
c.resize(size + n);
return make_checked(get_data(c) + size, n);
return get_data(c) + size;
}
template <typename T>
@ -612,8 +605,8 @@ template <typename T> auto to_pointer(buffer_appender<T> it, size_t n) -> T* {
}
template <typename Container, FMT_ENABLE_IF(is_contiguous<Container>::value)>
inline auto base_iterator(std::back_insert_iterator<Container>& it,
checked_ptr<typename Container::value_type>)
inline auto base_iterator(std::back_insert_iterator<Container> it,
typename Container::value_type*)
-> std::back_insert_iterator<Container> {
return it;
}
@ -881,7 +874,7 @@ void buffer<T>::append(const U* begin, const U* end) {
try_reserve(size_ + count);
auto free_cap = capacity_ - size_;
if (free_cap < count) count = free_cap;
std::uninitialized_copy_n(begin, count, make_checked(ptr_ + size_, count));
std::uninitialized_copy_n(begin, count, ptr_ + size_);
size_ += count;
begin += count;
}
@ -926,8 +919,8 @@ class basic_memory_buffer final : public detail::buffer<T> {
private:
T store_[SIZE];
// Don't inherit from Allocator avoid generating type_info for it.
Allocator alloc_;
// Don't inherit from Allocator to avoid generating type_info for it.
FMT_NO_UNIQUE_ADDRESS Allocator alloc_;
// Deallocate memory allocated by the buffer.
FMT_CONSTEXPR20 void deallocate() {
@ -948,9 +941,10 @@ class basic_memory_buffer final : public detail::buffer<T> {
T* old_data = this->data();
T* new_data =
std::allocator_traits<Allocator>::allocate(alloc_, new_capacity);
// Suppress a bogus -Wstringop-overflow in gcc 13.1 (#3481).
detail::assume(this->size() <= new_capacity);
// The following code doesn't throw, so the raw pointer above doesn't leak.
std::uninitialized_copy(old_data, old_data + this->size(),
detail::make_checked(new_data, new_capacity));
std::uninitialized_copy_n(old_data, this->size(), new_data);
this->set(new_data, new_capacity);
// deallocate must not throw according to the standard, but even if it does,
// the buffer already uses the new storage and will deallocate it in
@ -978,8 +972,7 @@ class basic_memory_buffer final : public detail::buffer<T> {
size_t size = other.size(), capacity = other.capacity();
if (data == other.store_) {
this->set(store_, capacity);
detail::copy_str<T>(other.store_, other.store_ + size,
detail::make_checked(store_, capacity));
detail::copy_str<T>(other.store_, other.store_ + size, store_);
} else {
this->set(data, capacity);
// Set pointer to the inline array so that delete is not called
@ -1044,6 +1037,7 @@ namespace detail {
FMT_API bool write_console(std::FILE* f, string_view text);
FMT_API void print(std::FILE*, string_view);
} // namespace detail
FMT_BEGIN_EXPORT
// Suppress a misleading warning in older versions of clang.
@ -1052,7 +1046,7 @@ FMT_BEGIN_EXPORT
#endif
/** An error reported from a formatting function. */
class FMT_API format_error : public std::runtime_error {
class FMT_VISIBILITY("default") format_error : public std::runtime_error {
public:
using std::runtime_error::runtime_error;
};
@ -1128,7 +1122,7 @@ template <typename Locale> class format_facet : public Locale::facet {
}
};
FMT_BEGIN_DETAIL_NAMESPACE
namespace detail {
// Returns true if value is negative, false otherwise.
// Same as `value < 0` but doesn't produce warnings if T is an unsigned type.
@ -1257,7 +1251,7 @@ FMT_CONSTEXPR auto count_digits(UInt n) -> int {
FMT_INLINE auto do_count_digits(uint32_t n) -> int {
// An optimization by Kendall Willets from https://bit.ly/3uOIQrB.
// This increments the upper 32 bits (log10(T) - 1) when >= T is added.
# define FMT_INC(T) (((sizeof(# T) - 1ull) << 32) - T)
# define FMT_INC(T) (((sizeof(#T) - 1ull) << 32) - T)
static constexpr uint64_t table[] = {
FMT_INC(0), FMT_INC(0), FMT_INC(0), // 8
FMT_INC(10), FMT_INC(10), FMT_INC(10), // 64
@ -1393,8 +1387,8 @@ FMT_CONSTEXPR auto format_uint(Char* buffer, UInt value, int num_digits,
}
template <unsigned BASE_BITS, typename Char, typename It, typename UInt>
inline auto format_uint(It out, UInt value, int num_digits, bool upper = false)
-> It {
FMT_CONSTEXPR inline auto format_uint(It out, UInt value, int num_digits,
bool upper = false) -> It {
if (auto ptr = to_pointer<Char>(out, to_unsigned(num_digits))) {
format_uint<BASE_BITS>(ptr, value, num_digits, upper);
return out;
@ -1418,19 +1412,20 @@ class utf8_to_utf16 {
auto str() const -> std::wstring { return {&buffer_[0], size()}; }
};
enum class to_utf8_error_policy { abort, replace };
// A converter from UTF-16/UTF-32 (host endian) to UTF-8.
template <typename WChar, typename Buffer = memory_buffer>
class unicode_to_utf8 {
template <typename WChar, typename Buffer = memory_buffer> class to_utf8 {
private:
Buffer buffer_;
public:
unicode_to_utf8() {}
explicit unicode_to_utf8(basic_string_view<WChar> s) {
to_utf8() {}
explicit to_utf8(basic_string_view<WChar> s,
to_utf8_error_policy policy = to_utf8_error_policy::abort) {
static_assert(sizeof(WChar) == 2 || sizeof(WChar) == 4,
"Expect utf16 or utf32");
if (!convert(s))
if (!convert(s, policy))
FMT_THROW(std::runtime_error(sizeof(WChar) == 2 ? "invalid utf16"
: "invalid utf32"));
}
@ -1442,23 +1437,28 @@ class unicode_to_utf8 {
// Performs conversion returning a bool instead of throwing exception on
// conversion error. This method may still throw in case of memory allocation
// error.
bool convert(basic_string_view<WChar> s) {
if (!convert(buffer_, s)) return false;
bool convert(basic_string_view<WChar> s,
to_utf8_error_policy policy = to_utf8_error_policy::abort) {
if (!convert(buffer_, s, policy)) return false;
buffer_.push_back(0);
return true;
}
static bool convert(Buffer& buf, basic_string_view<WChar> s) {
static bool convert(
Buffer& buf, basic_string_view<WChar> s,
to_utf8_error_policy policy = to_utf8_error_policy::abort) {
for (auto p = s.begin(); p != s.end(); ++p) {
uint32_t c = static_cast<uint32_t>(*p);
if (sizeof(WChar) == 2 && c >= 0xd800 && c <= 0xdfff) {
// surrogate pair
// Handle a surrogate pair.
++p;
if (p == s.end() || (c & 0xfc00) != 0xd800 || (*p & 0xfc00) != 0xdc00) {
return false;
if (policy == to_utf8_error_policy::abort) return false;
buf.append(string_view("\xEF\xBF\xBD"));
--p;
} else {
c = (c << 10) + static_cast<uint32_t>(*p) - 0x35fdc00;
}
c = (c << 10) + static_cast<uint32_t>(*p) - 0x35fdc00;
}
if (c < 0x80) {
} else if (c < 0x80) {
buf.push_back(static_cast<char>(c));
} else if (c < 0x800) {
buf.push_back(static_cast<char>(0xc0 | (c >> 6)));
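The magic constant in the surrogate branch is (0xd800 << 10) + 0xdc00 - 0x10000 = 0x35fdc00, so a UTF-16 pair (hi, lo) combines in a single expression. A worked check:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t hi = 0xd83d, lo = 0xde00;         // UTF-16 pair encoding U+1F600
  uint32_t c = (hi << 10) + lo - 0x35fdc00;  // same formula as above
  assert(c == 0x1f600);
}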
@ -1486,9 +1486,9 @@ inline uint128_fallback umul128(uint64_t x, uint64_t y) noexcept {
auto p = static_cast<uint128_opt>(x) * static_cast<uint128_opt>(y);
return {static_cast<uint64_t>(p >> 64), static_cast<uint64_t>(p)};
#elif defined(_MSC_VER) && defined(_M_X64)
auto result = uint128_fallback();
result.lo_ = _umul128(x, y, &result.hi_);
return result;
auto hi = uint64_t();
auto lo = _umul128(x, y, &hi);
return {hi, lo};
#else
const uint64_t mask = static_cast<uint64_t>(max_value<uint32_t>());
@ -1737,119 +1737,31 @@ FMT_CONSTEXPR inline fp operator*(fp x, fp y) {
}
template <typename T = void> struct basic_data {
// Normalized 64-bit significands of pow(10, k), for k = -348, -340, ..., 340.
// These are generated by support/compute-powers.py.
static constexpr uint64_t pow10_significands[87] = {
0xfa8fd5a0081c0288, 0xbaaee17fa23ebf76, 0x8b16fb203055ac76,
0xcf42894a5dce35ea, 0x9a6bb0aa55653b2d, 0xe61acf033d1a45df,
0xab70fe17c79ac6ca, 0xff77b1fcbebcdc4f, 0xbe5691ef416bd60c,
0x8dd01fad907ffc3c, 0xd3515c2831559a83, 0x9d71ac8fada6c9b5,
0xea9c227723ee8bcb, 0xaecc49914078536d, 0x823c12795db6ce57,
0xc21094364dfb5637, 0x9096ea6f3848984f, 0xd77485cb25823ac7,
0xa086cfcd97bf97f4, 0xef340a98172aace5, 0xb23867fb2a35b28e,
0x84c8d4dfd2c63f3b, 0xc5dd44271ad3cdba, 0x936b9fcebb25c996,
0xdbac6c247d62a584, 0xa3ab66580d5fdaf6, 0xf3e2f893dec3f126,
0xb5b5ada8aaff80b8, 0x87625f056c7c4a8b, 0xc9bcff6034c13053,
0x964e858c91ba2655, 0xdff9772470297ebd, 0xa6dfbd9fb8e5b88f,
0xf8a95fcf88747d94, 0xb94470938fa89bcf, 0x8a08f0f8bf0f156b,
0xcdb02555653131b6, 0x993fe2c6d07b7fac, 0xe45c10c42a2b3b06,
0xaa242499697392d3, 0xfd87b5f28300ca0e, 0xbce5086492111aeb,
0x8cbccc096f5088cc, 0xd1b71758e219652c, 0x9c40000000000000,
0xe8d4a51000000000, 0xad78ebc5ac620000, 0x813f3978f8940984,
0xc097ce7bc90715b3, 0x8f7e32ce7bea5c70, 0xd5d238a4abe98068,
0x9f4f2726179a2245, 0xed63a231d4c4fb27, 0xb0de65388cc8ada8,
0x83c7088e1aab65db, 0xc45d1df942711d9a, 0x924d692ca61be758,
0xda01ee641a708dea, 0xa26da3999aef774a, 0xf209787bb47d6b85,
0xb454e4a179dd1877, 0x865b86925b9bc5c2, 0xc83553c5c8965d3d,
0x952ab45cfa97a0b3, 0xde469fbd99a05fe3, 0xa59bc234db398c25,
0xf6c69a72a3989f5c, 0xb7dcbf5354e9bece, 0x88fcf317f22241e2,
0xcc20ce9bd35c78a5, 0x98165af37b2153df, 0xe2a0b5dc971f303a,
0xa8d9d1535ce3b396, 0xfb9b7cd9a4a7443c, 0xbb764c4ca7a44410,
0x8bab8eefb6409c1a, 0xd01fef10a657842c, 0x9b10a4e5e9913129,
0xe7109bfba19c0c9d, 0xac2820d9623bf429, 0x80444b5e7aa7cf85,
0xbf21e44003acdd2d, 0x8e679c2f5e44ff8f, 0xd433179d9c8cb841,
0x9e19db92b4e31ba9, 0xeb96bf6ebadf77d9, 0xaf87023b9bf0ee6b,
};
#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wnarrowing"
#endif
// Binary exponents of pow(10, k), for k = -348, -340, ..., 340, corresponding
// to significands above.
static constexpr int16_t pow10_exponents[87] = {
-1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, -954,
-927, -901, -874, -847, -821, -794, -768, -741, -715, -688, -661,
-635, -608, -582, -555, -529, -502, -475, -449, -422, -396, -369,
-343, -316, -289, -263, -236, -210, -183, -157, -130, -103, -77,
-50, -24, 3, 30, 56, 83, 109, 136, 162, 189, 216,
242, 269, 295, 322, 348, 375, 402, 428, 455, 481, 508,
534, 561, 588, 614, 641, 667, 694, 720, 747, 774, 800,
827, 853, 880, 907, 933, 960, 986, 1013, 1039, 1066};
#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409
# pragma GCC diagnostic pop
#endif
static constexpr uint64_t power_of_10_64[20] = {
1, FMT_POWERS_OF_10(1ULL), FMT_POWERS_OF_10(1000000000ULL),
10000000000000000000ULL};
// For checking rounding thresholds.
// The kth entry is chosen to be the smallest integer such that the
// upper 32-bits of 10^(k+1) times it is strictly bigger than 5 * 10^k.
static constexpr uint32_t fractional_part_rounding_thresholds[8] = {
2576980378, // ceil(2^31 + 2^32/10^1)
2190433321, // ceil(2^31 + 2^32/10^2)
2151778616, // ceil(2^31 + 2^32/10^3)
2147913145, // ceil(2^31 + 2^32/10^4)
2147526598, // ceil(2^31 + 2^32/10^5)
2147487943, // ceil(2^31 + 2^32/10^6)
2147484078, // ceil(2^31 + 2^32/10^7)
2147483691 // ceil(2^31 + 2^32/10^8)
2576980378U, // ceil(2^31 + 2^32/10^1)
2190433321U, // ceil(2^31 + 2^32/10^2)
2151778616U, // ceil(2^31 + 2^32/10^3)
2147913145U, // ceil(2^31 + 2^32/10^4)
2147526598U, // ceil(2^31 + 2^32/10^5)
2147487943U, // ceil(2^31 + 2^32/10^6)
2147484078U, // ceil(2^31 + 2^32/10^7)
2147483691U // ceil(2^31 + 2^32/10^8)
};
};
// This is a struct rather than an alias to avoid shadowing warnings in gcc.
struct data : basic_data<> {};
#if FMT_CPLUSPLUS < 201703L
template <typename T> constexpr uint64_t basic_data<T>::pow10_significands[];
template <typename T> constexpr int16_t basic_data<T>::pow10_exponents[];
template <typename T> constexpr uint64_t basic_data<T>::power_of_10_64[];
template <typename T>
constexpr uint32_t basic_data<T>::fractional_part_rounding_thresholds[];
#endif
// This is a struct rather than an alias to avoid shadowing warnings in gcc.
struct data : basic_data<> {};
// Returns a cached power of 10 `c_k = c_k.f * pow(2, c_k.e)` such that its
// (binary) exponent satisfies `min_exponent <= c_k.e <= min_exponent + 28`.
FMT_CONSTEXPR inline fp get_cached_power(int min_exponent,
int& pow10_exponent) {
const int shift = 32;
// log10(2) = 0x0.4d104d427de7fbcc...
const int64_t significand = 0x4d104d427de7fbcc;
int index = static_cast<int>(
((min_exponent + fp::num_significand_bits - 1) * (significand >> shift) +
((int64_t(1) << shift) - 1)) // ceil
>> 32 // arithmetic shift
);
// Decimal exponent of the first (smallest) cached power of 10.
const int first_dec_exp = -348;
// Difference between 2 consecutive decimal exponents in cached powers of 10.
const int dec_exp_step = 8;
index = (index - first_dec_exp - 1) / dec_exp_step + 1;
pow10_exponent = first_dec_exp + index * dec_exp_step;
// Using *(x + index) instead of x[index] avoids an issue with some compilers
// using the EDG frontend (e.g. nvhpc/22.3 in C++17 mode).
return {*(data::pow10_significands + index),
*(data::pow10_exponents + index)};
}
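The index computation above evaluates ceil(e * log10(2)) in 32.32 fixed point, using only the top 32 bits of the log10(2) significand. A self-contained sketch of the same trick (illustrative, not fmt's API; like the code above it relies on arithmetic right shift for negative values):
#include <cassert>
#include <cmath>
#include <cstdint>
static int ceil_e_log10_2(int e) {
  const int64_t fixed_log10_2 = 0x4d104d42;  // floor(log10(2) * 2^32)
  return static_cast<int>((e * fixed_log10_2 + ((int64_t(1) << 32) - 1)) >> 32);
}
int main() {
  for (int e = -1100; e <= 1100; ++e)  // a generous range for double exponents
    assert(ceil_e_log10_2(e) == static_cast<int>(std::ceil(e * std::log10(2.0))));
}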
template <typename T>
template <typename T, bool doublish = num_bits<T>() == num_bits<double>()>
using convert_float_result =
conditional_t<std::is_same<T, float>::value ||
std::numeric_limits<T>::digits ==
std::numeric_limits<double>::digits,
double, T>;
conditional_t<std::is_same<T, float>::value || doublish, double, T>;
template <typename T>
constexpr auto convert_float(T value) -> convert_float_result<T> {
@ -1970,7 +1882,7 @@ inline auto find_escape(const char* begin, const char* end)
[] { \
/* Use the hidden visibility as a workaround for a GCC bug (#1973). */ \
/* Use a macro-like name to avoid shadowing warnings. */ \
struct FMT_GCC_VISIBILITY_HIDDEN FMT_COMPILE_STRING : base { \
struct FMT_VISIBILITY("hidden") FMT_COMPILE_STRING : base { \
using char_type FMT_MAYBE_UNUSED = fmt::remove_cvref_t<decltype(s[0])>; \
FMT_MAYBE_UNUSED FMT_CONSTEXPR explicit \
operator fmt::basic_string_view<char_type>() const { \
@ -2475,6 +2387,49 @@ FMT_CONSTEXPR auto write(OutputIt out, T value) -> OutputIt {
return base_iterator(out, it);
}
// DEPRECATED!
template <typename Char>
FMT_CONSTEXPR auto parse_align(const Char* begin, const Char* end,
format_specs<Char>& specs) -> const Char* {
FMT_ASSERT(begin != end, "");
auto align = align::none;
auto p = begin + code_point_length(begin);
if (end - p <= 0) p = begin;
for (;;) {
switch (to_ascii(*p)) {
case '<':
align = align::left;
break;
case '>':
align = align::right;
break;
case '^':
align = align::center;
break;
}
if (align != align::none) {
if (p != begin) {
auto c = *begin;
if (c == '}') return begin;
if (c == '{') {
throw_format_error("invalid fill character '{'");
return begin;
}
specs.fill = {begin, to_unsigned(p - begin)};
begin = p + 1;
} else {
++begin;
}
break;
} else if (p == begin) {
break;
}
p = begin;
}
specs.align = align;
return begin;
}
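For context, the fill/align grammar parsed above corresponds to format strings like these (public fmt API, shown for illustration):
#include <fmt/core.h>
#include <cassert>
int main() {
  assert(fmt::format("{:*^8}", 42) == "***42***");  // '*' fill, '^' center
  assert(fmt::format("{:>6}", "ab") == "    ab");   // default fill, '>' right
  // A '{' fill character is rejected with "invalid fill character '{'",
  // matching the throw_format_error() call above.
}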
// A floating-point presentation format.
enum class float_format : unsigned char {
general, // General: exponent notation or fixed point based on magnitude.
@ -2833,78 +2788,6 @@ FMT_INLINE FMT_CONSTEXPR bool signbit(T value) {
return std::signbit(static_cast<double>(value));
}
enum class round_direction { unknown, up, down };
// Given the divisor (normally a power of 10), the remainder = v % divisor for
// some number v and the error, returns whether v should be rounded up, down, or
// whether the rounding direction can't be determined due to error.
// error should be less than divisor / 2.
FMT_CONSTEXPR inline round_direction get_round_direction(uint64_t divisor,
uint64_t remainder,
uint64_t error) {
FMT_ASSERT(remainder < divisor, ""); // divisor - remainder won't overflow.
FMT_ASSERT(error < divisor, ""); // divisor - error won't overflow.
FMT_ASSERT(error < divisor - error, ""); // error * 2 won't overflow.
// Round down if (remainder + error) * 2 <= divisor.
if (remainder <= divisor - remainder && error * 2 <= divisor - remainder * 2)
return round_direction::down;
// Round up if (remainder - error) * 2 >= divisor.
if (remainder >= error &&
remainder - error >= divisor - (remainder - error)) {
return round_direction::up;
}
return round_direction::unknown;
}
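A small standalone check of the two inequalities above (same logic, illustrative values):
#include <cassert>
#include <cstdint>
enum class dir { unknown, up, down };
static dir round_dir(uint64_t divisor, uint64_t remainder, uint64_t error) {
  if (remainder <= divisor - remainder && error * 2 <= divisor - remainder * 2)
    return dir::down;
  if (remainder >= error && remainder - error >= divisor - (remainder - error))
    return dir::up;
  return dir::unknown;
}
int main() {
  assert(round_dir(1000, 460, 40) == dir::down);     // (460 + 40) * 2 <= 1000
  assert(round_dir(1000, 540, 40) == dir::up);       // (540 - 40) * 2 >= 1000
  assert(round_dir(1000, 500, 40) == dir::unknown);  // error band straddles 500
}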
namespace digits {
enum result {
more, // Generate more digits.
done, // Done generating digits.
error // Digit generation cancelled due to an error.
};
}
struct gen_digits_handler {
char* buf;
int size;
int precision;
int exp10;
bool fixed;
FMT_CONSTEXPR digits::result on_digit(char digit, uint64_t divisor,
uint64_t remainder, uint64_t error,
bool integral) {
FMT_ASSERT(remainder < divisor, "");
buf[size++] = digit;
if (!integral && error >= remainder) return digits::error;
if (size < precision) return digits::more;
if (!integral) {
// Check if error * 2 < divisor with overflow prevention.
// The check is not needed for the integral part because error = 1
// and divisor > (1 << 32) there.
if (error >= divisor || error >= divisor - error) return digits::error;
} else {
FMT_ASSERT(error == 1 && divisor > 2, "");
}
auto dir = get_round_direction(divisor, remainder, error);
if (dir != round_direction::up)
return dir == round_direction::down ? digits::done : digits::error;
++buf[size - 1];
for (int i = size - 1; i > 0 && buf[i] > '9'; --i) {
buf[i] = '0';
++buf[i - 1];
}
if (buf[0] > '9') {
buf[0] = '1';
if (fixed)
buf[size++] = '0';
else
++exp10;
}
return digits::done;
}
};
inline FMT_CONSTEXPR20 void adjust_precision(int& precision, int exp10) {
// Adjust fixed precision by exponent because it is relative to decimal
// point.
@ -2913,101 +2796,6 @@ inline FMT_CONSTEXPR20 void adjust_precision(int& precision, int exp10) {
precision += exp10;
}
// Generates output using the Grisu digit-gen algorithm.
// error: the size of the region (lower, upper) outside of which numbers
// definitely do not round to value (Delta in Grisu3).
FMT_INLINE FMT_CONSTEXPR20 auto grisu_gen_digits(fp value, uint64_t error,
int& exp,
gen_digits_handler& handler)
-> digits::result {
const fp one(1ULL << -value.e, value.e);
// The integral part of scaled value (p1 in Grisu) = value / one. It cannot be
// zero because it contains a product of two 64-bit numbers with MSB set (due
// to normalization) - 1, shifted right by at most 60 bits.
auto integral = static_cast<uint32_t>(value.f >> -one.e);
FMT_ASSERT(integral != 0, "");
FMT_ASSERT(integral == value.f >> -one.e, "");
// The fractional part of scaled value (p2 in Grisu) c = value % one.
uint64_t fractional = value.f & (one.f - 1);
exp = count_digits(integral); // kappa in Grisu.
// Non-fixed formats require at least one digit and no precision adjustment.
if (handler.fixed) {
adjust_precision(handler.precision, exp + handler.exp10);
// Check if precision is satisfied just by leading zeros, e.g.
// format("{:.2f}", 0.001) gives "0.00" without generating any digits.
if (handler.precision <= 0) {
if (handler.precision < 0) return digits::done;
// Divide by 10 to prevent overflow.
uint64_t divisor = data::power_of_10_64[exp - 1] << -one.e;
auto dir = get_round_direction(divisor, value.f / 10, error * 10);
if (dir == round_direction::unknown) return digits::error;
handler.buf[handler.size++] = dir == round_direction::up ? '1' : '0';
return digits::done;
}
}
// Generate digits for the integral part. This can produce up to 10 digits.
do {
uint32_t digit = 0;
auto divmod_integral = [&](uint32_t divisor) {
digit = integral / divisor;
integral %= divisor;
};
// This optimization by Milo Yip reduces the number of integer divisions by
// one per iteration.
switch (exp) {
case 10:
divmod_integral(1000000000);
break;
case 9:
divmod_integral(100000000);
break;
case 8:
divmod_integral(10000000);
break;
case 7:
divmod_integral(1000000);
break;
case 6:
divmod_integral(100000);
break;
case 5:
divmod_integral(10000);
break;
case 4:
divmod_integral(1000);
break;
case 3:
divmod_integral(100);
break;
case 2:
divmod_integral(10);
break;
case 1:
digit = integral;
integral = 0;
break;
default:
FMT_ASSERT(false, "invalid number of digits");
}
--exp;
auto remainder = (static_cast<uint64_t>(integral) << -one.e) + fractional;
auto result = handler.on_digit(static_cast<char>('0' + digit),
data::power_of_10_64[exp] << -one.e,
remainder, error, true);
if (result != digits::more) return result;
} while (exp > 0);
// Generate digits for the fractional part.
for (;;) {
fractional *= 10;
error *= 10;
char digit = static_cast<char>('0' + (fractional >> -one.e));
fractional &= one.f - 1;
--exp;
auto result = handler.on_digit(digit, one.f, fractional, error, false);
if (result != digits::more) return result;
}
}
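The fractional-part loop above is plain fixed-point arithmetic: multiplying the fraction by 10 pushes the next decimal digit into the bits above the binary point. A minimal sketch with an illustrative shift (not taken from fmt):
#include <cstdint>
#include <cstdio>
int main() {
  const int shift = 60;                 // plays the role of -one.e above
  uint64_t frac = 5ULL << (shift - 3);  // 0.625 in 4.60 fixed point
  for (int i = 0; i < 3; ++i) {
    frac *= 10;
    std::putchar('0' + static_cast<int>(frac >> shift));  // prints 6, 2, 5
    frac &= (1ULL << shift) - 1;
  }
  std::putchar('\n');
}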
class bigint {
private:
// A bigint is stored as an array of bigits (big digits), with bigit at index
@ -3108,7 +2896,7 @@ class bigint {
auto size = other.bigits_.size();
bigits_.resize(size);
auto data = other.bigits_.data();
std::copy(data, data + size, make_checked(bigits_.data(), size));
copy_str<bigit>(data, data + size, bigits_.data());
exp_ = other.exp_;
}
@ -3322,6 +3110,7 @@ FMT_CONSTEXPR20 inline void format_dragon(basic_fp<uint128_t> value,
}
int even = static_cast<int>((value.f & 1) == 0);
if (!upper) upper = &lower;
bool shortest = num_digits < 0;
if ((flags & dragon::fixup) != 0) {
if (add_compare(numerator, *upper, denominator) + even <= 0) {
--exp10;
@ -3334,7 +3123,7 @@ FMT_CONSTEXPR20 inline void format_dragon(basic_fp<uint128_t> value,
if ((flags & dragon::fixed) != 0) adjust_precision(num_digits, exp10 + 1);
}
// Invariant: value == (numerator / denominator) * pow(10, exp10).
if (num_digits < 0) {
if (shortest) {
// Generate the shortest representation.
num_digits = 0;
char* data = buf.data();
@ -3364,7 +3153,7 @@ FMT_CONSTEXPR20 inline void format_dragon(basic_fp<uint128_t> value,
}
// Generate the given number of digits.
exp10 -= num_digits - 1;
if (num_digits == 0) {
if (num_digits <= 0) {
denominator *= 10;
auto digit = add_compare(numerator, numerator, denominator) > 0 ? '1' : '0';
buf.push_back(digit);
@ -3389,7 +3178,8 @@ FMT_CONSTEXPR20 inline void format_dragon(basic_fp<uint128_t> value,
}
if (buf[0] == overflow) {
buf[0] = '1';
++exp10;
if ((flags & dragon::fixed) != 0) buf.push_back('0');
else ++exp10;
}
return;
}
@ -3508,7 +3298,7 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs,
int exp = 0;
bool use_dragon = true;
unsigned dragon_flags = 0;
if (!is_fast_float<Float>()) {
if (!is_fast_float<Float>() || is_constant_evaluated()) {
const auto inv_log2_10 = 0.3010299956639812; // 1 / log2(10)
using info = dragonbox::float_info<decltype(converted_value)>;
const auto f = basic_fp<typename info::carrier_uint>(converted_value);
@ -3516,10 +3306,11 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs,
// 10^(exp - 1) <= value < 10^exp or 10^exp <= value < 10^(exp + 1).
// This is based on log10(value) == log2(value) / log2(10) and approximation
// of log2(value) by e + num_fraction_bits idea from double-conversion.
exp = static_cast<int>(
std::ceil((f.e + count_digits<1>(f.f) - 1) * inv_log2_10 - 1e-10));
auto e = (f.e + count_digits<1>(f.f) - 1) * inv_log2_10 - 1e-10;
exp = static_cast<int>(e);
if (e > exp) ++exp; // Compute ceil.
dragon_flags = dragon::fixup;
} else if (!is_constant_evaluated() && precision < 0) {
} else if (precision < 0) {
// Use Dragonbox for the shortest format.
if (specs.binary32) {
auto dec = dragonbox::to_decimal(static_cast<float>(value));
@ -3529,25 +3320,6 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs,
auto dec = dragonbox::to_decimal(static_cast<double>(value));
write<char>(buffer_appender<char>(buf), dec.significand);
return dec.exponent;
} else if (is_constant_evaluated()) {
// Use Grisu + Dragon4 for the given precision:
// https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf.
const int min_exp = -60; // alpha in Grisu.
int cached_exp10 = 0; // K in Grisu.
fp normalized = normalize(fp(converted_value));
const auto cached_pow = get_cached_power(
min_exp - (normalized.e + fp::num_significand_bits), cached_exp10);
normalized = normalized * cached_pow;
gen_digits_handler handler{buf.data(), 0, precision, -cached_exp10, fixed};
if (grisu_gen_digits(normalized, 1, exp, handler) != digits::error &&
!is_constant_evaluated()) {
exp += handler.exp10;
buf.try_resize(to_unsigned(handler.size));
use_dragon = false;
} else {
exp += handler.size - cached_exp10 - 1;
precision = handler.precision;
}
} else {
// Extract significand bits and exponent bits.
using info = dragonbox::float_info<double>;
@ -3566,7 +3338,7 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs,
significand <<= 1;
} else {
// Normalize subnormal inputs.
FMT_ASSERT(significand != 0, "zeros should not appear hear");
FMT_ASSERT(significand != 0, "zeros should not appear here");
int shift = countl_zero(significand);
FMT_ASSERT(shift >= num_bits<uint64_t>() - num_significand_bits<double>(),
"");
@ -3603,9 +3375,7 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs,
}
// Compute the actual number of decimal digits to print.
if (fixed) {
adjust_precision(precision, exp + digits_in_the_first_segment);
}
if (fixed) adjust_precision(precision, exp + digits_in_the_first_segment);
// Use Dragon4 only when there might be not enough digits in the first
// segment.
@ -4091,8 +3861,7 @@ FMT_CONSTEXPR auto get_dynamic_spec(FormatArg arg, ErrorHandler eh) -> int {
}
template <typename Context, typename ID>
FMT_CONSTEXPR auto get_arg(Context& ctx, ID id) ->
typename Context::format_arg {
FMT_CONSTEXPR auto get_arg(Context& ctx, ID id) -> decltype(ctx.arg(id)) {
auto arg = ctx.arg(id);
if (!arg) ctx.on_error("argument not found");
return arg;
@ -4117,15 +3886,6 @@ FMT_CONSTEXPR void handle_dynamic_spec(int& value,
}
#if FMT_USE_USER_DEFINED_LITERALS
template <typename Char> struct udl_formatter {
basic_string_view<Char> str;
template <typename... T>
auto operator()(T&&... args) const -> std::basic_string<Char> {
return vformat(str, fmt::make_format_args<buffer_context<Char>>(args...));
}
};
# if FMT_USE_NONTYPE_TEMPLATE_ARGS
template <typename T, typename Char, size_t N,
fmt::detail_exported::fixed_string<Char, N> Str>
@ -4179,28 +3939,28 @@ FMT_API void format_error_code(buffer<char>& out, int error_code,
FMT_API void report_error(format_func func, int error_code,
const char* message) noexcept;
FMT_END_DETAIL_NAMESPACE
} // namespace detail
FMT_API auto vsystem_error(int error_code, string_view format_str,
format_args args) -> std::system_error;
/**
\rst
Constructs :class:`std::system_error` with a message formatted with
``fmt::format(fmt, args...)``.
\rst
Constructs :class:`std::system_error` with a message formatted with
``fmt::format(fmt, args...)``.
*error_code* is a system error code as given by ``errno``.
**Example**::
**Example**::
// This throws std::system_error with the description
// cannot open file 'madeup': No such file or directory
// or similar (system message may vary).
const char* filename = "madeup";
std::FILE* file = std::fopen(filename, "r");
if (!file)
throw fmt::system_error(errno, "cannot open file '{}'", filename);
\endrst
*/
// This throws std::system_error with the description
// cannot open file 'madeup': No such file or directory
// or similar (system message may vary).
const char* filename = "madeup";
std::FILE* file = std::fopen(filename, "r");
if (!file)
throw fmt::system_error(errno, "cannot open file '{}'", filename);
\endrst
*/
template <typename... T>
auto system_error(int error_code, format_string<T...> fmt, T&&... args)
-> std::system_error {
@ -4292,8 +4052,8 @@ class format_int {
template <typename T, typename Char>
struct formatter<T, Char, enable_if_t<detail::has_format_as<T>::value>>
: private formatter<detail::format_as_t<T>> {
using base = formatter<detail::format_as_t<T>>;
: private formatter<detail::format_as_t<T>, Char> {
using base = formatter<detail::format_as_t<T>, Char>;
using base::parse;
template <typename FormatContext>
@ -4302,22 +4062,24 @@ struct formatter<T, Char, enable_if_t<detail::has_format_as<T>::value>>
}
};
template <typename Char>
struct formatter<void*, Char> : formatter<const void*, Char> {
template <typename FormatContext>
auto format(void* val, FormatContext& ctx) const -> decltype(ctx.out()) {
return formatter<const void*, Char>::format(val, ctx);
}
};
#define FMT_FORMAT_AS(Type, Base) \
template <typename Char> \
struct formatter<Type, Char> : formatter<Base, Char> {}
FMT_FORMAT_AS(signed char, int);
FMT_FORMAT_AS(unsigned char, unsigned);
FMT_FORMAT_AS(short, int);
FMT_FORMAT_AS(unsigned short, unsigned);
FMT_FORMAT_AS(long, detail::long_type);
FMT_FORMAT_AS(unsigned long, detail::ulong_type);
FMT_FORMAT_AS(Char*, const Char*);
FMT_FORMAT_AS(std::basic_string<Char>, basic_string_view<Char>);
FMT_FORMAT_AS(std::nullptr_t, const void*);
FMT_FORMAT_AS(detail::std_string_view<Char>, basic_string_view<Char>);
FMT_FORMAT_AS(void*, const void*);
template <typename Char, size_t N>
struct formatter<Char[N], Char> : formatter<basic_string_view<Char>, Char> {
template <typename FormatContext>
FMT_CONSTEXPR auto format(const Char* val, FormatContext& ctx) const
-> decltype(ctx.out()) {
return formatter<basic_string_view<Char>, Char>::format(val, ctx);
}
};
struct formatter<Char[N], Char> : formatter<basic_string_view<Char>, Char> {};
/**
\rst
@ -4393,7 +4155,9 @@ template <> struct formatter<bytes> {
};
// group_digits_view is not derived from view because it copies the argument.
template <typename T> struct group_digits_view { T value; };
template <typename T> struct group_digits_view {
T value;
};
/**
\rst
@ -4523,7 +4287,8 @@ auto join(Range&& range, string_view sep)
std::string answer = fmt::to_string(42);
\endrst
*/
template <typename T, FMT_ENABLE_IF(!std::is_integral<T>::value)>
template <typename T, FMT_ENABLE_IF(!std::is_integral<T>::value &&
!detail::has_format_as<T>::value)>
inline auto to_string(const T& value) -> std::string {
auto buffer = memory_buffer();
detail::write<char>(appender(buffer), value);
@ -4548,7 +4313,15 @@ FMT_NODISCARD auto to_string(const basic_memory_buffer<Char, SIZE>& buf)
return std::basic_string<Char>(buf.data(), size);
}
FMT_BEGIN_DETAIL_NAMESPACE
template <typename T, FMT_ENABLE_IF(!std::is_integral<T>::value &&
detail::has_format_as<T>::value)>
inline auto to_string(const T& value) -> std::string {
return to_string(format_as(value));
}
FMT_END_EXPORT
namespace detail {
template <typename Char>
void vformat_to(buffer<Char>& buf, basic_string_view<Char> fmt,
@ -4619,6 +4392,8 @@ void vformat_to(buffer<Char>& buf, basic_string_view<Char> fmt,
detail::parse_format_string<false>(fmt, format_handler(out, fmt, args, loc));
}
FMT_BEGIN_EXPORT
#ifndef FMT_HEADER_ONLY
extern template FMT_API void vformat_to(buffer<char>&, string_view,
typename vformat_args<>::type,
@ -4631,7 +4406,7 @@ extern template FMT_API auto decimal_point_impl(locale_ref) -> char;
extern template FMT_API auto decimal_point_impl(locale_ref) -> wchar_t;
#endif // FMT_HEADER_ONLY
FMT_END_DETAIL_NAMESPACE
} // namespace detail
#if FMT_USE_USER_DEFINED_LITERALS
inline namespace literals {

View File

@ -1966,7 +1966,7 @@ class CYCLES_MATERIAL_PT_settings_surface(CyclesButtonsPanel, Panel):
cmat = mat.cycles
col = layout.column()
col.prop(cmat, "displacement_method", text="Displacement")
col.prop(mat, "displacement_method", text="Displacement")
col.prop(cmat, "emission_sampling")
col.prop(mat, "use_transparent_shadow")
col.prop(cmat, "use_bump_map_correction")

View File

@ -66,10 +66,10 @@ template<> struct AttributeConverter<blender::ColorGeometry4b> {
static constexpr auto type_desc = TypeRGBA;
static CyclesT convert(const blender::ColorGeometry4b &value)
{
return color_srgb_to_linear(make_float4(byte_to_float(value[0]),
byte_to_float(value[1]),
byte_to_float(value[2]),
byte_to_float(value[3])));
return color_srgb_to_linear_v4(make_float4(byte_to_float(value[0]),
byte_to_float(value[1]),
byte_to_float(value[2]),
byte_to_float(value[3])));
}
};
template<> struct AttributeConverter<bool> {

View File

@ -56,12 +56,6 @@ static VolumeInterpolation get_volume_interpolation(PointerRNA &ptr)
ptr, "volume_interpolation", VOLUME_NUM_INTERPOLATION, VOLUME_INTERPOLATION_LINEAR);
}
static DisplacementMethod get_displacement_method(PointerRNA &ptr)
{
return (DisplacementMethod)get_enum(
ptr, "displacement_method", DISPLACE_NUM_METHODS, DISPLACE_BUMP);
}
static EmissionSampling get_emission_sampling(PointerRNA &ptr)
{
return (EmissionSampling)get_enum(
@ -76,6 +70,12 @@ static int validate_enum_value(int value, int num_values, int default_value)
return value;
}
static DisplacementMethod get_displacement_method(BL::Material &b_mat)
{
int value = b_mat.displacement_method();
return (DisplacementMethod)validate_enum_value(value, DISPLACE_NUM_METHODS, DISPLACE_BUMP);
}
template<typename NodeType> static InterpolationType get_image_interpolation(NodeType &b_node)
{
int value = b_node.interpolation();
@ -1548,7 +1548,7 @@ void BlenderSync::sync_materials(BL::Depsgraph &b_depsgraph, bool update_all)
shader->set_volume_sampling_method(get_volume_sampling(cmat));
shader->set_volume_interpolation_method(get_volume_interpolation(cmat));
shader->set_volume_step_rate(get_float(cmat, "volume_step_rate"));
shader->set_displacement_method(get_displacement_method(cmat));
shader->set_displacement_method(get_displacement_method(b_mat));
shader->set_graph(graph);

View File

@ -642,7 +642,7 @@ Device *OIDNDenoiser::ensure_denoiser_device(Progress *progress)
{
#ifndef WITH_OPENIMAGEDENOISE
(void)progress;
path_trace_device_->set_error("Build without OpenImageDenoiser");
path_trace_device_->set_error("Failed to denoise, build has no OpenImageDenoise support");
return nullptr;
#else
if (!openimagedenoise_supported()) {

View File

@ -503,14 +503,12 @@ if(WITH_CYCLES_CUDA_BINARIES)
add_custom_command(
OUTPUT ${cuda_file}
COMMAND ${CCACHE_PROGRAM} ${cuda_nvcc_executable} ${_cuda_nvcc_args}
DEPENDS ${kernel_sources}
USES_TERMINAL)
DEPENDS ${kernel_sources})
else()
add_custom_command(
OUTPUT ${cuda_file}
COMMAND ${cuda_nvcc_executable} ${_cuda_nvcc_args}
DEPENDS ${kernel_sources}
USES_TERMINAL)
DEPENDS ${kernel_sources})
endif()
unset(_cuda_nvcc_args)
@ -647,8 +645,7 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
add_custom_command(
OUTPUT ${hip_file}
COMMAND ${hip_command} ${hip_flags}
DEPENDS ${kernel_sources}
USES_TERMINAL)
DEPENDS ${kernel_sources})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hip_file}" ${CYCLES_INSTALL_PATH}/lib)
list(APPEND hip_fatbins ${hip_file})
endmacro()
@ -716,8 +713,7 @@ if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES)
add_custom_command(
OUTPUT ${bitcode_file}
COMMAND ${hiprt_compile_command} ${hiprt_compile_flags}
DEPENDS ${kernel_sources}
USES_TERMINAL)
DEPENDS ${kernel_sources})
if(WIN32)
set(hiprt_link_command ${CMAKE_COMMAND})
set(hiprt_link_flags -E env "HIP_PATH=${HIP_ROOT_DIR}"
@ -737,8 +733,7 @@ if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES)
add_custom_command(
OUTPUT ${hiprt_file}
COMMAND ${hiprt_link_command} ${hiprt_link_flags}
DEPENDS ${bitcode_file}
USES_TERMINAL)
DEPENDS ${bitcode_file})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hiprt_file}" ${CYCLES_INSTALL_PATH}/lib)
add_custom_target(cycles_kernel_hiprt ALL DEPENDS ${hiprt_file})
cycles_set_solution_folder(cycles_kernel_hiprt)
@ -789,8 +784,7 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
${cuda_flags}
${input}
WORKING_DIRECTORY
"${CMAKE_CURRENT_SOURCE_DIR}"
USES_TERMINAL)
"${CMAKE_CURRENT_SOURCE_DIR}")
list(APPEND optix_ptx ${output})
@ -1083,8 +1077,7 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
"$<$<CONFIG:Debug>:${sycl_compiler_flags_Debug}>"
"$<$<CONFIG:MinSizeRel>:${sycl_compiler_flags_Release}>"
COMMAND_EXPAND_LISTS
DEPENDS ${cycles_oneapi_kernel_sources}
USES_TERMINAL)
DEPENDS ${cycles_oneapi_kernel_sources})
else()
if(NOT IGC_INSTALL_DIR)
get_filename_component(IGC_INSTALL_DIR "${sycl_compiler_root}/../lib/igc" ABSOLUTE)
@ -1109,8 +1102,7 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
"$<$<CONFIG:Debug>:${sycl_compiler_flags_Debug_str}>"
"$<$<CONFIG:MinSizeRel>:${sycl_compiler_flags_Release_str}>"
COMMAND_EXPAND_LISTS
DEPENDS ${cycles_oneapi_kernel_sources}
USES_TERMINAL)
DEPENDS ${cycles_oneapi_kernel_sources})
endif()
if(NOT WITH_BLENDER)

View File

@ -117,7 +117,7 @@ ccl_device float3 ensure_valid_specular_reflection(float3 Ng, float3 I, float3 N
const float3 R = 2 * dot(N, I) * N - I;
const float Iz = dot(I, Ng);
kernel_assert(Iz > 0);
kernel_assert(Iz >= 0);
/* Reflection rays may always be at least as shallow as the incoming ray. */
const float threshold = min(0.9f * Iz, 0.01f);

View File

@ -253,7 +253,7 @@ ccl_device float3 xyY_to_xyz(float x, float y, float Y)
* exp = exponent, encoded as uint32_t
* e2coeff = 2^(127/exponent - 127) * bias_coeff^(1/exponent), encoded as uint32_t
*/
template<unsigned exp, unsigned e2coeff> ccl_device_inline float4 fastpow(const float4 &arg)
template<unsigned exp, unsigned e2coeff> ccl_device_inline float4 fastpow_sse2(const float4 &arg)
{
float4 ret = arg * cast(make_int4(e2coeff));
ret = make_float4(cast(ret));
@ -263,7 +263,7 @@ template<unsigned exp, unsigned e2coeff> ccl_device_inline float4 fastpow(const
}
/* Improve x ^ 1.0f/5.0f solution with Newton-Raphson method */
ccl_device_inline float4 improve_5throot_solution(const float4 &old_result, const float4 &x)
ccl_device_inline float4 improve_5throot_solution_sse2(const float4 &old_result, const float4 &x)
{
float4 approx2 = old_result * old_result;
float4 approx4 = approx2 * approx2;
@ -273,7 +273,7 @@ ccl_device_inline float4 improve_5throot_solution(const float4 &old_result, cons
}
/* Calculate powf(x, 2.4). Working domain: 1e-10 < x < 1e+10 */
ccl_device_inline float4 fastpow24(const float4 &arg)
ccl_device_inline float4 fastpow24_sse2(const float4 &arg)
{
/* max, avg and |avg| errors were calculated in gcc without FMA instructions
* The final precision should be better than powf in glibc */
@ -281,27 +281,27 @@ ccl_device_inline float4 fastpow24(const float4 &arg)
/* Calculate x^4/5, coefficient 0.994 was constructed manually to minimize avg error */
/* 0x3F4CCCCD = 4/5 */
/* 0x4F55A7FB = 2^(127/(4/5) - 127) * 0.994^(1/(4/5)) */
float4 x = fastpow<0x3F4CCCCD, 0x4F55A7FB>(
float4 x = fastpow_sse2<0x3F4CCCCD, 0x4F55A7FB>(
arg); // error max = 0.17 avg = 0.0018 |avg| = 0.05
float4 arg2 = arg * arg;
float4 arg4 = arg2 * arg2;
/* error max = 0.018 avg = 0.0031 |avg| = 0.0031 */
x = improve_5throot_solution(x, arg4);
x = improve_5throot_solution_sse2(x, arg4);
/* error max = 0.00021 avg = 1.6e-05 |avg| = 1.6e-05 */
x = improve_5throot_solution(x, arg4);
x = improve_5throot_solution_sse2(x, arg4);
/* error max = 6.1e-07 avg = 5.2e-08 |avg| = 1.1e-07 */
x = improve_5throot_solution(x, arg4);
x = improve_5throot_solution_sse2(x, arg4);
return x * (x * x);
}
ccl_device float4 color_srgb_to_linear(const float4 &c)
ccl_device float4 color_srgb_to_linear_sse2(const float4 &c)
{
int4 cmp = c < make_float4(0.04045f);
float4 lt = max(c * make_float4(1.0f / 12.92f), make_float4(0.0f));
float4 gtebase = (c + make_float4(0.055f)) * make_float4(1.0f / 1.055f); /* fma */
float4 gte = fastpow24(gtebase);
float4 gte = fastpow24_sse2(gtebase);
return select(cmp, lt, gte);
}
#endif /* __KERNEL_SSE2__ */
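For reference, a scalar version of the transform this SSE2 path approximates, using the same constants as above (illustrative, not part of the patch; std::pow stands in for the fastpow24_sse2() approximation):
#include <cmath>
static float color_srgb_to_linear_scalar(float c)
{
  if (c < 0.04045f) {
    return (c < 0.0f) ? 0.0f : c * (1.0f / 12.92f); /* matches the max() above */
  }
  return std::pow((c + 0.055f) * (1.0f / 1.055f), 2.4f);
}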
@ -328,7 +328,7 @@ ccl_device float4 color_srgb_to_linear_v4(float4 c)
{
#ifdef __KERNEL_SSE2__
float4 r = c;
r = color_srgb_to_linear(r);
r = color_srgb_to_linear_sse2(r);
r.w = c.w;
return r;
#else

View File

@ -13,8 +13,8 @@ CCL_NAMESPACE_BEGIN
thread::thread(function<void()> run_cb) : run_cb_(run_cb), joined_(false)
{
#ifdef __APPLE__
/* Set the stack size to 2MB to match Linux. The default 512KB on macOS is
#if defined(__APPLE__) || defined(__linux__) && !defined(__GLIBC__)
/* Set the stack size to 2MB to match glibc. The default 512KB on macOS is
* too small for Embree, and consistent stack size also makes things more
* predictable in general. */
pthread_attr_t attribute;
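A minimal standalone sketch of the pattern used here (assumed, simplified usage rather than Blender's code): create the thread through a pthread_attr_t carrying an explicit 2 MiB stack, for platforms whose default is too small:
#include <pthread.h>
static void *worker(void *) { return nullptr; }
static int spawn_with_2mb_stack(pthread_t *tid)
{
  pthread_attr_t attr;
  pthread_attr_init(&attr);
  pthread_attr_setstacksize(&attr, 2 * 1024 * 1024); /* the size chosen above */
  const int err = pthread_create(tid, &attr, worker, nullptr);
  pthread_attr_destroy(&attr);
  return err;
}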
@ -43,7 +43,7 @@ void *thread::run(void *arg)
bool thread::join()
{
joined_ = true;
#ifdef __APPLE__
#if defined(__APPLE__) || defined(__linux__) && !defined(__GLIBC__)
return pthread_join(pthread_id, NULL) == 0;
#else
try {

View File

@ -43,7 +43,7 @@ class thread {
protected:
function<void()> run_cb_;
#ifdef __APPLE__
#if defined(__APPLE__) || defined(__linux__) && !defined(__GLIBC__)
pthread_t pthread_id;
#else
std::thread std_thread;

View File

@ -165,6 +165,7 @@ const UserDef U_default = {
.glalphaclip = 0.004,
.autokey_mode = (AUTOKEY_MODE_NORMAL & ~AUTOKEY_ON),
.autokey_flag = AUTOKEY_FLAG_XYZ2RGB,
.key_insert_channels = USER_ANIM_KEY_CHANNEL_LOCATION,
.animation_flag = USER_ANIM_HIGH_QUALITY_DRAWING,
.text_render = 0,
.navigation_mode = VIEW_NAVIGATION_WALK,

View File

@ -4684,7 +4684,7 @@ def km_object_mode(params):
("object.join", {"type": 'J', "value": 'PRESS', "ctrl": True}, None),
("wm.context_toggle", {"type": 'PERIOD', "value": 'PRESS', "ctrl": True},
{"properties": [("data_path", 'tool_settings.use_transform_data_origin')]}),
("anim.keyframe_insert_menu", {"type": 'I', "value": 'PRESS'}, None),
("anim.keyframe_insert", {"type": 'I', "value": 'PRESS'}, None),
("anim.keyframe_delete_v3d", {"type": 'I', "value": 'PRESS', "alt": True}, None),
("anim.keying_set_active_set", {"type": 'I', "value": 'PRESS', "shift": True, "ctrl": True, "alt": True}, None),
("collection.create", {"type": 'G', "value": 'PRESS', "ctrl": True}, None),
@ -4823,7 +4823,7 @@ def km_pose(params):
op_menu("VIEW3D_MT_bone_collections", {"type": 'M', "value": 'PRESS', "shift": True}),
("armature.move_to_collection", {"type": 'M', "value": 'PRESS'}, None),
("transform.bbone_resize", {"type": 'S', "value": 'PRESS', "shift": True, "ctrl": True, "alt": True}, None),
("anim.keyframe_insert_menu", {"type": 'I', "value": 'PRESS'}, None),
("anim.keyframe_insert", {"type": 'I', "value": 'PRESS'}, None),
("anim.keyframe_delete_v3d", {"type": 'I', "value": 'PRESS', "alt": True}, None),
("anim.keying_set_active_set", {"type": 'I', "value": 'PRESS', "shift": True, "ctrl": True, "alt": True}, None),
("pose.push", {"type": 'E', "value": 'PRESS', "ctrl": True}, None),

View File

@ -3225,7 +3225,7 @@ def km_pose(params):
("pose.select_hierarchy", {"type": 'DOWN_ARROW', "value": 'PRESS', "shift": True, "repeat": True},
{"properties": [("direction", 'CHILD'), ("extend", True)]}),
("pose.select_linked", {"type": 'L', "value": 'PRESS', "ctrl": True}, None),
("anim.keyframe_insert_menu", {"type": 'S', "value": 'PRESS', "shift": True}, None),
("anim.keyframe_insert", {"type": 'S', "value": 'PRESS', "shift": True}, None),
("anim.keyframe_insert_by_name", {"type": 'S', "value": 'PRESS'},
{"properties": [("type", 'LocRotScale')]}),
("anim.keyframe_insert_by_name", {"type": 'W', "value": 'PRESS', "shift": True},
@ -3297,7 +3297,7 @@ def km_object_mode(params):
{"properties": [("use_global", True), ("confirm", False)]}),
("object.duplicate_move", {"type": 'D', "value": 'PRESS', "ctrl": True}, None),
# Keyframing
("anim.keyframe_insert_menu", {"type": 'S', "value": 'PRESS', "shift": True}, None),
("anim.keyframe_insert", {"type": 'S', "value": 'PRESS', "shift": True}, None),
("anim.keyframe_insert_by_name", {"type": 'S', "value": 'PRESS'},
{"properties": [("type", 'LocRotScale')]}),
("anim.keyframe_insert_by_name", {"type": 'W', "value": 'PRESS', "shift": True},

View File

@ -4,6 +4,7 @@
import bpy
from bpy.types import Operator
from bpy.props import IntProperty
from bpy.app.translations import pgettext_data as data_
@ -459,6 +460,55 @@ class RepeatZoneItemMoveOperator(RepeatZoneOperator, ZoneMoveItemOperator, Opera
bl_options = {'REGISTER', 'UNDO'}
def _editable_tree_with_active_node_type(context, node_type):
space = context.space_data
# Needs active node editor and a tree.
if not space or space.type != 'NODE_EDITOR' or not space.edit_tree or space.edit_tree.library:
return False
node = context.active_node
if node is None or node.bl_idname != node_type:
return False
return True
class IndexSwitchItemAddOperator(Operator):
"""Add an item to the index switch"""
bl_idname = "node.index_switch_item_add"
bl_label = "Add Item"
bl_options = {'REGISTER', 'UNDO'}
@classmethod
def poll(cls, context):
return _editable_tree_with_active_node_type(context, 'GeometryNodeIndexSwitch')
def execute(self, context):
node = context.active_node
node.index_switch_items.new()
return {'FINISHED'}
class IndexSwitchItemRemoveOperator(Operator):
"""Remove an item from the index switch"""
bl_idname = "node.index_switch_item_remove"
bl_label = "Remove Item"
bl_options = {'REGISTER', 'UNDO'}
index: IntProperty(
name="Index",
description="Index of item to remove",
)
@classmethod
def poll(cls, context):
return _editable_tree_with_active_node_type(context, 'GeometryNodeIndexSwitch')
def execute(self, context):
node = context.active_node
items = node.index_switch_items
items.remove(items[self.index])
return {'FINISHED'}
classes = (
NewGeometryNodesModifier,
NewGeometryNodeTreeAssign,
@ -470,4 +520,6 @@ classes = (
RepeatZoneItemAddOperator,
RepeatZoneItemRemoveOperator,
RepeatZoneItemMoveOperator,
IndexSwitchItemAddOperator,
IndexSwitchItemRemoveOperator,
)

View File

@ -546,6 +546,7 @@ class NODE_MT_category_GEO_UTILITIES(Menu):
node_add_menu.add_node_type(layout, "FunctionNodeRandomValue")
node_add_menu.add_repeat_zone(layout, label="Repeat Zone")
node_add_menu.add_node_type(layout, "GeometryNodeSwitch")
node_add_menu.add_node_type(layout, "GeometryNodeIndexSwitch")
node_add_menu.draw_assets_for_catalog(layout, self.bl_label)

View File

@ -238,7 +238,7 @@ class DATA_PT_lightprobe_parallax(DataButtonsPanel, Panel):
class DATA_PT_lightprobe_display(DataButtonsPanel, Panel):
bl_label = "Viewport Display"
bl_options = {'DEFAULT_CLOSED'}
COMPAT_ENGINES = {'BLENDER_EEVEE', 'BLENDER_RENDER'}
COMPAT_ENGINES = {'BLENDER_EEVEE', 'BLENDER_EEVEE_NEXT', 'BLENDER_RENDER'}
def draw(self, context):
layout = self.layout

View File

@ -426,9 +426,7 @@ class DATA_PT_remesh(MeshButtonsPanel, Panel):
col = layout.column(heading="Preserve")
col.prop(mesh, "use_remesh_preserve_volume", text="Volume")
col.prop(mesh, "use_remesh_preserve_paint_mask", text="Paint Mask")
col.prop(mesh, "use_remesh_preserve_sculpt_face_sets", text="Face Sets")
col.prop(mesh, "use_remesh_preserve_vertex_colors", text="Color Attributes")
col.prop(mesh, "use_remesh_preserve_attributes", text="Attributes")
col.operator("object.voxel_remesh", text="Voxel Remesh")
else:

View File

@ -299,6 +299,9 @@ class EEVEE_NEXT_MATERIAL_PT_settings_surface(MaterialButtonsPanel, Panel):
col.prop(mat, "use_backface_culling", text="Camera")
col.prop(mat, "use_backface_culling_shadow", text="Shadow")
layout.prop(mat, "displacement_method", text="Displacement")
if mat.displacement_method == 'DISPLACEMENT':
layout.label(text="Unsupported displacement method", icon='ERROR')
layout.prop(mat, "max_vertex_displacement", text="Max Displacement")
layout.prop(mat, "use_transparent_shadow")

View File

@ -399,8 +399,8 @@ class OBJECT_PT_visibility(ObjectButtonsPanel, Panel):
layout.separator()
col = layout.column(heading="Light Probes")
col.prop(ob, "hide_probe_volume", text="Volume", toggle=False, invert_checkbox=True)
col.prop(ob, "hide_probe_cubemap", text="Cubemap", toggle=False, invert_checkbox=True)
col.prop(ob, "hide_probe_planar", text="Planar", toggle=False, invert_checkbox=True)
col.prop(ob, "hide_probe_sphere", text="Sphere", toggle=False, invert_checkbox=True)
col.prop(ob, "hide_probe_plane", text="Plane", toggle=False, invert_checkbox=True)
if ob.type == 'GPENCIL':
col = layout.column(heading="Grease Pencil")

View File

@ -615,6 +615,7 @@ class EeveeRaytracingScreenOption(RenderButtonsPanel, Panel):
layout.prop(props, "screen_trace_quality", text="Precision")
layout.prop(props, "screen_trace_thickness", text="Thickness")
layout.prop(props, "screen_trace_max_roughness", text="Max Roughness")
class EeveeRaytracingDenoisePanel(RenderButtonsPanel, Panel):

View File

@ -1145,6 +1145,34 @@ class NODE_PT_repeat_zone_items(Panel):
layout.prop(output_node, "inspection_index")
class NODE_PT_index_switch_node_items(Panel):
bl_space_type = 'NODE_EDITOR'
bl_region_type = 'UI'
bl_category = "Node"
bl_label = "Index Switch"
@classmethod
def poll(cls, context):
snode = context.space_data
if snode is None:
return False
node = context.active_node
if node is None or node.bl_idname != 'GeometryNodeIndexSwitch':
return False
return True
def draw(self, context):
layout = self.layout
node = context.active_node
layout.operator("node.index_switch_item_add", icon='ADD', text="Add Item")
col = layout.column()
for i, item in enumerate(node.index_switch_items):
row = col.row()
row.label(text=node.inputs[i + 1].name)
row.operator("node.index_switch_item_remove", icon='REMOVE', text="").index = i
# Grease Pencil properties
class NODE_PT_annotation(AnnotationDataPanel, Panel):
bl_space_type = 'NODE_EDITOR'
@ -1213,6 +1241,7 @@ classes = (
NODE_UL_simulation_zone_items,
NODE_PT_simulation_zone_items,
NODE_UL_repeat_zone_items,
NODE_PT_index_switch_node_items,
NODE_PT_repeat_zone_items,
NODE_PT_active_node_properties,

View File

@ -576,6 +576,8 @@ class USERPREF_PT_animation_keyframes(AnimationPanel, CenterAlignMixIn, Panel):
prefs = context.preferences
edit = prefs.edit
layout.prop(edit, "key_insert_channels", expand=True)
col = layout.column()
col.prop(edit, "use_visual_keying")
col.prop(edit, "use_keyframe_insert_needed", text="Only Insert Needed")

View File

@ -2755,7 +2755,8 @@ class VIEW3D_MT_object_animation(Menu):
def draw(self, _context):
layout = self.layout
layout.operator("anim.keyframe_insert_menu", text="Insert Keyframe...")
layout.operator("anim.keyframe_insert", text="Insert Keyframe")
layout.operator("anim.keyframe_insert_menu", text="Insert Keyframe with Keying Set")
layout.operator("anim.keyframe_delete_v3d", text="Delete Keyframes...")
layout.operator("anim.keyframe_clear_v3d", text="Clear Keyframes...")
layout.operator("anim.keying_set_active_set", text="Change Keying Set...")
@ -3019,7 +3020,8 @@ class VIEW3D_MT_object_context_menu(Menu):
layout.separator()
layout.operator("anim.keyframe_insert_menu", text="Insert Keyframe...")
layout.operator("anim.keyframe_insert", text="Insert Keyframe")
layout.operator("anim.keyframe_insert_menu", text="Insert Keyframe with Keying Set")
layout.separator()
@ -4177,7 +4179,8 @@ class VIEW3D_MT_pose_context_menu(Menu):
layout.operator_context = 'INVOKE_REGION_WIN'
layout.operator("anim.keyframe_insert_menu", text="Insert Keyframe...")
layout.operator("anim.keyframe_insert", text="Insert Keyframe")
layout.operator("anim.keyframe_insert_menu", text="Insert Keyframe with Keying Set")
layout.separator()
@ -6729,7 +6732,7 @@ class VIEW3D_PT_shading_render_pass(Panel):
bl_region_type = 'HEADER'
bl_label = "Render Pass"
bl_parent_id = "VIEW3D_PT_shading"
COMPAT_ENGINES = {'BLENDER_EEVEE'}
COMPAT_ENGINES = {'BLENDER_EEVEE', 'BLENDER_EEVEE_NEXT'}
@classmethod
def poll(cls, context):

View File

@ -1045,9 +1045,7 @@ class VIEW3D_PT_sculpt_voxel_remesh(Panel, View3DPaintPanel):
col = layout.column(heading="Preserve", align=True)
col.prop(mesh, "use_remesh_preserve_volume", text="Volume")
col.prop(mesh, "use_remesh_preserve_paint_mask", text="Paint Mask")
col.prop(mesh, "use_remesh_preserve_sculpt_face_sets", text="Face Sets")
col.prop(mesh, "use_remesh_preserve_vertex_colors", text="Color Attributes")
col.prop(mesh, "use_remesh_preserve_attributes", text="Attributes")
layout.operator("object.voxel_remesh", text="Remesh")

View File

@ -10,6 +10,9 @@
#pragma once
#include <string>
#include "BLI_vector.hh"
#include "DNA_anim_types.h"
#include "ED_transform.hh"
#include "RNA_types.hh"
@ -129,7 +132,7 @@ bool is_autokey_on(const Scene *scene);
bool is_autokey_mode(const Scene *scene, eAutokey_Mode mode);
/** Check if a flag is set for auto-key-framing (per scene takes precedence). */
bool is_autokey_flag(const Scene *scene, eAutokey_Flag flag);
bool is_autokey_flag(const Scene *scene, eKeyInsert_Flag flag);
/**
* Auto-keyframing feature - checks for whether anything should be done for the current frame.
@ -175,4 +178,20 @@ bool autokeyframe_property(bContext *C,
/** \} */
/**
* Insert keys for the given rna_path in the given action. The length of the values Span is
* expected to be the size of the property array.
* \param frame is expected to be in the local time of the action, meaning it has to be NLA mapped
* already.
* \returns The number of keys inserted.
*/
int insert_key_action(Main *bmain,
bAction *action,
PointerRNA *ptr,
const std::string &rna_path,
float frame,
const Span<float> values,
eInsertKeyFlags insert_key_flag,
eBezTriple_KeyframeType key_type);
} // namespace blender::animrig

View File

@ -8,6 +8,7 @@
#include <cfloat>
#include <cmath>
#include <string>
#include "ANIM_action.hh"
#include "ANIM_animdata.hh"
@ -38,6 +39,7 @@
#include "RNA_access.hh"
#include "RNA_define.hh"
#include "RNA_path.hh"
#include "RNA_prototypes.h"
#include "RNA_types.hh"
#include "WM_api.hh"
@ -965,4 +967,40 @@ int clear_keyframe(Main *bmain,
return key_count;
}
int insert_key_action(Main *bmain,
bAction *action,
PointerRNA *ptr,
const std::string &rna_path,
const float frame,
const Span<float> values,
eInsertKeyFlags insert_key_flag,
eBezTriple_KeyframeType key_type)
{
BLI_assert(bmain != nullptr);
BLI_assert(action != nullptr);
std::string group;
if (ptr->type == &RNA_PoseBone) {
bPoseChannel *pose_channel = static_cast<bPoseChannel *>(ptr->data);
group = pose_channel->name;
}
else {
group = "Object Transforms";
}
int property_array_index = 0;
int inserted_keys = 0;
for (float value : values) {
FCurve *fcurve = action_fcurve_ensure(
bmain, action, group.c_str(), ptr, rna_path.c_str(), property_array_index);
const bool inserted_key = insert_keyframe_value(
fcurve, frame, value, key_type, insert_key_flag);
if (inserted_key) {
inserted_keys++;
}
property_array_index++;
}
return inserted_keys;
}
} // namespace blender::animrig

View File

@ -48,7 +48,7 @@ bool is_autokey_mode(const Scene *scene, const eAutokey_Mode mode)
return U.autokey_mode == mode;
}
bool is_autokey_flag(const Scene *scene, const eAutokey_Flag flag)
bool is_autokey_flag(const Scene *scene, const eKeyInsert_Flag flag)
{
if (scene) {
return (scene->toolsettings->autokey_flag & flag) || (U.autokey_flag & flag);
@ -114,7 +114,7 @@ void autokeyframe_object(
C, &sources, active_ks, MODIFYKEY_MODE_INSERT, anim_eval_context.eval_time);
}
else if (is_autokey_flag(scene, AUTOKEY_FLAG_INSERTAVAIL)) {
else if (is_autokey_flag(scene, AUTOKEY_FLAG_INSERTAVAILABLE)) {
/* Only key on available channels. */
AnimData *adt = ob->adt;
ToolSettings *ts = scene->toolsettings;
@ -280,7 +280,7 @@ void autokeyframe_pose(bContext *C, Scene *scene, Object *ob, int tmode, short t
C, &sources, active_ks, MODIFYKEY_MODE_INSERT, anim_eval_context.eval_time);
}
/* only insert into available channels? */
else if (blender::animrig::is_autokey_flag(scene, AUTOKEY_FLAG_INSERTAVAIL)) {
else if (blender::animrig::is_autokey_flag(scene, AUTOKEY_FLAG_INSERTAVAILABLE)) {
if (act) {
LISTBASE_FOREACH (FCurve *, fcu, &act->curves) {
/* only insert keyframes for this F-Curve if it affects the current bone */

View File

@ -891,6 +891,14 @@ void gather_attributes_group_to_group(AttributeAccessor src_attributes,
const IndexMask &selection,
MutableAttributeAccessor dst_attributes);
void gather_attributes_to_groups(AttributeAccessor src_attributes,
eAttrDomain domain,
const AnonymousAttributePropagationInfo &propagation_info,
const Set<std::string> &skip,
OffsetIndices<int> dst_offsets,
const IndexMask &src_selection,
MutableAttributeAccessor dst_attributes);
void copy_attributes(const AttributeAccessor src_attributes,
const eAttrDomain domain,
const AnonymousAttributePropagationInfo &propagation_info,

View File

@ -14,6 +14,7 @@
#include "BLI_math_quaternion.hh"
#include "BLI_math_vector.h"
#include "BLI_math_vector.hh"
#include "BLI_offset_indices.hh"
#include "BKE_customdata.hh"
@ -634,6 +635,15 @@ template<typename T> using DefaultMixer = typename DefaultMixerStruct<T>::type;
void gather(GSpan src, Span<int> map, GMutableSpan dst);
void gather(const GVArray &src, Span<int> map, GMutableSpan dst);
void gather_group_to_group(OffsetIndices<int> src_offsets,
OffsetIndices<int> dst_offsets,
const IndexMask &selection,
GSpan src,
GMutableSpan dst);
void gather_to_groups(OffsetIndices<int> dst_offsets,
const IndexMask &src_selection,
GSpan src,
GMutableSpan dst);
/** \} */

View File

@ -13,6 +13,7 @@
#include "BLI_compute_context.hh"
struct bNode;
struct bNodeTree;
namespace blender::bke {
@ -39,27 +40,41 @@ class ModifierComputeContext : public ComputeContext {
void print_current_in_line(std::ostream &stream) const override;
};
class NodeGroupComputeContext : public ComputeContext {
class GroupNodeComputeContext : public ComputeContext {
private:
static constexpr const char *s_static_type = "NODE_GROUP";
int32_t node_id_;
#ifdef DEBUG
std::string debug_node_name_;
#endif
/**
* The caller node tree and group node are not always necessary or even available, but storing
* them here simplifies "walking up" the compute context to the parent node groups.
*/
const bNodeTree *caller_tree_ = nullptr;
const bNode *caller_group_node_ = nullptr;
public:
NodeGroupComputeContext(const ComputeContext *parent,
GroupNodeComputeContext(const ComputeContext *parent,
int32_t node_id,
const std::optional<ComputeContextHash> &cached_hash = {});
NodeGroupComputeContext(const ComputeContext *parent, const bNode &node);
GroupNodeComputeContext(const ComputeContext *parent,
const bNode &node,
const bNodeTree &caller_tree);
int32_t node_id() const
{
return node_id_;
}
const bNode *caller_group_node() const
{
return caller_group_node_;
}
const bNodeTree *caller_tree() const
{
return caller_tree_;
}
private:
void print_current_in_line(std::ostream &stream) const override;
};

View File

@ -21,7 +21,6 @@ Mesh *BKE_mesh_remesh_quadriflow(const Mesh *mesh,
void (*update_cb)(void *, float progress, int *cancel),
void *update_cb_data);
/* Data reprojection functions */
void BKE_mesh_remesh_reproject_paint_mask(Mesh *target, const Mesh *source);
void BKE_remesh_reproject_vertex_paint(Mesh *target, const Mesh *source);
void BKE_remesh_reproject_sculpt_face_sets(Mesh *target, const Mesh *source);
namespace blender::bke {
void mesh_remesh_reproject_attributes(const Mesh &src, Mesh &dst);
}

View File

@ -1316,6 +1316,7 @@ void BKE_nodetree_remove_layer_n(struct bNodeTree *ntree, struct Scene *scene, i
#define GEO_NODE_INPUT_EDGE_SMOOTH 2115
#define GEO_NODE_SPLIT_TO_INSTANCES 2116
#define GEO_NODE_INPUT_NAMED_LAYER_SELECTION 2117
#define GEO_NODE_INDEX_SWITCH 2118
/** \} */

View File

@ -905,7 +905,7 @@ static bool indices_are_range(const Span<int> indices, const IndexRange range)
}
return true;
},
[](const bool a, const bool b) { return a && b; });
std::logical_and());
}
void gather_attributes(const AttributeAccessor src_attributes,
@ -942,30 +942,6 @@ void gather_attributes(const AttributeAccessor src_attributes,
}
}
template<typename T>
static void gather_group_to_group(const OffsetIndices<int> src_offsets,
const OffsetIndices<int> dst_offsets,
const IndexMask &selection,
const Span<T> src,
MutableSpan<T> dst)
{
selection.foreach_index(GrainSize(512), [&](const int64_t src_i, const int64_t dst_i) {
dst.slice(dst_offsets[dst_i]).copy_from(src.slice(src_offsets[src_i]));
});
}
static void gather_group_to_group(const OffsetIndices<int> src_offsets,
const OffsetIndices<int> dst_offsets,
const IndexMask &selection,
const GSpan src,
GMutableSpan dst)
{
attribute_math::convert_to_static_type(src.type(), [&](auto dummy) {
using T = decltype(dummy);
gather_group_to_group(src_offsets, dst_offsets, selection, src.typed<T>(), dst.typed<T>());
});
}
void gather_attributes_group_to_group(const AttributeAccessor src_attributes,
const eAttrDomain domain,
const AnonymousAttributePropagationInfo &propagation_info,
@ -991,7 +967,37 @@ void gather_attributes_group_to_group(const AttributeAccessor src_attributes,
if (!dst) {
return true;
}
gather_group_to_group(src_offsets, dst_offsets, selection, src, dst.span);
attribute_math::gather_group_to_group(src_offsets, dst_offsets, selection, src, dst.span);
dst.finish();
return true;
});
}
void gather_attributes_to_groups(const AttributeAccessor src_attributes,
const eAttrDomain domain,
const AnonymousAttributePropagationInfo &propagation_info,
const Set<std::string> &skip,
const OffsetIndices<int> dst_offsets,
const IndexMask &src_selection,
MutableAttributeAccessor dst_attributes)
{
src_attributes.for_all([&](const AttributeIDRef &id, const AttributeMetaData meta_data) {
if (meta_data.domain != domain) {
return true;
}
if (id.is_anonymous() && !propagation_info.propagate(id.anonymous_id())) {
return true;
}
if (skip.contains(id.name())) {
return true;
}
const GVArraySpan src = *src_attributes.lookup(id, domain);
bke::GSpanAttributeWriter dst = dst_attributes.lookup_or_add_for_write_only_span(
id, domain, meta_data.data_type);
if (!dst) {
return true;
}
attribute_math::gather_to_groups(dst_offsets, src_selection, src, dst.span);
dst.finish();
return true;
});

View File

@ -176,4 +176,28 @@ void gather(const GVArray &src, const Span<int> map, GMutableSpan dst)
});
}
void gather_group_to_group(const OffsetIndices<int> src_offsets,
const OffsetIndices<int> dst_offsets,
const IndexMask &selection,
const GSpan src,
GMutableSpan dst)
{
attribute_math::convert_to_static_type(src.type(), [&](auto dummy) {
using T = decltype(dummy);
array_utils::gather_group_to_group(
src_offsets, dst_offsets, selection, src.typed<T>(), dst.typed<T>());
});
}
void gather_to_groups(const OffsetIndices<int> dst_offsets,
const IndexMask &src_selection,
const GSpan src,
GMutableSpan dst)
{
bke::attribute_math::convert_to_static_type(src.type(), [&](auto dummy) {
using T = decltype(dummy);
array_utils::gather_to_groups(dst_offsets, src_selection, src.typed<T>(), dst.typed<T>());
});
}
} // namespace blender::bke::attribute_math

View File

@ -23,7 +23,7 @@ void ModifierComputeContext::print_current_in_line(std::ostream &stream) const
stream << "Modifier: " << modifier_name_;
}
NodeGroupComputeContext::NodeGroupComputeContext(
GroupNodeComputeContext::GroupNodeComputeContext(
const ComputeContext *parent,
const int32_t node_id,
const std::optional<ComputeContextHash> &cached_hash)
@ -45,23 +45,21 @@ NodeGroupComputeContext::NodeGroupComputeContext(
}
}
NodeGroupComputeContext::NodeGroupComputeContext(const ComputeContext *parent, const bNode &node)
: NodeGroupComputeContext(parent, node.identifier)
GroupNodeComputeContext::GroupNodeComputeContext(const ComputeContext *parent,
const bNode &node,
const bNodeTree &caller_tree)
: GroupNodeComputeContext(parent, node.identifier)
{
#ifdef DEBUG
debug_node_name_ = node.name;
#endif
caller_group_node_ = &node;
caller_tree_ = &caller_tree;
}
void NodeGroupComputeContext::print_current_in_line(std::ostream &stream) const
void GroupNodeComputeContext::print_current_in_line(std::ostream &stream) const
{
#ifdef DEBUG
if (!debug_node_name_.empty()) {
stream << "Node: " << debug_node_name_;
if (caller_group_node_ != nullptr) {
stream << "Node: " << caller_group_node_->name;
return;
}
#endif
stream << "Node ID: " << node_id_;
}
SimulationZoneComputeContext::SimulationZoneComputeContext(const ComputeContext *parent,

View File

@ -204,29 +204,29 @@ void BKE_crazyspace_set_quats_mesh(Mesh *me,
for (const int corner : face) {
const int vert = corner_verts[corner];
if (!vert_tag[vert]) {
const int corner_prev = mesh::face_corner_prev(face, corner);
const int corner_next = mesh::face_corner_next(face, corner);
const int vert_prev = corner_verts[mesh::face_corner_prev(face, corner)];
const int vert_next = corner_verts[mesh::face_corner_next(face, corner)];
const float *co_prev, *co_curr, *co_next; /* orig */
const float *vd_prev, *vd_curr, *vd_next; /* deform */
/* retrieve mapped coordinates */
vd_prev = mappedcos[corner_prev];
vd_curr = mappedcos[corner];
vd_next = mappedcos[corner_next];
vd_prev = mappedcos[vert_prev];
vd_curr = mappedcos[vert];
vd_next = mappedcos[vert_next];
if (!origcos.is_empty()) {
co_prev = origcos[corner_prev];
co_curr = origcos[corner];
co_next = origcos[corner_next];
co_prev = origcos[vert_prev];
co_curr = origcos[vert];
co_next = origcos[vert_next];
}
else {
co_prev = positions[corner_prev];
co_curr = positions[corner];
co_next = positions[corner_next];
co_prev = positions[vert_prev];
co_curr = positions[vert];
co_next = positions[vert_next];
}
set_crazy_vertex_quat(quats[corner], co_curr, co_next, co_prev, vd_curr, vd_next, vd_prev);
set_crazy_vertex_quat(quats[vert], co_curr, co_next, co_prev, vd_curr, vd_next, vd_prev);
vert_tag[vert].set();
}

View File

@ -347,6 +347,25 @@ static Array<int> reverse_indices_in_groups(const Span<int> group_indices,
return results;
}
/* A version of #reverse_indices_in_groups that stores face indices instead of corner indices. */
static void reverse_group_indices_in_groups(const OffsetIndices<int> groups,
const Span<int> group_to_elem,
const OffsetIndices<int> offsets,
MutableSpan<int> results)
{
int *counts = MEM_cnew_array<int>(size_t(offsets.size()), __func__);
BLI_SCOPED_DEFER([&]() { MEM_freeN(counts); })
threading::parallel_for(groups.index_range(), 1024, [&](const IndexRange range) {
for (const int64_t face : range) {
for (const int elem : group_to_elem.slice(groups[face])) {
const int index_in_group = atomic_fetch_and_add_int32(&counts[elem], 1);
results[offsets[elem][index_in_group]] = int(face);
}
}
});
sort_small_groups(offsets, 1024, results);
}
static GroupedSpan<int> gather_groups(const Span<int> group_indices,
const int groups_num,
Array<int> &r_offsets,
@ -370,16 +389,21 @@ GroupedSpan<int> build_vert_to_edge_map(const Span<int2> edges,
Array<int> &r_indices)
{
r_offsets = create_reverse_offsets(edges.cast<int>(), verts_num);
r_indices.reinitialize(r_offsets.last());
Array<int> counts(verts_num, 0);
const OffsetIndices<int> offsets(r_offsets);
r_indices.reinitialize(offsets.total_size());
for (const int64_t edge_i : edges.index_range()) {
for (const int vert : {edges[edge_i][0], edges[edge_i][1]}) {
r_indices[r_offsets[vert] + counts[vert]] = int(edge_i);
counts[vert]++;
/* Version of #reverse_indices_in_groups that accounts for storing two indices for each edge. */
int *counts = MEM_cnew_array<int>(size_t(offsets.size()), __func__);
BLI_SCOPED_DEFER([&]() { MEM_freeN(counts); })
threading::parallel_for(edges.index_range(), 1024, [&](const IndexRange range) {
for (const int64_t edge : range) {
for (const int vert : {edges[edge][0], edges[edge][1]}) {
const int index_in_group = atomic_fetch_and_add_int32(&counts[vert], 1);
r_indices[offsets[vert][index_in_group]] = int(edge);
}
}
}
return {OffsetIndices<int>(r_offsets), r_indices};
});
return {offsets, r_indices};
}
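
The rewritten map builders above all share one scatter pattern: each element atomically claims the next free slot inside its group, using precomputed offsets. A minimal stand-alone sketch of that pattern, using std::atomic in place of atomic_fetch_and_add_int32, a serial loop in place of threading::parallel_for, and hypothetical example data:

#include <array>
#include <atomic>
#include <cstdio>
#include <vector>

int main()
{
  /* Edges as vertex pairs; offsets[v]..offsets[v + 1] delimits vertex v's slots. */
  const std::vector<std::array<int, 2>> edges = {{0, 1}, {1, 2}, {2, 0}};
  const std::vector<int> offsets = {0, 2, 4, 6}; /* Every vertex touches two edges here. */
  std::vector<std::atomic<int>> counts(offsets.size() - 1);
  std::vector<int> vert_to_edge(offsets.back());
  for (size_t edge = 0; edge < edges.size(); edge++) {
    for (const int vert : edges[edge]) {
      /* Claim the next free slot in this vertex's group; safe to parallelize. */
      const int index_in_group = counts[vert].fetch_add(1);
      vert_to_edge[offsets[vert] + index_in_group] = int(edge);
    }
  }
  for (const int edge : vert_to_edge) {
    printf("%d ", edge); /* Prints: 0 2 0 1 1 2 */
  }
  printf("\n");
}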
void build_vert_to_face_indices(const OffsetIndices<int> faces,
@ -387,13 +411,7 @@ void build_vert_to_face_indices(const OffsetIndices<int> faces,
const OffsetIndices<int> offsets,
MutableSpan<int> r_indices)
{
Array<int> counts(offsets.size(), 0);
for (const int64_t face_i : faces.index_range()) {
for (const int vert : corner_verts.slice(faces[face_i])) {
r_indices[offsets[vert].start() + counts[vert]] = int(face_i);
counts[vert]++;
}
}
reverse_group_indices_in_groups(faces, corner_verts, offsets, r_indices);
}
GroupedSpan<int> build_vert_to_face_map(const OffsetIndices<int> faces,
@ -438,14 +456,7 @@ GroupedSpan<int> build_edge_to_face_map(const OffsetIndices<int> faces,
{
r_offsets = create_reverse_offsets(corner_edges, edges_num);
r_indices.reinitialize(r_offsets.last());
Array<int> counts(edges_num, 0);
for (const int64_t face_i : faces.index_range()) {
for (const int edge : corner_edges.slice(faces[face_i])) {
r_indices[r_offsets[edge] + counts[edge]] = int(face_i);
counts[edge]++;
}
}
reverse_group_indices_in_groups(faces, corner_edges, OffsetIndices<int>(r_offsets), r_indices);
return {OffsetIndices<int>(r_offsets), r_indices};
}

View File

@ -17,6 +17,7 @@
#include "BLI_array.hh"
#include "BLI_array_utils.hh"
#include "BLI_enumerable_thread_specific.hh"
#include "BLI_index_range.hh"
#include "BLI_math_vector.h"
#include "BLI_span.hh"
@ -277,191 +278,329 @@ Mesh *BKE_mesh_remesh_voxel(const Mesh *mesh,
#endif
}
void BKE_mesh_remesh_reproject_paint_mask(Mesh *target, const Mesh *source)
namespace blender::bke {
static void calc_edge_centers(const Span<float3> positions,
const Span<int2> edges,
MutableSpan<float3> edge_centers)
{
BVHTreeFromMesh bvhtree = {nullptr};
BKE_bvhtree_from_mesh_get(&bvhtree, source, BVHTREE_FROM_VERTS, 2);
const Span<float3> target_positions = target->vert_positions();
const float *source_mask = (const float *)CustomData_get_layer_named(
&source->vert_data, CD_PROP_FLOAT, ".sculpt_mask");
if (source_mask == nullptr) {
return;
for (const int i : edges.index_range()) {
edge_centers[i] = math::midpoint(positions[edges[i][0]], positions[edges[i][1]]);
}
}
float *target_mask;
if (CustomData_has_layer_named(&target->vert_data, CD_PROP_FLOAT, ".sculpt_mask")) {
target_mask = (float *)CustomData_get_layer_named(
&target->vert_data, CD_PROP_FLOAT, ".sculpt_mask");
}
else {
target_mask = (float *)CustomData_add_layer_named(
&target->vert_data, CD_PROP_FLOAT, CD_CONSTRUCT, target->totvert, ".sculpt_mask");
static void calc_face_centers(const Span<float3> positions,
const OffsetIndices<int> faces,
const Span<int> corner_verts,
MutableSpan<float3> face_centers)
{
for (const int i : faces.index_range()) {
face_centers[i] = mesh::face_center_calc(positions, corner_verts.slice(faces[i]));
}
}
blender::threading::parallel_for(IndexRange(target->totvert), 4096, [&](const IndexRange range) {
for (const int i : range) {
BVHTreeNearest nearest;
nearest.index = -1;
nearest.dist_sq = FLT_MAX;
BLI_bvhtree_find_nearest(
bvhtree.tree, target_positions[i], &nearest, bvhtree.nearest_callback, &bvhtree);
if (nearest.index != -1) {
target_mask[i] = source_mask[nearest.index];
static void find_nearest_tris(const Span<float3> positions,
BVHTreeFromMesh &bvhtree,
MutableSpan<int> tris)
{
for (const int i : positions.index_range()) {
BVHTreeNearest nearest;
nearest.index = -1;
nearest.dist_sq = FLT_MAX;
BLI_bvhtree_find_nearest(
bvhtree.tree, positions[i], &nearest, bvhtree.nearest_callback, &bvhtree);
tris[i] = nearest.index;
}
}
static void find_nearest_tris_parallel(const Span<float3> positions,
BVHTreeFromMesh &bvhtree,
MutableSpan<int> tris)
{
threading::parallel_for(tris.index_range(), 512, [&](const IndexRange range) {
find_nearest_tris(positions.slice(range), bvhtree, tris.slice(range));
});
}
static void find_nearest_verts(const Span<float3> positions,
const Span<int> corner_verts,
const Span<MLoopTri> src_tris,
const Span<float3> dst_positions,
const Span<int> nearest_vert_tris,
MutableSpan<int> nearest_verts)
{
threading::parallel_for(dst_positions.index_range(), 512, [&](const IndexRange range) {
for (const int dst_vert : range) {
const float3 &dst_position = dst_positions[dst_vert];
const MLoopTri &src_tri = src_tris[nearest_vert_tris[dst_vert]];
std::array<float, 3> distances;
for (const int i : IndexRange(3)) {
const int src_vert = corner_verts[src_tri.tri[i]];
distances[i] = math::distance_squared(positions[src_vert], dst_position);
}
const int min = std::min_element(distances.begin(), distances.end()) - distances.begin();
nearest_verts[dst_vert] = corner_verts[src_tri.tri[min]];
}
});
free_bvhtree_from_mesh(&bvhtree);
}
void BKE_remesh_reproject_sculpt_face_sets(Mesh *target, const Mesh *source)
static void find_nearest_faces(const Span<int> src_tri_faces,
const Span<float3> dst_positions,
const OffsetIndices<int> dst_faces,
const Span<int> dst_corner_verts,
BVHTreeFromMesh &bvhtree,
MutableSpan<int> nearest_faces)
{
using namespace blender;
using namespace blender::bke;
const AttributeAccessor src_attributes = source->attributes();
MutableAttributeAccessor dst_attributes = target->attributes_for_write();
const Span<float3> target_positions = target->vert_positions();
const OffsetIndices target_faces = target->faces();
const Span<int> target_corner_verts = target->corner_verts();
struct TLS {
Vector<float3> face_centers;
Vector<int> tri_indices;
};
threading::EnumerableThreadSpecific<TLS> all_tls;
threading::parallel_for(dst_faces.index_range(), 512, [&](const IndexRange range) {
TLS &tls = all_tls.local();
Vector<float3> &face_centers = tls.face_centers;
face_centers.reinitialize(range.size());
calc_face_centers(dst_positions, dst_faces.slice(range), dst_corner_verts, face_centers);
const VArray src_face_sets = *src_attributes.lookup<int>(".sculpt_face_set", ATTR_DOMAIN_FACE);
if (!src_face_sets) {
return;
}
SpanAttributeWriter<int> dst_face_sets = dst_attributes.lookup_or_add_for_write_only_span<int>(
".sculpt_face_set", ATTR_DOMAIN_FACE);
if (!dst_face_sets) {
return;
}
Vector<int> &tri_indices = tls.tri_indices;
tri_indices.reinitialize(range.size());
find_nearest_tris(face_centers, bvhtree, tri_indices);
const VArraySpan<int> src(src_face_sets);
MutableSpan<int> dst = dst_face_sets.span;
const blender::Span<int> looptri_faces = source->looptri_faces();
BVHTreeFromMesh bvhtree = {nullptr};
BKE_bvhtree_from_mesh_get(&bvhtree, source, BVHTREE_FROM_LOOPTRI, 2);
blender::threading::parallel_for(
IndexRange(target->faces_num), 2048, [&](const IndexRange range) {
for (const int i : range) {
BVHTreeNearest nearest;
nearest.index = -1;
nearest.dist_sq = FLT_MAX;
const float3 from_co = mesh::face_center_calc(
target_positions, target_corner_verts.slice(target_faces[i]));
BLI_bvhtree_find_nearest(
bvhtree.tree, from_co, &nearest, bvhtree.nearest_callback, &bvhtree);
if (nearest.index != -1) {
dst[i] = src[looptri_faces[nearest.index]];
}
else {
dst[i] = 1;
}
}
});
free_bvhtree_from_mesh(&bvhtree);
dst_face_sets.finish();
array_utils::gather(src_tri_faces, tri_indices.as_span(), nearest_faces.slice(range));
});
}
void BKE_remesh_reproject_vertex_paint(Mesh *target, const Mesh *source)
static void find_nearest_corners(const Span<float3> src_positions,
const OffsetIndices<int> src_faces,
const Span<int> src_corner_verts,
const Span<int> src_tri_faces,
const Span<float3> dst_positions,
const Span<int> dst_corner_verts,
const Span<int> nearest_vert_tris,
MutableSpan<int> nearest_corners)
{
using namespace blender;
using namespace blender::bke;
const AttributeAccessor src_attributes = source->attributes();
MutableAttributeAccessor dst_attributes = target->attributes_for_write();
threading::parallel_for(nearest_corners.index_range(), 512, [&](const IndexRange range) {
Vector<float, 64> distances;
for (const int dst_corner : range) {
const int dst_vert = dst_corner_verts[dst_corner];
const float3 &dst_position = dst_positions[dst_vert];
const int src_tri = nearest_vert_tris[dst_vert];
const IndexRange src_face = src_faces[src_tri_faces[src_tri]];
const Span<int> src_face_verts = src_corner_verts.slice(src_face);
/* Find the corner closest to the destination position within the nearest source face. */
distances.reinitialize(src_face_verts.size());
for (const int i : src_face_verts.index_range()) {
const int src_vert = src_face_verts[i];
distances[i] = math::distance_squared(src_positions[src_vert], dst_position);
}
const int min = std::min_element(distances.begin(), distances.end()) - distances.begin();
nearest_corners[dst_corner] = src_face[min];
}
});
}
static void find_nearest_edges(const Span<float3> src_positions,
const Span<int2> src_edges,
const OffsetIndices<int> src_faces,
const Span<int> src_corner_edges,
const Span<int> src_tri_faces,
const Span<float3> dst_positions,
const Span<int2> dst_edges,
BVHTreeFromMesh &bvhtree,
MutableSpan<int> nearest_edges)
{
struct TLS {
Vector<float3> edge_centers;
Vector<int> tri_indices;
Vector<int> face_indices;
Vector<float> distances;
};
threading::EnumerableThreadSpecific<TLS> all_tls;
threading::parallel_for(nearest_edges.index_range(), 512, [&](const IndexRange range) {
TLS &tls = all_tls.local();
Vector<float3> &edge_centers = tls.edge_centers;
edge_centers.reinitialize(range.size());
calc_edge_centers(dst_positions, dst_edges.slice(range), edge_centers);
Vector<int> &tri_indices = tls.tri_indices;
tri_indices.reinitialize(range.size());
find_nearest_tris_parallel(edge_centers, bvhtree, tri_indices);
Vector<int> &face_indices = tls.face_indices;
face_indices.reinitialize(range.size());
array_utils::gather(src_tri_faces, tri_indices.as_span(), face_indices.as_mutable_span());
/* Find the source edge that's closest to the destination edge in the nearest face. Search
* through the whole face instead of just the triangle because the triangle has edges that
* might not be actual mesh edges. */
Vector<float, 64> distances;
for (const int i : range.index_range()) {
const int dst_edge = range[i];
const float3 &dst_position = edge_centers[i];
const int src_face = face_indices[i];
const Span<int> src_face_edges = src_corner_edges.slice(src_faces[src_face]);
distances.reinitialize(src_face_edges.size());
for (const int i : src_face_edges.index_range()) {
const int2 src_edge = src_edges[src_face_edges[i]];
const float3 src_center = math::midpoint(src_positions[src_edge[0]],
src_positions[src_edge[1]]);
distances[i] = math::distance_squared(src_center, dst_position);
}
const int min = std::min_element(distances.begin(), distances.end()) - distances.begin();
nearest_edges[dst_edge] = src_face_edges[min];
}
});
}
static void gather_attributes(const Span<AttributeIDRef> ids,
const AttributeAccessor src_attributes,
const eAttrDomain domain,
const Span<int> index_map,
MutableAttributeAccessor dst_attributes)
{
for (const AttributeIDRef &id : ids) {
const GVArraySpan src = *src_attributes.lookup(id, domain);
const eCustomDataType type = cpp_type_to_custom_data_type(src.type());
GSpanAttributeWriter dst = dst_attributes.lookup_or_add_for_write_only_span(id, domain, type);
attribute_math::gather(src, index_map, dst.span);
dst.finish();
}
}
void mesh_remesh_reproject_attributes(const Mesh &src, Mesh &dst)
{
/* Gather attributes to transfer for each domain. This makes it possible to skip
* building index maps and even the main BVH tree if there are no attributes. */
const AttributeAccessor src_attributes = src.attributes();
Vector<AttributeIDRef> point_ids;
Vector<AttributeIDRef> edge_ids;
Vector<AttributeIDRef> face_ids;
Vector<AttributeIDRef> corner_ids;
source->attributes().for_all([&](const AttributeIDRef &id, const AttributeMetaData &meta_data) {
if (CD_TYPE_AS_MASK(meta_data.data_type) & CD_MASK_COLOR_ALL) {
if (meta_data.domain == ATTR_DOMAIN_POINT) {
src_attributes.for_all([&](const AttributeIDRef &id, const AttributeMetaData &meta_data) {
if (ELEM(id.name(), "position", ".edge_verts", ".corner_vert", ".corner_edge")) {
return true;
}
switch (meta_data.domain) {
case ATTR_DOMAIN_POINT:
point_ids.append(id);
}
else if (meta_data.domain == ATTR_DOMAIN_CORNER) {
break;
case ATTR_DOMAIN_EDGE:
edge_ids.append(id);
break;
case ATTR_DOMAIN_FACE:
face_ids.append(id);
break;
case ATTR_DOMAIN_CORNER:
corner_ids.append(id);
}
break;
default:
BLI_assert_unreachable();
break;
}
return true;
});
if (point_ids.is_empty() && corner_ids.is_empty()) {
if (point_ids.is_empty() && edge_ids.is_empty() && face_ids.is_empty() && corner_ids.is_empty())
{
return;
}
GroupedSpan<int> source_lmap;
GroupedSpan<int> target_lmap;
BVHTreeFromMesh bvhtree = {nullptr};
threading::parallel_invoke(
[&]() { BKE_bvhtree_from_mesh_get(&bvhtree, source, BVHTREE_FROM_VERTS, 2); },
[&]() { source_lmap = source->vert_to_corner_map(); },
[&]() { target_lmap = target->vert_to_corner_map(); });
const Span<float3> src_positions = src.vert_positions();
const OffsetIndices src_faces = src.faces();
const Span<int> src_corner_verts = src.corner_verts();
const Span<MLoopTri> src_tris = src.looptris();
const Span<float3> target_positions = target->vert_positions();
Array<int> nearest_src_verts(target_positions.size());
threading::parallel_for(target_positions.index_range(), 1024, [&](const IndexRange range) {
for (const int i : range) {
BVHTreeNearest nearest;
nearest.index = -1;
nearest.dist_sq = FLT_MAX;
BLI_bvhtree_find_nearest(
bvhtree.tree, target_positions[i], &nearest, bvhtree.nearest_callback, &bvhtree);
nearest_src_verts[i] = nearest.index;
/* The main idea in the following code is to trade some complexity in sampling for the benefit of
* only using and building a single BVH tree. Since sculpt mode doesn't generally deal with loose
* vertices and edges, we use the standard "triangles" BVH which won't contain them. Also, only
* relying on a single BVH should reduce memory usage, and work better if the BVH and PBVH are
* ever merged.
*
* One key decision is separating building transfer index maps from actually transferring any
* attribute data. This is important to keep attribute storage independent from the specifics of
* the decisions made here, which mainly results in easier refactoring, more generic code, and
* possibly improved performance from lower cache usage in the "complex" sampling part of the
* algorithm and the copying itself. */
BVHTreeFromMesh bvhtree{};
BKE_bvhtree_from_mesh_get(&bvhtree, &src, BVHTREE_FROM_LOOPTRI, 2);
const Span<float3> dst_positions = dst.vert_positions();
const OffsetIndices dst_faces = dst.faces();
const Span<int> dst_corner_verts = dst.corner_verts();
MutableAttributeAccessor dst_attributes = dst.attributes_for_write();
if (!point_ids.is_empty() || !corner_ids.is_empty()) {
Array<int> vert_nearest_tris(dst_positions.size());
find_nearest_tris_parallel(dst_positions, bvhtree, vert_nearest_tris);
if (!point_ids.is_empty()) {
Array<int> map(dst.totvert);
find_nearest_verts(
src_positions, src_corner_verts, src_tris, dst_positions, vert_nearest_tris, map);
gather_attributes(point_ids, src_attributes, ATTR_DOMAIN_POINT, map, dst_attributes);
}
});
for (const AttributeIDRef &id : point_ids) {
const GVArraySpan src = *src_attributes.lookup(id, ATTR_DOMAIN_POINT);
GSpanAttributeWriter dst = dst_attributes.lookup_or_add_for_write_only_span(
id, ATTR_DOMAIN_POINT, cpp_type_to_custom_data_type(src.type()));
attribute_math::gather(src, nearest_src_verts, dst.span);
dst.finish();
}
if (!corner_ids.is_empty()) {
for (const AttributeIDRef &id : corner_ids) {
const GVArraySpan src = *src_attributes.lookup(id, ATTR_DOMAIN_CORNER);
GSpanAttributeWriter dst = dst_attributes.lookup_or_add_for_write_only_span(
id, ATTR_DOMAIN_CORNER, cpp_type_to_custom_data_type(src.type()));
threading::parallel_for(target_positions.index_range(), 1024, [&](const IndexRange range) {
src.type().to_static_type_tag<ColorGeometry4b, ColorGeometry4f>([&](auto type_tag) {
using T = typename decltype(type_tag)::type;
if constexpr (std::is_void_v<T>) {
BLI_assert_unreachable();
}
else {
const Span<T> src_typed = src.typed<T>();
MutableSpan<T> dst_typed = dst.span.typed<T>();
for (const int dst_vert : range) {
/* Find the average value at the corners of the closest vertex on the
* source mesh. */
const int src_vert = nearest_src_verts[dst_vert];
T value;
typename blender::bke::attribute_math::DefaultMixer<T> mixer({&value, 1});
for (const int corner : source_lmap[src_vert]) {
mixer.mix_in(0, src_typed[corner]);
}
dst_typed.fill_indices(target_lmap[dst_vert], value);
}
}
});
});
dst.finish();
if (!corner_ids.is_empty()) {
const Span<int> src_tri_faces = src.looptri_faces();
Array<int> map(dst.totloop);
find_nearest_corners(src_positions,
src_faces,
src_corner_verts,
src_tri_faces,
dst_positions,
dst_corner_verts,
vert_nearest_tris,
map);
gather_attributes(corner_ids, src_attributes, ATTR_DOMAIN_CORNER, map, dst_attributes);
}
}
/* Make sure active/default color attribute (names) are brought over. */
if (source->active_color_attribute) {
BKE_id_attributes_active_color_set(&target->id, source->active_color_attribute);
if (!edge_ids.is_empty()) {
const Span<int2> src_edges = src.edges();
const Span<int> src_corner_edges = src.corner_edges();
const Span<int> src_tri_faces = src.looptri_faces();
const Span<int2> dst_edges = dst.edges();
Array<int> map(dst.totedge);
find_nearest_edges(src_positions,
src_edges,
src_faces,
src_corner_edges,
src_tri_faces,
dst_positions,
dst_edges,
bvhtree,
map);
gather_attributes(edge_ids, src_attributes, ATTR_DOMAIN_EDGE, map, dst_attributes);
}
if (source->default_color_attribute) {
BKE_id_attributes_default_color_set(&target->id, source->default_color_attribute);
if (!face_ids.is_empty()) {
const Span<int> src_tri_faces = src.looptri_faces();
Array<int> map(dst.faces_num);
find_nearest_faces(src_tri_faces, dst_positions, dst_faces, dst_corner_verts, bvhtree, map);
gather_attributes(face_ids, src_attributes, ATTR_DOMAIN_FACE, map, dst_attributes);
}
if (src.active_color_attribute) {
BKE_id_attributes_active_color_set(&dst.id, src.active_color_attribute);
}
if (src.default_color_attribute) {
BKE_id_attributes_default_color_set(&dst.id, src.default_color_attribute);
}
free_bvhtree_from_mesh(&bvhtree);
}
} // namespace blender::bke
Mesh *BKE_mesh_remesh_voxel_fix_poles(const Mesh *mesh)
{
const BMAllocTemplate allocsize = BMALLOC_TEMPLATE_FROM_ME(mesh);

View File

@ -788,6 +788,9 @@ void ntreeBlendWrite(BlendWriter *writer, bNodeTree *ntree)
if (node->type == GEO_NODE_REPEAT_OUTPUT) {
blender::nodes::RepeatItemsAccessor::blend_write(writer, *node);
}
if (node->type == GEO_NODE_INDEX_SWITCH) {
blender::nodes::IndexSwitchItemsAccessor::blend_write(writer, *node);
}
}
LISTBASE_FOREACH (bNodeLink *, link, &ntree->links) {
@ -1027,6 +1030,10 @@ void ntreeBlendReadData(BlendDataReader *reader, ID *owner_id, bNodeTree *ntree)
blender::nodes::RepeatItemsAccessor::blend_read_data(reader, *node);
break;
}
case GEO_NODE_INDEX_SWITCH: {
blender::nodes::IndexSwitchItemsAccessor::blend_read_data(reader, *node);
break;
}
default:
break;

View File

@ -165,6 +165,29 @@ inline void gather(const VArray<T> &src,
});
}
template<typename T>
inline void gather_group_to_group(const OffsetIndices<int> src_offsets,
const OffsetIndices<int> dst_offsets,
const IndexMask &selection,
const Span<T> src,
MutableSpan<T> dst)
{
selection.foreach_index(GrainSize(512), [&](const int64_t src_i, const int64_t dst_i) {
dst.slice(dst_offsets[dst_i]).copy_from(src.slice(src_offsets[src_i]));
});
}
template<typename T>
inline void gather_to_groups(const OffsetIndices<int> dst_offsets,
const IndexMask &src_selection,
const Span<T> src,
MutableSpan<T> dst)
{
src_selection.foreach_index(GrainSize(1024), [&](const int src_i, const int dst_i) {
dst.slice(dst_offsets[dst_i]).fill(src[src_i]);
});
}
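
A stand-alone illustration of the gather_to_groups semantics, with plain vectors standing in for Blender's OffsetIndices, IndexMask, and Span types (the data is hypothetical): each selected source value fills one whole destination group.

#include <cstdio>
#include <vector>

int main()
{
  const std::vector<int> dst_offsets = {0, 3, 5}; /* Two destination groups. */
  const std::vector<int> src_selection = {4, 7};  /* src_i for dst groups 0 and 1. */
  const std::vector<int> src = {0, 1, 2, 3, 40, 5, 6, 70};
  std::vector<int> dst(dst_offsets.back());
  for (size_t dst_i = 0; dst_i < src_selection.size(); dst_i++) {
    const int src_i = src_selection[dst_i];
    for (int j = dst_offsets[dst_i]; j < dst_offsets[dst_i + 1]; j++) {
      dst[j] = src[src_i]; /* Fill the whole group with the selected value. */
    }
  }
  for (const int value : dst) {
    printf("%d ", value); /* Prints: 40 40 40 70 70 */
  }
  printf("\n");
}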
/**
* Copy the \a src data from the groups defined by \a src_offsets to the groups in \a dst defined
* by \a dst_offsets. Groups to use are masked by \a selection, and it is assumed that the

View File

@ -1015,6 +1015,33 @@ class Map {
return this->count_collisions__impl(key, hash_(key));
}
/**
* True if both maps have the same key-value-pairs.
*/
friend bool operator==(const Map &a, const Map &b)
{
if (a.size() != b.size()) {
return false;
}
for (const Item item : a.items()) {
const Key &key = item.key;
const Value &value_a = item.value;
const Value *value_b = b.lookup_ptr(key);
if (value_b == nullptr) {
return false;
}
if (value_a != *value_b) {
return false;
}
}
return true;
}
friend bool operator!=(const Map &a, const Map &b)
{
return !(a == b);
}
private:
BLI_NOINLINE void realloc_and_reinsert(int64_t min_usable_slots)
{

View File

@ -21,6 +21,7 @@
*/
#include "BLI_map.hh"
#include "BLI_struct_equality_utils.hh"
#include "BLI_vector.hh"
namespace blender {
@ -157,6 +158,8 @@ template<typename Key, typename Value> class MultiValueMap {
{
map_.clear_and_shrink();
}
BLI_STRUCT_EQUALITY_OPERATORS_1(MultiValueMap, map_)
};
} // namespace blender

View File

@ -85,8 +85,8 @@ template<typename T> class OffsetIndices {
*/
OffsetIndices slice(const IndexRange range) const
{
BLI_assert(offsets_.index_range().drop_back(1).contains(range.last()));
return OffsetIndices(offsets_.slice(range.start(), range.one_after_last()));
BLI_assert(range.is_empty() || offsets_.index_range().drop_back(1).contains(range.last()));
return OffsetIndices(offsets_.slice(range.start(), range.size() + 1));
}
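
The slice fix above does two things: the assertion now permits empty ranges, and the correct count is passed to Span::slice, which takes (start, size). Slicing groups [start, start + size) needs the size + 1 offsets that bound them; the old call passed range.one_after_last() (= start + size) as the count, which equals size + 1 only when start == 1 and over-reads for larger starts. A hypothetical stand-in showing the intended arithmetic:

#include <cassert>
#include <vector>

int main()
{
  const std::vector<int> offsets = {0, 2, 5, 9}; /* Three groups. */
  const int start = 2, size = 1; /* Slice holding only the last group. */
  /* Correct: keep offsets[start .. start + size], i.e. size + 1 values. */
  const std::vector<int> sliced(offsets.begin() + start, offsets.begin() + start + size + 1);
  assert(sliced.front() == 5 && sliced.back() == 9);
  /* Passing start + size (= 3) as the count would read offsets[2..4] and
   * run past the end of the array. */
}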
Span<T> data() const

View File

@ -14,6 +14,7 @@ set(INC
set(INC_SYS
../../../extern/wcwidth
../../../extern/json/include
../../../extern/fmtlib/include
${EIGEN3_INCLUDE_DIRS}
${ZLIB_INCLUDE_DIRS}
@ -397,6 +398,7 @@ set(LIB
PRIVATE bf::intern::guardedalloc
extern_wcwidth
PRIVATE bf::intern::atomic
PRIVATE extern_fmtlib
${ZLIB_LIBRARIES}
${ZSTD_LIBRARIES}
)

View File

@ -218,7 +218,7 @@ MatBase<T, Size, Size> pseudo_invert(const MatBase<T, Size, Size> &mat, T epsilo
{
/* Start by trying normal inversion first. */
bool success;
MatBase<T, Size, Size> inv = invert(mat, success);
MatBase<T, Size, Size> inv = invert<T, Size>(mat, success);
if (success) {
return inv;
}

View File

@ -7,39 +7,55 @@
#include <algorithm>
#include <iomanip>
#include <iostream>
#include <string_view>
#include <fmt/format.h>
namespace blender::timeit {
void print_duration(Nanoseconds duration)
static void format_duration(Nanoseconds duration, fmt::memory_buffer &buf)
{
using namespace std::chrono;
if (duration < microseconds(100)) {
std::cout << duration.count() << " ns";
fmt::format_to(fmt::appender(buf), FMT_STRING("{} ns"), duration.count());
}
else if (duration < seconds(5)) {
std::cout << std::fixed << std::setprecision(2) << duration.count() / 1.0e6 << " ms";
fmt::format_to(fmt::appender(buf), FMT_STRING("{:.2f} ms"), duration.count() / 1.0e6);
}
else if (duration > seconds(90)) {
/* Long durations: print seconds, and also H:m:s */
const auto dur_hours = duration_cast<hours>(duration);
const auto dur_mins = duration_cast<minutes>(duration - dur_hours);
const auto dur_sec = duration_cast<seconds>(duration - dur_hours - dur_mins);
std::cout << std::fixed << std::setprecision(1) << duration.count() / 1.0e9 << " s ("
<< dur_hours.count() << "H:" << dur_mins.count() << "m:" << dur_sec.count() << "s)";
fmt::format_to(fmt::appender(buf),
FMT_STRING("{:.1f} s ({}H:{}m:{}s)"),
duration.count() / 1.0e9,
dur_hours.count(),
dur_mins.count(),
dur_sec.count());
}
else {
std::cout << std::fixed << std::setprecision(1) << duration.count() / 1.0e9 << " s";
fmt::format_to(fmt::appender(buf), FMT_STRING("{:.1f} s"), duration.count() / 1.0e9);
}
}
void print_duration(Nanoseconds duration)
{
fmt::memory_buffer buf;
format_duration(duration, buf);
std::cout << std::string_view(buf.data(), buf.size());
}
ScopedTimer::~ScopedTimer()
{
const TimePoint end = Clock::now();
const Nanoseconds duration = end - start_;
std::cout << "Timer '" << name_ << "' took ";
print_duration(duration);
std::cout << '\n';
fmt::memory_buffer buf;
fmt::format_to(fmt::appender(buf), FMT_STRING("Timer '{}' took "), name_);
format_duration(duration, buf);
buf.append(std::string_view("\n"));
std::cout << std::string_view(buf.data(), buf.size());
}
ScopedTimerAveraged::~ScopedTimerAveraged()
@ -51,13 +67,15 @@ ScopedTimerAveraged::~ScopedTimerAveraged()
total_time_ += duration;
min_time_ = std::min(duration, min_time_);
std::cout << "Timer '" << name_ << "': (Average: ";
print_duration(total_time_ / total_count_);
std::cout << ", Min: ";
print_duration(min_time_);
std::cout << ", Last: ";
print_duration(duration);
std::cout << ")\n";
fmt::memory_buffer buf;
fmt::format_to(fmt::appender(buf), FMT_STRING("Timer '{}': (Average: "), name_);
format_duration(total_time_ / total_count_, buf);
buf.append(std::string_view(", Min: "));
format_duration(min_time_, buf);
buf.append(std::string_view(", Last: "));
format_duration(duration, buf);
buf.append(std::string_view(")\n"));
std::cout << std::string_view(buf.data(), buf.size());
}
} // namespace blender::timeit
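
The conversion above replaces a series of interleaved stream insertions with formatting into a single fmt::memory_buffer that is written out in one shot. The same calls, reduced to a stand-alone sketch (the timer name and duration are illustrative):

#include <fmt/format.h>
#include <iostream>
#include <string_view>

int main()
{
  fmt::memory_buffer buf;
  fmt::format_to(fmt::appender(buf), FMT_STRING("Timer '{}' took "), "demo");
  fmt::format_to(fmt::appender(buf), FMT_STRING("{:.2f} ms"), 1234567 / 1.0e6);
  buf.append(std::string_view("\n"));
  /* One stream insertion for the whole message. */
  std::cout << std::string_view(buf.data(), buf.size());
}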

View File

@ -694,6 +694,22 @@ TEST(map, VectorKey)
EXPECT_EQ(map.size(), 1);
}
TEST(map, Equality)
{
Map<int, int> a;
Map<int, int> b;
EXPECT_EQ(a, b);
a.add(3, 4);
EXPECT_NE(a, b);
b.add(3, 4);
EXPECT_EQ(a, b);
a.add(4, 10);
b.add(4, 11);
EXPECT_NE(a, b);
}
/**
* Set this to 1 to activate the benchmark. It is disabled by default, because it prints a lot.
*/

View File

@ -1390,7 +1390,7 @@ class StringEscape : public testing::Test {
size_t dst_test_len;
char dst_test[64]; /* Must be big enough for all input. */
for (const auto &item : items) {
/* Validate the static size is big enough (test the test it's self). */
/* Validate the static size is big enough (test the test itself). */
EXPECT_LT((strlen(item[0]) * 2) + 1, sizeof(dst_test));
/* Escape the string. */
dst_test_len = BLI_str_escape(dst_test, item[0], sizeof(dst_test));

View File

@ -4712,7 +4712,7 @@ void blo_do_versions_280(FileData *fd, Library * /*lib*/, Main *bmain)
LISTBASE_FOREACH (Mesh *, me, &bmain->meshes) {
me->flag &= ~(ME_FLAG_UNUSED_0 | ME_FLAG_UNUSED_1 | ME_FLAG_UNUSED_3 | ME_FLAG_UNUSED_4 |
ME_FLAG_UNUSED_6 | ME_FLAG_UNUSED_7 | ME_REMESH_REPROJECT_VERTEX_COLORS);
ME_FLAG_UNUSED_6 | ME_FLAG_UNUSED_7 | ME_REMESH_REPROJECT_ATTRIBUTES);
}
LISTBASE_FOREACH (Material *, mat, &bmain->materials) {

View File

@ -1292,8 +1292,9 @@ static void change_input_socket_to_rotation_type(bNodeTree &ntree,
if (link->tosock != &socket) {
continue;
}
if (ELEM(link->fromsock->type, SOCK_VECTOR, SOCK_FLOAT) &&
link->fromnode->type != NODE_REROUTE) {
if (ELEM(link->fromsock->type, SOCK_ROTATION, SOCK_VECTOR, SOCK_FLOAT) &&
link->fromnode->type != NODE_REROUTE)
{
/* No need to add the conversion node when implicit conversions will work. */
continue;
}
@ -1321,7 +1322,8 @@ static void change_output_socket_to_rotation_type(bNodeTree &ntree,
if (link->fromsock != &socket) {
continue;
}
if (link->tosock->type == SOCK_VECTOR && link->tonode->type != NODE_REROUTE) {
if (ELEM(link->tosock->type, SOCK_ROTATION, SOCK_VECTOR) && link->tonode->type != NODE_REROUTE)
{
/* No need to add the conversion node when implicit conversions will work. */
continue;
}
@ -1351,7 +1353,7 @@ static void version_geometry_nodes_use_rotation_socket(bNodeTree &ntree)
bNodeSocket *socket = nodeFindSocket(node, SOCK_IN, "Rotation");
change_input_socket_to_rotation_type(ntree, *node, *socket);
}
if (STREQ(node->idname, "GeometryNodeDistributePointsOnFaces")) {
if (STR_ELEM(node->idname, "GeometryNodeDistributePointsOnFaces", "GeometryNodeObjectInfo")) {
bNodeSocket *socket = nodeFindSocket(node, SOCK_OUT, "Rotation");
change_output_socket_to_rotation_type(ntree, *node, *socket);
}
@ -2504,5 +2506,27 @@ void blo_do_versions_400(FileData *fd, Library * /*lib*/, Main *bmain)
LISTBASE_FOREACH (Mesh *, mesh, &bmain->meshes) {
blender::bke::mesh_sculpt_mask_to_generic(*mesh);
}
if (!DNA_struct_member_exists(
fd->filesdna, "RaytraceEEVEE", "float", "screen_trace_max_roughness"))
{
LISTBASE_FOREACH (Scene *, scene, &bmain->scenes) {
scene->eevee.reflection_options.screen_trace_max_roughness = 0.5f;
scene->eevee.refraction_options.screen_trace_max_roughness = 0.5f;
scene->eevee.diffuse_options.screen_trace_max_roughness = 0.5f;
}
}
if (!DNA_struct_member_exists(fd->filesdna, "Material", "char", "displacement_method")) {
/* Replace Cycles.displacement_method by Material::displacement_method. */
LISTBASE_FOREACH (Material *, material, &bmain->materials) {
int displacement_method = MA_DISPLACEMENT_BUMP;
if (IDProperty *cmat = version_cycles_properties_from_ID(&material->id)) {
displacement_method = version_cycles_property_int(
cmat, "displacement_method", MA_DISPLACEMENT_BUMP);
}
material->displacement_method = displacement_method;
}
}
}
}

View File

@ -594,8 +594,7 @@ void BLO_update_defaults_startup_blend(Main *bmain, const char *app_template)
/* Match default for new meshes. */
mesh->smoothresh_legacy = DEG2RADF(30);
/* Match voxel remesher options for all existing meshes in templates. */
mesh->flag |= ME_REMESH_REPROJECT_VOLUME | ME_REMESH_REPROJECT_PAINT_MASK |
ME_REMESH_REPROJECT_SCULPT_FACE_SETS | ME_REMESH_REPROJECT_VERTEX_COLORS;
mesh->flag |= ME_REMESH_REPROJECT_VOLUME | ME_REMESH_REPROJECT_ATTRIBUTES;
/* For Sculpting template. */
if (app_template && STREQ(app_template, "Sculpting")) {

View File

@ -916,6 +916,9 @@ void blo_do_versions_userdef(UserDef *userdef)
*/
{
/* Keep this block, even when empty. */
userdef->key_insert_channels = (USER_ANIM_KEY_CHANNEL_LOCATION |
USER_ANIM_KEY_CHANNEL_ROTATION | USER_ANIM_KEY_CHANNEL_SCALE |
USER_ANIM_KEY_CHANNEL_CUSTOM_PROPERTIES);
}
LISTBASE_FOREACH (bTheme *, btheme, &userdef->themes) {

View File

@ -69,6 +69,7 @@ set(SRC
algorithms/intern/smaa.cc
algorithms/intern/summed_area_table.cc
algorithms/intern/symmetric_separable_blur.cc
algorithms/intern/symmetric_separable_blur_variable_size.cc
algorithms/intern/transform.cc
algorithms/COM_algorithm_jump_flooding.hh
@ -79,6 +80,7 @@ set(SRC
algorithms/COM_algorithm_smaa.hh
algorithms/COM_algorithm_summed_area_table.hh
algorithms/COM_algorithm_symmetric_separable_blur.hh
algorithms/COM_algorithm_symmetric_separable_blur_variable_size.hh
algorithms/COM_algorithm_transform.hh
cached_resources/intern/cached_mask.cc
@ -150,6 +152,7 @@ set(GLSL_SRC
shaders/compositor_image_crop.glsl
shaders/compositor_inpaint_compute_boundary.glsl
shaders/compositor_inpaint_compute_region.glsl
shaders/compositor_inpaint_fill_region.glsl
shaders/compositor_jump_flooding.glsl
shaders/compositor_keying_compute_image.glsl
shaders/compositor_keying_compute_matte.glsl
@ -186,6 +189,7 @@ set(GLSL_SRC
shaders/compositor_symmetric_blur.glsl
shaders/compositor_symmetric_blur_variable_size.glsl
shaders/compositor_symmetric_separable_blur.glsl
shaders/compositor_symmetric_separable_blur_variable_size.glsl
shaders/compositor_tone_map_photoreceptor.glsl
shaders/compositor_tone_map_simple.glsl
shaders/compositor_write_output.glsl
@ -297,6 +301,7 @@ set(SRC_SHADER_CREATE_INFOS
shaders/infos/compositor_symmetric_blur_info.hh
shaders/infos/compositor_symmetric_blur_variable_size_info.hh
shaders/infos/compositor_symmetric_separable_blur_info.hh
shaders/infos/compositor_symmetric_separable_blur_variable_size_info.hh
shaders/infos/compositor_tone_map_photoreceptor_info.hh
shaders/infos/compositor_tone_map_simple_info.hh
shaders/infos/compositor_write_output_info.hh

View File

@ -0,0 +1,33 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "DNA_scene_types.h"
#include "COM_context.hh"
#include "COM_result.hh"
namespace blender::realtime_compositor {
/* Blur the input using a horizontal and a vertical separable blur pass given the filter type
 * using SymmetricSeparableBlurWeights, where the number of weights is equal to weights_resolution.
 * Since the radius can be variable, the number of weights can be less than or greater than the
 * number of pixels actually accumulated during blurring, so the weights are interpolated in the
 * shader as needed; the resolution is typically set to the maximum possible radius if known. The
 * radius of the blur is variable and is defined using the given radius float image. The output is
 * written to the given output result, which is allocated internally and is thus expected not to
 * be previously allocated.
 *
 * Technically, a variable size blur can't be computed separably; however, assuming a sufficiently
 * smooth radius field, the results can be visually pleasing, so this can be used as a more
 * performant variable size blur if the quality is satisfactory. */
void symmetric_separable_blur_variable_size(Context &context,
Result &input,
Result &output,
Result &radius,
int filter_type = R_FILTER_GAUSS,
int weights_resolution = 128);
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,139 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_assert.h"
#include "BLI_math_base.hh"
#include "BLI_math_vector.hh"
#include "BLI_math_vector_types.hh"
#include "GPU_shader.h"
#include "GPU_texture.h"
#include "COM_context.hh"
#include "COM_result.hh"
#include "COM_utilities.hh"
#include "COM_algorithm_symmetric_separable_blur_variable_size.hh"
#include "COM_symmetric_separable_blur_weights.hh"
namespace blender::realtime_compositor {
static const char *get_blur_shader(ResultType type)
{
switch (type) {
case ResultType::Float:
return "compositor_symmetric_separable_blur_variable_size_float";
case ResultType::Float2:
return "compositor_symmetric_separable_blur_variable_size_float2";
case ResultType::Vector:
case ResultType::Color:
return "compositor_symmetric_separable_blur_variable_size_float4";
case ResultType::Float3:
/* GPU module does not support float3 outputs. */
break;
case ResultType::Int2:
/* Blur does not support integer types. */
break;
}
BLI_assert_unreachable();
return nullptr;
}
static Result horizontal_pass(
Context &context, Result &input, Result &radius, int filter_type, int weights_resolution)
{
GPUShader *shader = context.get_shader(get_blur_shader(input.type()));
GPU_shader_bind(shader);
GPU_shader_uniform_1b(shader, "is_vertical_pass", false);
input.bind_as_texture(shader, "input_tx");
const SymmetricSeparableBlurWeights &weights =
context.cache_manager().symmetric_separable_blur_weights.get(
context, filter_type, weights_resolution);
weights.bind_as_texture(shader, "weights_tx");
radius.bind_as_texture(shader, "radius_tx");
/* We allocate an output image of a transposed size, that is, with a height equivalent to the
* width of the input and vice versa. This is done as a performance optimization. The shader
* will blur the image horizontally and write it to the intermediate output transposed. Then
* the vertical pass will execute the same horizontal blur shader, but since its input is
* transposed, it will effectively do a vertical blur and write to the output transposed,
* effectively undoing the transposition in the horizontal pass. This is done to improve
* spatial cache locality in the shader and to avoid having two separate shaders for each blur
* pass. */
Domain domain = input.domain();
const int2 transposed_domain = int2(domain.size.y, domain.size.x);
Result output = context.create_temporary_result(input.type());
output.allocate_texture(transposed_domain);
output.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, domain.size);
GPU_shader_unbind();
input.unbind_as_texture();
weights.unbind_as_texture();
radius.unbind_as_texture();
output.unbind_as_image();
return output;
}
static void vertical_pass(Context &context,
Result &original_input,
Result &horizontal_pass_result,
Result &output,
Result &radius,
int filter_type,
int weights_resolution)
{
GPUShader *shader = context.get_shader(get_blur_shader(original_input.type()));
GPU_shader_bind(shader);
GPU_shader_uniform_1b(shader, "is_vertical_pass", true);
horizontal_pass_result.bind_as_texture(shader, "input_tx");
const SymmetricSeparableBlurWeights &weights =
context.cache_manager().symmetric_separable_blur_weights.get(
context, filter_type, weights_resolution);
weights.bind_as_texture(shader, "weights_tx");
radius.bind_as_texture(shader, "radius_tx");
Domain domain = original_input.domain();
output.allocate_texture(domain);
output.bind_as_image(shader, "output_img");
/* Notice that the domain is transposed, see the note on the horizontal pass method for more
* information on the reasoning behind this. */
compute_dispatch_threads_at_least(shader, int2(domain.size.y, domain.size.x));
GPU_shader_unbind();
horizontal_pass_result.unbind_as_texture();
output.unbind_as_image();
weights.unbind_as_texture();
radius.unbind_as_texture();
}
void symmetric_separable_blur_variable_size(Context &context,
Result &input,
Result &output,
Result &radius,
int filter_type,
int weights_resolution)
{
Result horizontal_pass_result = horizontal_pass(
context, input, radius, filter_type, weights_resolution);
vertical_pass(
context, input, horizontal_pass_result, output, radius, filter_type, weights_resolution);
horizontal_pass_result.release();
}
} // namespace blender::realtime_compositor

View File

@ -84,6 +84,8 @@ SymmetricSeparableBlurWeights::SymmetricSeparableBlurWeights(Context &context,
Result::texture_format(ResultType::Float, context.get_precision()),
GPU_TEXTURE_USAGE_GENERAL,
weights.data());
GPU_texture_filter_mode(texture_, true);
GPU_texture_extend_mode(texture_, GPU_SAMPLER_EXTEND_MODE_EXTEND);
}
SymmetricSeparableBlurWeights::~SymmetricSeparableBlurWeights()

View File

@ -2,12 +2,7 @@
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/* Fill the inpainting region by sampling the color of the nearest boundary pixel if it is not
* further than the user supplied distance. Additionally, apply a lateral blur in the tangential
* path to the inpainting boundary to smooth out the inpainted region. */
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_compositor_jump_flooding_lib.glsl)
void main()
{
@ -15,55 +10,23 @@ void main()
vec4 color = texture_load(input_tx, texel);
/* An opaque pixel, no inpainting needed. */
/* An opaque pixel, not part of the inpainting region, write the original color. */
if (color.a == 1.0) {
imageStore(output_img, texel, color);
return;
}
ivec2 closest_boundary_texel = texture_load(flooded_boundary_tx, texel).xy;
float distance_to_boundary = distance(vec2(texel), vec2(closest_boundary_texel));
float distance_to_boundary = texture_load(distance_to_boundary_tx, texel).x;
/* Further than the user supplied distance, write a transparent color. */
/* Further than the inpainting distance, not part of the inpainting region, write the original
* color. */
if (distance_to_boundary > max_distance) {
imageStore(output_img, texel, vec4(0.0));
imageStore(output_img, texel, color);
return;
}
/* We set the blur radius to be proportional to the distance to the boundary. */
int blur_radius = int(ceil(distance_to_boundary));
/* Laterally blur by accumulating the boundary pixels nearest to the pixels along the tangential
* path in both directions starting from the current pixel, noting that the weights texture only
* stores the weights for the left half, but since the Gaussian is symmetric, the same weight is
* used for the right half and we add both of their contributions. */
vec2 left_texel = vec2(texel);
vec2 right_texel = vec2(texel);
float accumulated_weight = 0.0;
vec4 accumulated_color = vec4(0.0);
for (int i = 0; i < blur_radius; i++) {
float weight = texture(gaussian_weights_tx, float(i / (blur_radius - 1))).x;
{
ivec2 boundary_texel = texture_load(flooded_boundary_tx, ivec2(left_texel)).xy;
accumulated_color += texture_load(input_tx, boundary_texel) * weight;
accumulated_weight += weight;
/* Move the left texel one pixel in the clockwise tangent to the boundary. */
left_texel += normalize((left_texel - vec2(boundary_texel)).yx * vec2(-1.0, 1.0));
}
/* When i is zero, we are accumulating the center pixel, which was already accumulated as the
* left texel above, so no need to accumulate it again. */
if (i != 0) {
ivec2 boundary_texel = texture_load(flooded_boundary_tx, ivec2(right_texel)).xy;
accumulated_color += texture_load(input_tx, boundary_texel) * weight;
accumulated_weight += weight;
/* Move the right texel one pixel in the anti-clockwise tangent to the boundary. */
right_texel += normalize((right_texel - vec2(boundary_texel)).yx * vec2(1.0, -1.0));
}
}
imageStore(output_img, texel, accumulated_color / accumulated_weight);
/* Mix the inpainted color with the original color using its alpha because semi-transparent areas
* are considered to be partially inpainted. */
vec4 inpainted_color = texture_load(inpainted_region_tx, texel);
imageStore(output_img, texel, vec4(mix(inpainted_color.rgb, color.rgb, color.a), 1.0));
}

View File

@ -0,0 +1,48 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/* Fill the inpainting region by sampling the color of the nearest boundary pixel. Additionally,
* compute some information about the inpainting region, like the distance to the boundary, as well
* as the blur radius to use to smooth out that region. */
#pragma BLENDER_REQUIRE(gpu_shader_math_base_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_compositor_jump_flooding_lib.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
vec4 color = texture_load(input_tx, texel);
/* An opaque pixel, not part of the inpainting region. */
if (color.a == 1.0) {
imageStore(filled_region_img, texel, color);
imageStore(smoothing_radius_img, texel, vec4(0.0));
imageStore(distance_to_boundary_img, texel, vec4(0.0));
return;
}
ivec2 closest_boundary_texel = texture_load(flooded_boundary_tx, texel).xy;
float distance_to_boundary = distance(vec2(texel), vec2(closest_boundary_texel));
imageStore(distance_to_boundary_img, texel, vec4(distance_to_boundary));
/* This shader is followed by a blur shader that smooths out the inpainting region, where the blur
 * window is inscribed in the circle that touches the boundary, and thus the blur radius is the
 * distance to the boundary divided by the square root of two. As a performance optimization, we
 * limit the blurring to areas that will affect the inpainting region, that is, those whose
 * distance to the boundary is less than double the inpainting distance. Additionally, we clamp
 * the distance to the inpainting distance, since areas outside of the clamp range only indirectly
 * affect the inpainting region due to blurring and thus needn't use higher blur radii. */
float blur_window_size = min(float(max_distance), distance_to_boundary) / M_SQRT2;
bool skip_smoothing = distance_to_boundary > (max_distance * 2.0);
float smoothing_radius = skip_smoothing ? 0.0 : blur_window_size;
imageStore(smoothing_radius_img, texel, vec4(smoothing_radius));
/* Mix the boundary color with the original color using its alpha because semi-transparent areas
* are considered to be partially inpainted. */
vec4 boundary_color = texture_load(input_tx, closest_boundary_texel);
imageStore(filled_region_img, texel, mix(boundary_color, color, color.a));
}
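
The divided-by-square-root-of-two step above follows from inscribing the square blur window in the circle of radius $d$ that touches the boundary; as a short check:

\[
\frac{s}{2}\sqrt{2} = d
\quad\Longrightarrow\quad
\text{blur radius} = \frac{s}{2} = \frac{d}{\sqrt{2}},
\]

where $s$ is the side length of the blur window, so half its diagonal equals the circle radius $d$.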

View File

@ -0,0 +1,41 @@
/* SPDX-FileCopyrightText: 2022 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma BLENDER_REQUIRE(gpu_shader_compositor_blur_common.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
float accumulated_weight = 0.0;
vec4 accumulated_color = vec4(0.0);
/* First, compute the contribution of the center pixel. */
vec4 center_color = texture_load(input_tx, texel);
float center_weight = texture_load(weights_tx, 0).x;
accumulated_color += center_color * center_weight;
accumulated_weight += center_weight;
/* The dispatch domain is transposed in the vertical pass, so make sure to transpose the texel
 * coordinates back when loading the radius. See the horizontal_pass function in the
* symmetric_separable_blur_variable_size.cc file for more information. */
int radius = int(texture_load(radius_tx, is_vertical_pass ? texel.yx : texel).x);
/* Then, compute the contributions of the pixel to the right and left, noting that the
* weights texture only stores the weights for the positive half, but since the filter is
* symmetric, the same weight is used for the negative half and we add both of their
* contributions. */
for (int i = 1; i <= radius; i++) {
/* Add 0.5 to evaluate at the center of the pixels. */
float weight = texture(weights_tx, (float(i) + 0.5) / float(radius + 1)).x;
accumulated_color += texture_load(input_tx, texel + ivec2(i, 0)) * weight;
accumulated_color += texture_load(input_tx, texel + ivec2(-i, 0)) * weight;
accumulated_weight += weight * 2.0;
}
/* Write the color using the transposed texel. See the horizontal_pass function mentioned above
* for more information on the rationale behind this. */
imageStore(output_img, texel.yx, accumulated_color / accumulated_weight);
}
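
A CPU sketch of the same accumulation for a single row, assuming a precomputed positive-half weights table and approximating the shader's filtered texture() lookup with a clamped linear interpolation (the helper and data are hypothetical):

#include <algorithm>
#include <cstdio>
#include <vector>

/* Linearly sample the half-filter weights table at a normalized coordinate. */
static float sample_weight(const std::vector<float> &weights, float coord)
{
  const float x = std::clamp(coord, 0.0f, 1.0f) * float(weights.size() - 1);
  const int i0 = int(x);
  const int i1 = std::min(i0 + 1, int(weights.size()) - 1);
  const float t = x - float(i0);
  return weights[i0] * (1.0f - t) + weights[i1] * t;
}

int main()
{
  const std::vector<float> weights = {1.0f, 0.6f, 0.2f}; /* Center plus falloff. */
  const std::vector<float> row = {0, 0, 1, 0, 0};        /* Impulse input. */
  const int radius = 2, center = 2;
  float accumulated_color = row[center] * weights[0];
  float accumulated_weight = weights[0];
  for (int i = 1; i <= radius; i++) {
    /* Same coordinate as the shader: evaluate at pixel centers. */
    const float w = sample_weight(weights, (float(i) + 0.5f) / float(radius + 1));
    accumulated_color += (row[center + i] + row[center - i]) * w;
    accumulated_weight += w * 2.0f;
  }
  printf("%f\n", accumulated_color / accumulated_weight);
}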

View File

@ -11,12 +11,23 @@ GPU_SHADER_CREATE_INFO(compositor_inpaint_compute_boundary)
.compute_source("compositor_inpaint_compute_boundary.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_inpaint_compute_region)
GPU_SHADER_CREATE_INFO(compositor_inpaint_fill_region)
.local_group_size(16, 16)
.push_constant(Type::INT, "max_distance")
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.sampler(1, ImageType::INT_2D, "flooded_boundary_tx")
.sampler(2, ImageType::INT_1D, "gaussian_weights_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "filled_region_img")
.image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "distance_to_boundary_img")
.image(2, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "smoothing_radius_img")
.compute_source("compositor_inpaint_fill_region.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_inpaint_compute_region)
.local_group_size(16, 16)
.push_constant(Type::INT, "max_distance")
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.sampler(1, ImageType::FLOAT_2D, "inpainted_region_tx")
.sampler(2, ImageType::FLOAT_2D, "distance_to_boundary_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_inpaint_compute_region.glsl")
.do_static_compilation(true);

View File

@ -0,0 +1,28 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include "gpu_shader_create_info.hh"
GPU_SHADER_CREATE_INFO(compositor_symmetric_separable_blur_variable_size_shared)
.local_group_size(16, 16)
.push_constant(Type::BOOL, "is_vertical_pass")
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.sampler(1, ImageType::FLOAT_1D, "weights_tx")
.sampler(2, ImageType::FLOAT_2D, "radius_tx")
.compute_source("compositor_symmetric_separable_blur_variable_size.glsl");
GPU_SHADER_CREATE_INFO(compositor_symmetric_separable_blur_variable_size_float)
.additional_info("compositor_symmetric_separable_blur_variable_size_shared")
.image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_symmetric_separable_blur_variable_size_float2)
.additional_info("compositor_symmetric_separable_blur_variable_size_shared")
.image(0, GPU_RG16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_symmetric_separable_blur_variable_size_float4)
.additional_info("compositor_symmetric_separable_blur_variable_size_shared")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.do_static_compilation(true);

View File

@ -510,9 +510,12 @@ set(GLSL_SRC
engines/eevee_next/shaders/eevee_geom_world_vert.glsl
engines/eevee_next/shaders/eevee_hiz_debug_frag.glsl
engines/eevee_next/shaders/eevee_hiz_update_comp.glsl
engines/eevee_next/shaders/eevee_horizon_denoise_comp.glsl
engines/eevee_next/shaders/eevee_horizon_scan_eval_lib.glsl
engines/eevee_next/shaders/eevee_horizon_scan_comp.glsl
engines/eevee_next/shaders/eevee_horizon_scan_lib.glsl
engines/eevee_next/shaders/eevee_horizon_scan_test.glsl
engines/eevee_next/shaders/eevee_horizon_setup_comp.glsl
engines/eevee_next/shaders/eevee_light_culling_debug_frag.glsl
engines/eevee_next/shaders/eevee_light_culling_select_comp.glsl
engines/eevee_next/shaders/eevee_light_culling_sort_comp.glsl

View File

@ -103,7 +103,6 @@
/* Keep this as a define to avoid shader variations. */
#define RAYTRACE_RADIANCE_FORMAT GPU_R11F_G11F_B10F
#define RAYTRACE_RAYTIME_FORMAT GPU_R32F
#define RAYTRACE_HORIZON_FORMAT GPU_R32UI
#define RAYTRACE_VARIANCE_FORMAT GPU_R16F
#define RAYTRACE_TILEMASK_FORMAT GPU_R8UI

View File

@ -295,11 +295,13 @@ void Film::init(const int2 &extent, const rcti *output_rect)
EEVEE_RENDER_PASS_ENVIRONMENT |
EEVEE_RENDER_PASS_MIST |
EEVEE_RENDER_PASS_SHADOW | EEVEE_RENDER_PASS_AO;
const eViewLayerEEVEEPassType color_passes_3 = EEVEE_RENDER_PASS_TRANSPARENT;
data_.exposure_scale = pow2f(scene.view_settings.exposure);
data_.has_data = (enabled_passes_ & data_passes) != 0;
data_.any_render_pass_1 = (enabled_passes_ & color_passes_1) != 0;
data_.any_render_pass_2 = (enabled_passes_ & color_passes_2) != 0;
data_.any_render_pass_3 = (enabled_passes_ & color_passes_3) != 0;
}
{
/* Set pass offsets. */
@ -341,6 +343,7 @@ void Film::init(const int2 &extent, const rcti *output_rect)
data_.environment_id = pass_index_get(EEVEE_RENDER_PASS_ENVIRONMENT);
data_.shadow_id = pass_index_get(EEVEE_RENDER_PASS_SHADOW);
data_.ambient_occlusion_id = pass_index_get(EEVEE_RENDER_PASS_AO);
data_.transparent_id = pass_index_get(EEVEE_RENDER_PASS_TRANSPARENT);
data_.aov_color_id = data_.color_len;
data_.aov_value_id = data_.value_len;
@ -629,7 +632,7 @@ void Film::update_sample_table()
}
}
void Film::accumulate(const DRWView *view, GPUTexture *combined_final_tx)
void Film::accumulate(View &view, GPUTexture *combined_final_tx)
{
if (inst_.is_viewport()) {
DefaultFramebufferList *dfbl = DRW_viewport_framebuffer_list_get();
@ -650,9 +653,7 @@ void Film::accumulate(const DRWView *view, GPUTexture *combined_final_tx)
data_.display_only = false;
inst_.push_uniform_data();
draw::View drw_view("MainView", view);
inst_.manager->submit(accumulate_ps_, drw_view);
inst_.manager->submit(accumulate_ps_, view);
combined_tx_.swap();
weight_tx_.swap();

View File

@ -90,7 +90,7 @@ class Film {
void end_sync();
/** Accumulate the newly rendered sample contained in #RenderBuffers and blit to display. */
void accumulate(const DRWView *view, GPUTexture *combined_final_tx);
void accumulate(View &view, GPUTexture *combined_final_tx);
/** Sort and normalize cryptomatte samples. */
void cryptomatte_sort();
@ -180,6 +180,8 @@ class Film {
return data_.shadow_id;
case EEVEE_RENDER_PASS_AO:
return data_.ambient_occlusion_id;
case EEVEE_RENDER_PASS_TRANSPARENT:
return data_.transparent_id;
case EEVEE_RENDER_PASS_CRYPTOMATTE_OBJECT:
return data_.cryptomatte_object_id;
case EEVEE_RENDER_PASS_CRYPTOMATTE_ASSET:

View File

@ -95,6 +95,8 @@ void HiZBuffer::update()
else {
inst_.manager->submit(hiz_update_layer_ps_);
}
is_dirty_ = false;
}
void HiZBuffer::debug_draw(View &view, GPUFrameBuffer *view_fb)

View File

@ -113,23 +113,21 @@ bNodeTree *LookdevWorldNodeTree::nodetree_get(const LookdevParameters &parameter
LookdevModule::~LookdevModule()
{
GPU_material_free(&gpu_materials_);
gpu_material_ = nullptr;
}
bool LookdevModule::sync_world()
{
/* Check based on the v3d if the world is overridden. */
LookdevParameters new_parameters(inst_.v3d);
bool parameters_changed = parameters_ != new_parameters;
if (parameters_changed) {
if (parameters_.gpu_parameters_changed(new_parameters)) {
GPU_material_free(&gpu_materials_);
gpu_material_ = nullptr;
}
const bool parameters_changed = parameters_ != new_parameters;
const bool gpu_parameters_changed = parameters_.gpu_parameters_changed(new_parameters);
if (gpu_parameters_changed) {
GPU_material_free(&gpu_materials_);
}
if (parameters_changed) {
parameters_ = new_parameters;
inst_.sampling.reset();
gpu_status_ = GPU_MAT_CREATED;
}
if (parameters_.show_scene_world) {
@ -137,25 +135,15 @@ bool LookdevModule::sync_world()
}
::bNodeTree *node_tree = world_override_tree.nodetree_get(parameters_);
gpu_material_ = inst_.shaders.material_shader_get("EEVEE Lookdev Background",
gpu_materials_,
node_tree,
MAT_PIPE_DEFERRED,
MAT_GEOM_WORLD,
true);
GPUMaterial *gpu_material = inst_.shaders.material_shader_get(
"EEVEE Lookdev Background", gpu_materials_, node_tree, MAT_PIPE_DEFERRED, MAT_GEOM_WORLD);
if (assign_if_different(gpu_status_, GPU_material_status(gpu_material_)) &&
gpu_status_ == GPU_MAT_SUCCESS)
{
if (gpu_parameters_changed) {
inst_.reflection_probes.sync_world_lookdev();
}
else if (gpu_status_ == GPU_MAT_QUEUED) {
inst_.sampling.reset();
DRW_viewport_request_redraw();
}
inst_.pipelines.world.sync(gpu_material_);
inst_.pipelines.background.sync(gpu_material_, parameters_.background_opacity);
inst_.pipelines.world.sync(gpu_material);
inst_.pipelines.background.sync(gpu_material, parameters_.background_opacity);
return true;
}

View File

@ -79,8 +79,6 @@ class LookdevModule {
LookdevParameters parameters_;
ListBase gpu_materials_ = {nullptr, nullptr};
GPUMaterial *gpu_material_ = nullptr;
eGPUMaterialStatus gpu_status_ = GPU_MAT_CREATED;
public:
LookdevModule(Instance &inst) : inst_(inst){};

View File

@ -61,6 +61,24 @@ static inline bool geometry_type_has_surface(eMaterialGeometry geometry_type)
return geometry_type < MAT_GEOM_VOLUME;
}
enum eMaterialDisplacement {
MAT_DISPLACEMENT_BUMP = 0,
MAT_DISPLACEMENT_VERTEX_WITH_BUMP,
};
static inline eMaterialDisplacement to_displacement_type(int displacement_method)
{
switch (displacement_method) {
case MA_DISPLACEMENT_DISPLACE:
/* Currently unsupported. Revert to vertex displacement + bump. */
ATTR_FALLTHROUGH;
case MA_DISPLACEMENT_BOTH:
return MAT_DISPLACEMENT_VERTEX_WITH_BUMP;
default:
return MAT_DISPLACEMENT_BUMP;
}
}
enum eMaterialProbe {
MAT_PROBE_NONE = 0,
MAT_PROBE_REFLECTION,
@ -70,23 +88,30 @@ enum eMaterialProbe {
static inline void material_type_from_shader_uuid(uint64_t shader_uuid,
eMaterialPipeline &pipeline_type,
eMaterialGeometry &geometry_type,
eMaterialDisplacement &displacement_type,
bool &transparent_shadows)
{
const uint64_t geometry_mask = ((1u << 4u) - 1u);
const uint64_t pipeline_mask = ((1u << 4u) - 1u);
const uint64_t displacement_mask = ((1u << 2u) - 1u);
geometry_type = static_cast<eMaterialGeometry>(shader_uuid & geometry_mask);
pipeline_type = static_cast<eMaterialPipeline>((shader_uuid >> 4u) & pipeline_mask);
transparent_shadows = (shader_uuid >> 8u) & 1u;
displacement_type = static_cast<eMaterialDisplacement>((shader_uuid >> 8u) & displacement_mask);
transparent_shadows = (shader_uuid >> 10u) & 1u;
}
static inline uint64_t shader_uuid_from_material_type(eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type,
char blend_flags)
static inline uint64_t shader_uuid_from_material_type(
eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type,
eMaterialDisplacement displacement_type = MAT_DISPLACEMENT_BUMP,
char blend_flags = 0)
{
BLI_assert(displacement_type < (1 << 2));
BLI_assert(geometry_type < (1 << 4));
BLI_assert(pipeline_type < (1 << 4));
uchar transparent_shadows = blend_flags & MA_BL_TRANSPARENT_SHADOW ? 1 : 0;
return geometry_type | (pipeline_type << 4) | (transparent_shadows << 8);
uint64_t transparent_shadows = blend_flags & MA_BL_TRANSPARENT_SHADOW ? 1 : 0;
return geometry_type | (pipeline_type << 4) | (displacement_type << 8) |
(transparent_shadows << 10);
}
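For reference, the widened key now occupies 11 bits: geometry in bits 0-3, pipeline in bits 4-7, displacement in bits 8-9, and transparent shadows in bit 10, matching the unpack order above. A standalone round-trip check of that layout:

#include <cassert>
#include <cstdint>

int main()
{
  /* Hypothetical field values, within the asserted ranges. */
  const uint64_t geometry = 3;     /* 4 bits. */
  const uint64_t pipeline = 5;     /* 4 bits. */
  const uint64_t displacement = 1; /* 2 bits. */
  const uint64_t transparent = 1;  /* 1 bit. */

  const uint64_t uuid = geometry | (pipeline << 4u) | (displacement << 8u) |
                        (transparent << 10u);

  /* Unpacking mirrors material_type_from_shader_uuid(). */
  assert((uuid & ((1u << 4u) - 1u)) == geometry);
  assert(((uuid >> 4u) & ((1u << 4u) - 1u)) == pipeline);
  assert(((uuid >> 8u) & ((1u << 2u) - 1u)) == displacement);
  assert(((uuid >> 10u) & 1u) == transparent);
  return 0;
}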
ENUM_OPERATORS(eClosureBits, CLOSURE_AMBIENT_OCCLUSION)
@ -147,18 +172,21 @@ struct MaterialKey {
MaterialKey(::Material *mat_, eMaterialGeometry geometry, eMaterialPipeline pipeline) : mat(mat_)
{
options = shader_uuid_from_material_type(pipeline, geometry, mat_->blend_flag);
options = shader_uuid_from_material_type(
pipeline, geometry, to_displacement_type(mat_->displacement_method), mat_->blend_flag);
}
uint64_t hash() const
{
BLI_assert(options < sizeof(*mat));
return uint64_t(mat) + options;
}
bool operator<(const MaterialKey &k) const
{
return (mat < k.mat) || (options < k.options);
if (mat == k.mat) {
return options < k.options;
}
return mat < k.mat;
}
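Two notes on MaterialKey. The hash is collision-free because the assert guarantees options < sizeof(Material), so adding options to the material's address can never land inside another live Material object. And the previous operator<, (mat < k.mat) || (options < k.options), was not a strict weak ordering: for a = {mat: 2, options: 1} and b = {mat: 1, options: 2}, both a < b and b < a hold, which ordered containers must never see. The corrected version is a plain lexicographic comparison, equivalently written with std::tie (sketch with simplified types):

#include <cstdint>
#include <tuple>

struct Key {
  const void *mat;
  uint64_t options;
};

/* Lexicographic: `options` only breaks ties on `mat`, as in the fix above. */
static bool key_less(const Key &a, const Key &b)
{
  return std::tie(a.mat, a.options) < std::tie(b.mat, b.options);
}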
bool operator==(const MaterialKey &k) const


@ -371,7 +371,7 @@ PassMain::Sub *ForwardPipeline::material_transparent_add(const Object *ob,
::Material *blender_mat,
GPUMaterial *gpumat)
{
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM | DRW_STATE_DEPTH_LESS_EQUAL;
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_LESS_EQUAL;
if (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) {
state |= DRW_STATE_CULL_BACK;
}
@ -382,24 +382,14 @@ PassMain::Sub *ForwardPipeline::material_transparent_add(const Object *ob,
return pass;
}
void ForwardPipeline::render(View &view,
Framebuffer &prepass_fb,
Framebuffer &combined_fb,
GPUTexture * /*combined_tx*/)
void ForwardPipeline::render(View &view, Framebuffer &prepass_fb, Framebuffer &combined_fb)
{
DRW_stats_group_start("Forward.Opaque");
prepass_fb.bind();
inst_.manager->submit(prepass_ps_, view);
// if (!DRW_pass_is_empty(prepass_ps_)) {
inst_.hiz_buffer.set_dirty();
// }
// if (inst_.raytracing.enabled()) {
// rt_buffer.radiance_copy(combined_tx);
// inst_.hiz_buffer.update();
// }
inst_.shadows.set_view(view);
inst_.irradiance_cache.set_view(view);
@ -413,10 +403,6 @@ void ForwardPipeline::render(View &view,
combined_fb.bind();
inst_.manager->submit(transparent_ps_, view);
// if (inst_.raytracing.enabled()) {
// gbuffer.ray_radiance_tx.release();
// }
}
/** \} */
@ -483,8 +469,8 @@ void DeferredLayer::begin_sync()
inst_.cryptomatte.bind_resources(gbuffer_ps_);
}
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM | DRW_STATE_DEPTH_EQUAL |
DRW_STATE_WRITE_STENCIL | DRW_STATE_STENCIL_ALWAYS;
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_EQUAL | DRW_STATE_WRITE_STENCIL |
DRW_STATE_STENCIL_ALWAYS;
gbuffer_double_sided_ps_ = &gbuffer_ps_.sub("DoubleSided");
gbuffer_double_sided_ps_->state_set(state);
@ -1032,8 +1018,8 @@ void DeferredProbeLayer::begin_sync()
inst_.cryptomatte.bind_resources(gbuffer_ps_);
}
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM | DRW_STATE_DEPTH_EQUAL |
DRW_STATE_WRITE_STENCIL | DRW_STATE_STENCIL_ALWAYS;
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_EQUAL | DRW_STATE_WRITE_STENCIL |
DRW_STATE_STENCIL_ALWAYS;
gbuffer_double_sided_ps_ = &gbuffer_ps_.sub("DoubleSided");
gbuffer_double_sided_ps_->state_set(state);
@ -1191,7 +1177,7 @@ void PlanarProbePipeline::begin_sync()
gbuffer_ps_.bind_image(RBUFS_VALUE_SLOT, &inst_.render_buffers.rp_value_tx);
inst_.cryptomatte.bind_resources(gbuffer_ps_);
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM | DRW_STATE_DEPTH_EQUAL;
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_EQUAL;
gbuffer_double_sided_ps_ = &gbuffer_ps_.sub("DoubleSided");
gbuffer_double_sided_ps_->state_set(state);


@ -148,8 +148,6 @@ class ForwardPipeline {
PassSortable transparent_ps_ = {"Forward.Transparent"};
float3 camera_forward_;
// GPUTexture *input_screen_radiance_tx_ = nullptr;
public:
ForwardPipeline(Instance &inst) : inst_(inst){};
@ -165,10 +163,7 @@ class ForwardPipeline {
::Material *blender_mat,
GPUMaterial *gpumat);
void render(View &view,
Framebuffer &prepass_fb,
Framebuffer &combined_fb,
GPUTexture *combined_tx);
void render(View &view, Framebuffer &prepass_fb, Framebuffer &combined_fb);
};
/** \} */


@ -60,7 +60,9 @@ void RayTraceModule::sync()
pass.shader_set(inst_.shaders.static_shader_get(RAY_TILE_CLASSIFY));
pass.bind_image("tile_mask_img", &tile_mask_tx_);
pass.bind_ssbo("ray_dispatch_buf", &ray_dispatch_buf_);
pass.bind_ssbo("denoise_dispatch_buf", &denoise_dispatch_buf_);
pass.bind_ssbo("ray_denoise_dispatch_buf", &ray_denoise_dispatch_buf_);
pass.bind_ssbo("horizon_dispatch_buf", &horizon_dispatch_buf_);
pass.bind_ssbo("horizon_denoise_dispatch_buf", &horizon_denoise_dispatch_buf_);
inst_.bind_uniform_data(&pass);
inst_.gbuffer.bind_resources(pass);
pass.dispatch(&tile_classify_dispatch_size_);
@ -72,9 +74,13 @@ void RayTraceModule::sync()
pass.shader_set(inst_.shaders.static_shader_get(RAY_TILE_COMPACT));
pass.bind_image("tile_mask_img", &tile_mask_tx_);
pass.bind_ssbo("ray_dispatch_buf", &ray_dispatch_buf_);
pass.bind_ssbo("denoise_dispatch_buf", &denoise_dispatch_buf_);
pass.bind_ssbo("ray_denoise_dispatch_buf", &ray_denoise_dispatch_buf_);
pass.bind_ssbo("ray_tiles_buf", &ray_tiles_buf_);
pass.bind_ssbo("denoise_tiles_buf", &denoise_tiles_buf_);
pass.bind_ssbo("ray_denoise_tiles_buf", &ray_denoise_tiles_buf_);
pass.bind_ssbo("horizon_dispatch_buf", &horizon_dispatch_buf_);
pass.bind_ssbo("horizon_denoise_dispatch_buf", &horizon_denoise_dispatch_buf_);
pass.bind_ssbo("horizon_tiles_buf", &horizon_tiles_buf_);
pass.bind_ssbo("horizon_denoise_tiles_buf", &horizon_denoise_tiles_buf_);
inst_.bind_uniform_data(&pass);
pass.dispatch(&tile_compact_dispatch_size_);
pass.barrier(GPU_BARRIER_SHADER_STORAGE);
@ -148,7 +154,7 @@ void RayTraceModule::sync()
PassSimple &pass = PASS_VARIATION(denoise_spatial_, type, _ps_);
pass.init();
pass.shader_set(inst_.shaders.static_shader_get(SHADER_VARIATION(RAY_DENOISE_SPATIAL_, type)));
pass.bind_ssbo("tiles_coord_buf", &denoise_tiles_buf_);
pass.bind_ssbo("tiles_coord_buf", &ray_denoise_tiles_buf_);
pass.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
pass.bind_texture("depth_tx", &depth_tx);
pass.bind_image("ray_data_img", &ray_data_tx_);
@ -161,7 +167,7 @@ void RayTraceModule::sync()
inst_.bind_uniform_data(&pass);
inst_.sampling.bind_resources(pass);
inst_.gbuffer.bind_resources(pass);
pass.dispatch(denoise_dispatch_buf_);
pass.dispatch(ray_denoise_dispatch_buf_);
pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
}
{
@ -178,9 +184,9 @@ void RayTraceModule::sync()
pass.bind_image("out_radiance_img", &denoised_temporal_tx_);
pass.bind_image("in_variance_img", &hit_variance_tx_);
pass.bind_image("out_variance_img", &denoise_variance_tx_);
pass.bind_ssbo("tiles_coord_buf", &denoise_tiles_buf_);
pass.bind_ssbo("tiles_coord_buf", &ray_denoise_tiles_buf_);
inst_.sampling.bind_resources(pass);
pass.dispatch(denoise_dispatch_buf_);
pass.dispatch(ray_denoise_dispatch_buf_);
pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
}
for (auto type : IndexRange(3)) {
@ -193,14 +199,63 @@ void RayTraceModule::sync()
pass.bind_image("out_radiance_img", &denoised_bilateral_tx_);
pass.bind_image("in_variance_img", &denoise_variance_tx_);
pass.bind_image("tile_mask_img", &tile_mask_tx_);
pass.bind_ssbo("tiles_coord_buf", &denoise_tiles_buf_);
pass.bind_ssbo("tiles_coord_buf", &ray_denoise_tiles_buf_);
inst_.bind_uniform_data(&pass);
inst_.sampling.bind_resources(pass);
inst_.gbuffer.bind_resources(pass);
pass.dispatch(denoise_dispatch_buf_);
pass.dispatch(ray_denoise_dispatch_buf_);
pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
}
{
PassSimple &pass = horizon_setup_ps_;
pass.init();
pass.shader_set(inst_.shaders.static_shader_get(HORIZON_SETUP));
inst_.bind_uniform_data(&pass);
pass.bind_texture("depth_tx", &depth_tx);
pass.bind_texture("in_radiance_tx", &screen_radiance_tx_, GPUSamplerState::default_sampler());
pass.bind_image("out_radiance_img", &downsampled_in_radiance_tx_);
pass.bind_image("out_normal_img", &downsampled_in_normal_tx_);
inst_.bind_uniform_data(&pass);
inst_.gbuffer.bind_resources(pass);
pass.dispatch(&tracing_dispatch_size_);
pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
}
for (auto type : IndexRange(3)) {
PassSimple &pass = PASS_VARIATION(horizon_scan_, type, _ps_);
pass.init();
pass.shader_set(inst_.shaders.static_shader_get(SHADER_VARIATION(HORIZON_SCAN_, type)));
pass.bind_image("horizon_radiance_img", &horizon_radiance_tx_);
pass.bind_image("horizon_occlusion_img", &horizon_occlusion_tx_);
pass.bind_ssbo("tiles_coord_buf", &horizon_tiles_buf_);
pass.bind_texture("screen_radiance_tx", &downsampled_in_radiance_tx_);
pass.bind_texture("screen_normal_tx", &downsampled_in_normal_tx_);
pass.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
inst_.bind_uniform_data(&pass);
inst_.hiz_buffer.bind_resources(pass);
inst_.sampling.bind_resources(pass);
inst_.gbuffer.bind_resources(pass);
pass.dispatch(horizon_dispatch_buf_);
pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
}
{
PassSimple &pass = horizon_denoise_ps_;
pass.init();
pass.shader_set(inst_.shaders.static_shader_get(HORIZON_DENOISE));
inst_.bind_uniform_data(&pass);
pass.bind_texture("depth_tx", &depth_tx);
pass.bind_image("horizon_radiance_img", &horizon_radiance_tx_);
pass.bind_image("horizon_occlusion_img", &horizon_occlusion_tx_);
pass.bind_image("radiance_img", &horizon_scan_output_tx_);
pass.bind_image("tile_mask_img", &tile_mask_tx_);
pass.bind_ssbo("tiles_coord_buf", &horizon_denoise_tiles_buf_);
inst_.bind_uniform_data(&pass);
inst_.sampling.bind_resources(pass);
inst_.gbuffer.bind_resources(pass);
inst_.irradiance_cache.bind_resources(pass);
inst_.reflection_probes.bind_resources(pass);
pass.dispatch(horizon_denoise_dispatch_buf_);
pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
}
#undef SHADER_VARIATION
#undef PASS_VARIATION
}
@ -236,6 +291,7 @@ RayTraceResult RayTraceModule::trace(RayTraceBuffer &rt_buffer,
PassSimple *trace_ray_ps = nullptr;
PassSimple *denoise_spatial_ps = nullptr;
PassSimple *denoise_bilateral_ps = nullptr;
PassSimple *horizon_scan_ps = nullptr;
RayTraceBuffer::DenoiseBuffer *denoise_buf = nullptr;
if (raytrace_closure == CLOSURE_DIFFUSE) {
@ -245,6 +301,7 @@ RayTraceResult RayTraceModule::trace(RayTraceBuffer &rt_buffer,
denoise_spatial_ps = &denoise_spatial_diffuse_ps_;
denoise_bilateral_ps = &denoise_bilateral_diffuse_ps_;
denoise_buf = &rt_buffer.diffuse;
horizon_scan_ps = &horizon_scan_diffuse_ps_;
}
else if (raytrace_closure == CLOSURE_REFLECTION) {
options = reflection_options_;
@ -253,6 +310,7 @@ RayTraceResult RayTraceModule::trace(RayTraceBuffer &rt_buffer,
denoise_spatial_ps = &denoise_spatial_reflect_ps_;
denoise_bilateral_ps = &denoise_bilateral_reflect_ps_;
denoise_buf = &rt_buffer.reflection;
horizon_scan_ps = &horizon_scan_reflect_ps_;
}
else if (raytrace_closure == CLOSURE_REFRACTION) {
options = refraction_options_;
@ -261,6 +319,7 @@ RayTraceResult RayTraceModule::trace(RayTraceBuffer &rt_buffer,
denoise_spatial_ps = &denoise_spatial_refract_ps_;
denoise_bilateral_ps = &denoise_bilateral_refract_ps_;
denoise_buf = &rt_buffer.refraction;
horizon_scan_ps = &horizon_scan_refract_ps_;
}
if ((active_closures & raytrace_closure) == 0) {
@ -278,6 +337,8 @@ RayTraceResult RayTraceModule::trace(RayTraceBuffer &rt_buffer,
const int2 tracing_res = math::divide_ceil(extent, int2(resolution_scale));
const int2 dummy_extent(1, 1);
tracing_dispatch_size_ = int3(math::divide_ceil(tracing_res, int2(RAYTRACE_GROUP_SIZE)), 1);
tile_classify_dispatch_size_ = int3(math::divide_ceil(extent, int2(RAYTRACE_GROUP_SIZE)), 1);
const int denoise_tile_count = tile_classify_dispatch_size_.x * tile_classify_dispatch_size_.y;
const int2 tile_mask_extent = tile_classify_dispatch_size_.xy();
@ -289,20 +350,25 @@ RayTraceResult RayTraceModule::trace(RayTraceBuffer &rt_buffer,
renderbuf_stencil_view_ = inst_.render_buffers.depth_tx.stencil_view();
renderbuf_depth_view_ = inst_.render_buffers.depth_tx;
bool use_denoise = (options.flag & RAYTRACE_EEVEE_USE_DENOISE);
bool use_spatial_denoise = (options.denoise_stages & RAYTRACE_EEVEE_DENOISE_SPATIAL) &&
use_denoise;
bool use_temporal_denoise = (options.denoise_stages & RAYTRACE_EEVEE_DENOISE_TEMPORAL) &&
use_spatial_denoise;
bool use_bilateral_denoise = (options.denoise_stages & RAYTRACE_EEVEE_DENOISE_BILATERAL) &&
use_temporal_denoise;
const bool use_denoise = (options.flag & RAYTRACE_EEVEE_USE_DENOISE);
const bool use_spatial_denoise = (options.denoise_stages & RAYTRACE_EEVEE_DENOISE_SPATIAL) &&
use_denoise;
const bool use_temporal_denoise = (options.denoise_stages & RAYTRACE_EEVEE_DENOISE_TEMPORAL) &&
use_spatial_denoise;
const bool use_bilateral_denoise = (options.denoise_stages & RAYTRACE_EEVEE_DENOISE_BILATERAL) &&
use_temporal_denoise;
const bool use_horizon_scan = true;
DRW_stats_group_start("Raytracing");
data_.thickness = options.screen_trace_thickness;
data_.quality = 1.0f - 0.95f * options.screen_trace_quality;
data_.brightness_clamp = (options.sample_clamp > 0.0) ? options.sample_clamp : 1e20;
data_.max_trace_roughness = 1.0f;
float roughness_mask_start = options.screen_trace_max_roughness;
float roughness_mask_fade = 0.2f;
data_.roughness_mask_scale = 1.0 / roughness_mask_fade;
data_.roughness_mask_bias = data_.roughness_mask_scale * roughness_mask_start;
data_.resolution_scale = resolution_scale;
data_.closure_active = raytrace_closure;
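A worked note on the roughness mask: with scale = 1 / fade and bias = start / fade, the shader-side factor saturate(roughness * scale - bias) is simply (roughness - start) / fade clamped to [0, 1], a linear ramp from pure ray tracing to pure horizon scan across the fade range. Sketch:

#include <algorithm>

/* Mirrors data_.roughness_mask_scale / data_.roughness_mask_bias above. */
static float horizon_mix_factor(float roughness, float mask_start, float mask_fade)
{
  const float scale = 1.0f / mask_fade;
  const float bias = scale * mask_start;
  return std::clamp(roughness * scale - bias, 0.0f, 1.0f);
}
/* With mask_start = 0.4 and mask_fade = 0.2: roughness 0.4 -> 0.0 (ray trace
 * only), 0.5 -> 0.5 (blend), 0.6 and up -> 1.0 (horizon scan only). */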
@ -315,8 +381,10 @@ RayTraceResult RayTraceModule::trace(RayTraceBuffer &rt_buffer,
inst_.push_uniform_data();
tile_mask_tx_.acquire(tile_mask_extent, RAYTRACE_TILEMASK_FORMAT);
denoise_tiles_buf_.resize(ceil_to_multiple_u(denoise_tile_count, 512));
horizon_tiles_buf_.resize(ceil_to_multiple_u(ray_tile_count, 512));
horizon_denoise_tiles_buf_.resize(ceil_to_multiple_u(denoise_tile_count, 512));
ray_tiles_buf_.resize(ceil_to_multiple_u(ray_tile_count, 512));
ray_denoise_tiles_buf_.resize(ceil_to_multiple_u(denoise_tile_count, 512));
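The rounding helpers used here are plain round-up arithmetic; a minimal sketch of assumed-equivalent scalar implementations (the Blender originals also handle vector types):

#include <cstdint>

static uint32_t ceil_to_multiple_u(uint32_t x, uint32_t m)
{
  return ((x + m - 1u) / m) * m; /* ceil_to_multiple_u(1000, 512) == 1024 */
}

static int divide_ceil_int(int x, int m)
{
  return (x + m - 1) / m; /* divide_ceil_int(1920, 8) == 240 work-groups */
}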
/* Ray setup. */
inst_.manager->submit(tile_classify_ps_);
@ -372,8 +440,6 @@ RayTraceResult RayTraceModule::trace(RayTraceBuffer &rt_buffer,
inst_.manager->submit(denoise_temporal_ps_, render_view);
/* Swap after last use. */
TextureFromPool::swap(tile_mask_tx_, denoise_buf->tilemask_history_tx);
/* Save view-projection matrix for next reprojection. */
denoise_buf->history_persmat = main_view.persmat();
/* Radiance will be swapped with history in #RayTraceResult::release().
@ -393,13 +459,10 @@ RayTraceResult RayTraceModule::trace(RayTraceBuffer &rt_buffer,
if (use_bilateral_denoise) {
denoise_buf->denoised_bilateral_tx.acquire(extent, RAYTRACE_RADIANCE_FORMAT);
denoised_bilateral_tx_ = denoise_buf->denoised_bilateral_tx;
/* Swap back for one last use. */
TextureFromPool::swap(tile_mask_tx_, denoise_buf->tilemask_history_tx);
inst_.manager->submit(*denoise_bilateral_ps, render_view);
/* Swap after last use. */
TextureFromPool::swap(tile_mask_tx_, denoise_buf->tilemask_history_tx);
TextureFromPool::swap(denoise_buf->denoised_temporal_tx, denoise_buf->radiance_history_tx);
TextureFromPool::swap(denoise_variance_tx_, denoise_buf->variance_history_tx);
@ -408,9 +471,37 @@ RayTraceResult RayTraceModule::trace(RayTraceBuffer &rt_buffer,
denoise_buf->denoised_temporal_tx.release();
}
tile_mask_tx_.release();
denoise_variance_tx_.release();
if (use_horizon_scan) {
downsampled_in_radiance_tx_.acquire(tracing_res, RAYTRACE_RADIANCE_FORMAT);
downsampled_in_normal_tx_.acquire(tracing_res, GPU_RGBA8);
inst_.manager->submit(horizon_setup_ps_, render_view);
horizon_occlusion_tx_.acquire(tracing_res, GPU_R8);
horizon_radiance_tx_.acquire(tracing_res, RAYTRACE_RADIANCE_FORMAT);
inst_.manager->submit(*horizon_scan_ps, render_view);
downsampled_in_radiance_tx_.release();
downsampled_in_normal_tx_.release();
horizon_scan_output_tx_ = result.get();
inst_.manager->submit(horizon_denoise_ps_, render_view);
horizon_occlusion_tx_.release();
horizon_radiance_tx_.release();
}
tile_mask_tx_.release();
if (tile_mask_tx_.is_valid()) {
/* Swap after last use. */
TextureFromPool::swap(tile_mask_tx_, denoise_buf->tilemask_history_tx);
}
DRW_stats_group_end();
return result;


@ -117,26 +117,49 @@ class RayTraceModule {
draw::PassSimple denoise_bilateral_diffuse_ps_ = {"DenoiseBilateral.Diffuse"};
draw::PassSimple denoise_bilateral_reflect_ps_ = {"DenoiseBilateral.Reflection"};
draw::PassSimple denoise_bilateral_refract_ps_ = {"DenoiseBilateral.Refraction"};
draw::PassSimple horizon_setup_ps_ = {"HorizonScan.Setup"};
draw::PassSimple horizon_scan_diffuse_ps_ = {"HorizonScan.Diffuse"};
draw::PassSimple horizon_scan_reflect_ps_ = {"HorizonScan.Reflection"};
draw::PassSimple horizon_scan_refract_ps_ = {"HorizonScan.Refraction"};
draw::PassSimple horizon_denoise_ps_ = {"HorizonScan.Denoise"};
/** Dispatch with enough tiles for the whole screen. */
int3 tile_classify_dispatch_size_ = int3(1);
/** Dispatch with enough tiles for the tile mask. */
int3 tile_compact_dispatch_size_ = int3(1);
/** Dispatch with enough tiles for the tracing resolution. */
int3 tracing_dispatch_size_ = int3(1);
/** 2D tile mask to check which unused adjacent tile we need to clear. */
TextureFromPool tile_mask_tx_ = {"tile_mask_tx"};
/** Indirect dispatch rays. Avoid dispatching work-groups that will not trace anything. */
DispatchIndirectBuf ray_dispatch_buf_ = {"ray_dispatch_buf_"};
/** Indirect dispatch denoise full-resolution tiles. */
DispatchIndirectBuf denoise_dispatch_buf_ = {"denoise_dispatch_buf_"};
DispatchIndirectBuf ray_denoise_dispatch_buf_ = {"ray_denoise_dispatch_buf_"};
/** Indirect dispatch horizon scan. Avoid dispatching work-groups that will not scan anything. */
DispatchIndirectBuf horizon_dispatch_buf_ = {"horizon_dispatch_buf_"};
/** Indirect dispatch denoise full-resolution tiles. */
DispatchIndirectBuf horizon_denoise_dispatch_buf_ = {"horizon_denoise_dispatch_buf_"};
/** Pointer to the texture to store the result of horizon scan in. */
GPUTexture *horizon_scan_output_tx_ = nullptr;
/** Tile buffer that contains tile coordinates. */
RayTraceTileBuf ray_tiles_buf_ = {"ray_tiles_buf_"};
RayTraceTileBuf denoise_tiles_buf_ = {"denoise_tiles_buf_"};
RayTraceTileBuf ray_denoise_tiles_buf_ = {"ray_denoise_tiles_buf_"};
RayTraceTileBuf horizon_tiles_buf_ = {"horizon_tiles_buf_"};
RayTraceTileBuf horizon_denoise_tiles_buf_ = {"horizon_denoise_tiles_buf_"};
/** Texture containing the ray direction and PDF. */
TextureFromPool ray_data_tx_ = {"ray_data_tx"};
/** Texture containing the ray hit time. */
TextureFromPool ray_time_tx_ = {"ray_time_tx"};
/** Texture containing the ray hit radiance (tracing-res). */
TextureFromPool ray_radiance_tx_ = {"ray_radiance_tx"};
/** Texture containing the horizon visibility mask. */
TextureFromPool horizon_occlusion_tx_ = {"horizon_occlusion_tx_"};
/** Texture containing the horizon local radiance. */
TextureFromPool horizon_radiance_tx_ = {"horizon_radiance_tx_"};
/** Texture containing the input screen radiance but re-projected. */
TextureFromPool downsampled_in_radiance_tx_ = {"downsampled_in_radiance_tx_"};
/** Texture containing the view space normal. The BSDF normal is arbitrarily chosen. */
TextureFromPool downsampled_in_normal_tx_ = {"downsampled_in_normal_tx_"};
/** Textures containing the ray hit radiance denoised (full-res). One of them is result_tx. */
GPUTexture *denoised_spatial_tx_ = nullptr;
GPUTexture *denoised_temporal_tx_ = nullptr;


@ -50,6 +50,7 @@ void RenderBuffers::sync()
data.environment_id = pass_index_get(EEVEE_RENDER_PASS_ENVIRONMENT);
data.shadow_id = pass_index_get(EEVEE_RENDER_PASS_SHADOW);
data.ambient_occlusion_id = pass_index_get(EEVEE_RENDER_PASS_AO);
data.transparent_id = pass_index_get(EEVEE_RENDER_PASS_TRANSPARENT);
data.aovs = inst_.film.aovs_info;
}


@ -106,6 +106,16 @@ const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_
return "eevee_hiz_update";
case HIZ_UPDATE_LAYER:
return "eevee_hiz_update_layer";
case HORIZON_DENOISE:
return "eevee_horizon_denoise";
case HORIZON_SCAN_DIFFUSE:
return "eevee_horizon_scan_diffuse";
case HORIZON_SCAN_REFLECT:
return "eevee_horizon_scan_reflect";
case HORIZON_SCAN_REFRACT:
return "eevee_horizon_scan_refract";
case HORIZON_SETUP:
return "eevee_horizon_setup";
case MOTION_BLUR_GATHER:
return "eevee_motion_blur_gather";
case MOTION_BLUR_TILE_DILATE:
@ -311,8 +321,10 @@ void ShaderModule::material_create_info_ammend(GPUMaterial *gpumat, GPUCodegenOu
eMaterialPipeline pipeline_type;
eMaterialGeometry geometry_type;
eMaterialDisplacement displacement_type;
bool transparent_shadows;
material_type_from_shader_uuid(shader_uuid, pipeline_type, geometry_type, transparent_shadows);
material_type_from_shader_uuid(
shader_uuid, pipeline_type, geometry_type, displacement_type, transparent_shadows);
GPUCodegenOutput &codegen = *codegen_;
ShaderCreateInfo &info = *reinterpret_cast<ShaderCreateInfo *>(codegen.create_info);
@ -491,17 +503,18 @@ void ShaderModule::material_create_info_ammend(GPUMaterial *gpumat, GPUCodegenOu
}
if (!is_compute) {
if (!ELEM(geometry_type,
MAT_GEOM_WORLD,
MAT_GEOM_VOLUME_WORLD,
MAT_GEOM_VOLUME_OBJECT,
MAT_GEOM_VOLUME))
{
vert_gen << "vec3 nodetree_displacement()\n";
vert_gen << "{\n";
vert_gen << ((codegen.displacement) ? codegen.displacement : "return vec3(0);\n");
vert_gen << "}\n\n";
}
const bool use_vertex_displacement = (codegen.displacement != nullptr) &&
(displacement_type != MAT_DISPLACEMENT_BUMP) &&
(!ELEM(geometry_type,
MAT_GEOM_WORLD,
MAT_GEOM_VOLUME_WORLD,
MAT_GEOM_VOLUME_OBJECT,
MAT_GEOM_VOLUME));
vert_gen << "vec3 nodetree_displacement()\n";
vert_gen << "{\n";
vert_gen << ((use_vertex_displacement) ? codegen.displacement : "return vec3(0);\n");
vert_gen << "}\n\n";
info.vertex_source_generated = vert_gen.str();
}
@ -644,8 +657,10 @@ GPUMaterial *ShaderModule::material_shader_get(::Material *blender_mat,
{
bool is_volume = ELEM(pipeline_type, MAT_PIPE_VOLUME_MATERIAL, MAT_PIPE_VOLUME_OCCUPANCY);
eMaterialDisplacement displacement_type = to_displacement_type(blender_mat->displacement_method);
uint64_t shader_uuid = shader_uuid_from_material_type(
pipeline_type, geometry_type, blender_mat->blend_flag);
pipeline_type, geometry_type, displacement_type, blender_mat->blend_flag);
return DRW_shader_from_material(
blender_mat, nodetree, shader_uuid, is_volume, deferred_compilation, codegen_callback, this);
@ -660,7 +675,7 @@ GPUMaterial *ShaderModule::world_shader_get(::World *blender_world,
eMaterialGeometry geometry_type = is_volume ? MAT_GEOM_VOLUME_WORLD : MAT_GEOM_WORLD;
uint64_t shader_uuid = shader_uuid_from_material_type(pipeline_type, geometry_type, 0);
uint64_t shader_uuid = shader_uuid_from_material_type(pipeline_type, geometry_type);
return DRW_shader_from_world(
blender_world, nodetree, shader_uuid, is_volume, defer_compilation, codegen_callback, this);
@ -672,10 +687,9 @@ GPUMaterial *ShaderModule::material_shader_get(const char *name,
ListBase &materials,
bNodeTree *nodetree,
eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type,
bool is_lookdev)
eMaterialGeometry geometry_type)
{
uint64_t shader_uuid = shader_uuid_from_material_type(pipeline_type, geometry_type, 0);
uint64_t shader_uuid = shader_uuid_from_material_type(pipeline_type, geometry_type);
bool is_volume = ELEM(pipeline_type, MAT_PIPE_VOLUME_MATERIAL, MAT_PIPE_VOLUME_OCCUPANCY);
@ -686,10 +700,10 @@ GPUMaterial *ShaderModule::material_shader_get(const char *name,
name,
shader_uuid,
is_volume,
is_lookdev,
false,
codegen_callback,
this);
GPU_material_status_set(gpumat, GPU_MAT_QUEUED);
GPU_material_status_set(gpumat, GPU_MAT_CREATED);
GPU_material_compile(gpumat);
/* Queue deferred material optimization. */
DRW_shader_queue_optimize_material(gpumat);


@ -66,6 +66,12 @@ enum eShaderType {
HIZ_UPDATE_LAYER,
HIZ_DEBUG,
HORIZON_DENOISE,
HORIZON_SCAN_DIFFUSE,
HORIZON_SCAN_REFLECT,
HORIZON_SCAN_REFRACT,
HORIZON_SETUP,
LIGHT_CULLING_DEBUG,
LIGHT_CULLING_SELECT,
LIGHT_CULLING_SORT,
@ -167,8 +173,7 @@ class ShaderModule {
ListBase &materials,
bNodeTree *nodetree,
eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type,
bool is_lookdev);
eMaterialGeometry geometry_type);
void material_create_info_ammend(GPUMaterial *mat, GPUCodegenOutput *codegen);


@ -268,9 +268,9 @@ struct FilmData {
/** Is true if accumulation of filtered passes is needed. */
bool1 any_render_pass_1;
bool1 any_render_pass_2;
bool1 any_render_pass_3;
/** Controlled by user in lookdev mode or by render settings. */
float background_opacity;
float _pad0, _pad1;
/** Output counts per type. */
int color_len, value_len;
/** Index in color_accum_img or value_accum_img of each pass. -1 if pass is not enabled. */
@ -287,6 +287,7 @@ struct FilmData {
int environment_id;
int shadow_id;
int ambient_occlusion_id;
int transparent_id;
/** Not indexed but still not -1 if enabled. */
int depth_id;
int combined_id;
@ -376,11 +377,12 @@ struct RenderBuffersInfoData {
int volume_light_id;
int emission_id;
int environment_id;
int transparent_id;
/* Value */
int value_len;
int shadow_id;
int ambient_occlusion_id;
int _pad0, _pad1, _pad2;
int _pad0, _pad1;
};
BLI_STATIC_ASSERT_ALIGN(RenderBuffersInfoData, 16)
@ -1200,14 +1202,14 @@ struct RayTraceData {
/** Maximum brightness during lighting evaluation. */
float brightness_clamp;
/** Maximum roughness for which we will trace a ray. */
float max_trace_roughness;
float roughness_mask_scale;
float roughness_mask_bias;
/** If set to true will bypass spatial denoising. */
bool1 skip_denoise;
/** Closure being ray-traced. */
eClosureBits closure_active;
int _pad0;
int _pad1;
int _pad2;
};
BLI_STATIC_ASSERT_ALIGN(RayTraceData, 16)


@ -28,11 +28,7 @@ namespace blender::eevee {
/** \name ShadingView
* \{ */
void ShadingView::init()
{
// dof_.init();
// mb_.init();
}
void ShadingView::init() {}
void ShadingView::sync()
{
@ -59,31 +55,21 @@ void ShadingView::sync()
const CameraData &cam = inst_.camera.data_get();
float4x4 viewmat, winmat;
const float(*viewmat_p)[4] = viewmat.ptr(), (*winmat_p)[4] = winmat.ptr();
if (false /* inst_.camera.is_panoramic() */) {
/* TODO(@fclem) Over-scans. */
/* For now a mandatory 5% over-scan for DoF. */
float side = cam.clip_near * 1.05f;
float near = cam.clip_near;
float far = cam.clip_far;
perspective_m4(winmat.ptr(), -side, side, -side, side, near, far);
winmat = math::projection::perspective(-side, side, -side, side, near, far);
viewmat = face_matrix_ * cam.viewmat;
}
else {
viewmat_p = cam.viewmat.ptr();
winmat_p = cam.winmat.ptr();
viewmat = cam.viewmat;
winmat = cam.winmat;
}
main_view_ = DRW_view_create(viewmat_p, winmat_p, nullptr, nullptr, nullptr);
sub_view_ = DRW_view_create_sub(main_view_, viewmat_p, winmat_p);
render_view_ = DRW_view_create_sub(main_view_, viewmat_p, winmat_p);
// dof_.sync(winmat_p, extent_);
// rt_buffer_opaque_.sync(extent_);
// rt_buffer_refract_.sync(extent_);
// inst_.hiz_back.view_sync(extent_);
// inst_.hiz_front.view_sync(extent_);
// inst_.gbuffer.view_sync(extent_);
main_view_.sync(viewmat, winmat);
}
void ShadingView::render()
@ -103,9 +89,8 @@ void ShadingView::render()
update_view();
DRW_stats_group_start(name_);
DRW_view_set_active(render_view_);
inst_.planar_probes.set_view(render_view_new_, extent_);
inst_.planar_probes.set_view(render_view_, extent_);
/* If camera has any motion, compute motion vector in the film pass. Otherwise, we avoid float
* precision issues by setting the motion of all static geometry to 0. */
@ -121,42 +106,42 @@ void ShadingView::render()
inst_.hiz_buffer.set_source(&inst_.render_buffers.depth_tx);
inst_.hiz_buffer.set_dirty();
inst_.pipelines.background.render(render_view_new_);
inst_.pipelines.background.render(render_view_);
/* TODO(fclem): Move it after the first prepass (and hiz update) once pipeline is stabilized. */
inst_.lights.set_view(render_view_new_, extent_);
inst_.reflection_probes.set_view(render_view_new_);
inst_.lights.set_view(render_view_, extent_);
inst_.reflection_probes.set_view(render_view_);
inst_.volume.draw_prepass(render_view_new_);
inst_.volume.draw_prepass(render_view_);
/* TODO: cleanup. */
View main_view_new("MainView", main_view_);
/* TODO(Miguel Pozo): Deferred and forward prepass should happen before the GBuffer pass. */
inst_.pipelines.deferred.render(main_view_new,
render_view_new_,
inst_.pipelines.deferred.render(main_view_,
render_view_,
prepass_fb_,
combined_fb_,
extent_,
rt_buffer_opaque_,
rt_buffer_refract_);
inst_.volume.draw_compute(render_view_new_);
inst_.volume.draw_compute(render_view_);
// inst_.lookdev.render_overlay(view_fb_);
inst_.pipelines.forward.render(render_view_new_, prepass_fb_, combined_fb_, rbufs.combined_tx);
inst_.pipelines.forward.render(render_view_, prepass_fb_, combined_fb_);
inst_.lights.debug_draw(render_view_new_, combined_fb_);
inst_.hiz_buffer.debug_draw(render_view_new_, combined_fb_);
inst_.shadows.debug_draw(render_view_new_, combined_fb_);
inst_.irradiance_cache.viewport_draw(render_view_new_, combined_fb_);
inst_.reflection_probes.viewport_draw(render_view_new_, combined_fb_);
inst_.planar_probes.viewport_draw(render_view_new_, combined_fb_);
render_transparent_pass(rbufs);
inst_.ambient_occlusion.render_pass(render_view_new_);
inst_.lights.debug_draw(render_view_, combined_fb_);
inst_.hiz_buffer.debug_draw(render_view_, combined_fb_);
inst_.shadows.debug_draw(render_view_, combined_fb_);
inst_.irradiance_cache.viewport_draw(render_view_, combined_fb_);
inst_.reflection_probes.viewport_draw(render_view_, combined_fb_);
inst_.planar_probes.viewport_draw(render_view_, combined_fb_);
inst_.ambient_occlusion.render_pass(render_view_);
GPUTexture *combined_final_tx = render_postfx(rbufs.combined_tx);
inst_.film.accumulate(sub_view_, combined_final_tx);
inst_.film.accumulate(jitter_view_, combined_final_tx);
rbufs.release();
postfx_tx_.release();
@ -164,6 +149,20 @@ void ShadingView::render()
DRW_stats_group_end();
}
void ShadingView::render_transparent_pass(RenderBuffers &rbufs)
{
if (rbufs.data.transparent_id != -1) {
transparent_fb_.ensure(
GPU_ATTACHMENT_TEXTURE(rbufs.depth_tx),
GPU_ATTACHMENT_TEXTURE_LAYER(rbufs.rp_color_tx, rbufs.data.transparent_id));
/* Alpha stores transmittance. So start at 1. */
float4 clear_color = {0.0f, 0.0f, 0.0f, 1.0f};
GPU_framebuffer_bind(transparent_fb_);
GPU_framebuffer_clear_color(transparent_fb_, clear_color);
inst_.pipelines.forward.render(render_view_, prepass_fb_, transparent_fb_);
}
}
GPUTexture *ShadingView::render_postfx(GPUTexture *input_tx)
{
if (!inst_.depth_of_field.postfx_enabled() && !inst_.motion_blur.postfx_enabled()) {
@ -174,17 +173,16 @@ GPUTexture *ShadingView::render_postfx(GPUTexture *input_tx)
GPUTexture *output_tx = postfx_tx_;
/* Swapping is done internally. Actual output is set to the next input. */
inst_.depth_of_field.render(render_view_new_, &input_tx, &output_tx, dof_buffer_);
inst_.motion_blur.render(render_view_new_, &input_tx, &output_tx);
inst_.depth_of_field.render(render_view_, &input_tx, &output_tx, dof_buffer_);
inst_.motion_blur.render(render_view_, &input_tx, &output_tx);
return input_tx;
}
void ShadingView::update_view()
{
float4x4 viewmat, winmat;
DRW_view_viewmat_get(main_view_, viewmat.ptr(), false);
DRW_view_winmat_get(main_view_, winmat.ptr(), false);
float4x4 viewmat = main_view_.viewmat();
float4x4 winmat = main_view_.winmat();
/* TODO(fclem): Mixed-resolution rendering: We need to make sure we render with exactly the same
* distances between pixels to line up render samples and target pixels.
@ -197,14 +195,12 @@ void ShadingView::update_view()
jitter *= 2.0f;
window_translate_m4(winmat.ptr(), winmat.ptr(), UNPACK2(jitter));
DRW_view_update_sub(sub_view_, viewmat.ptr(), winmat.ptr());
jitter_view_.sync(winmat, winmat);
/* FIXME(fclem): The offset may be noticeably large and the culling might make objects pop
* out of the blurring radius. To fix this, use custom enlarged culling matrix. */
inst_.depth_of_field.jitter_apply(winmat, viewmat);
DRW_view_update_sub(render_view_, viewmat.ptr(), winmat.ptr());
render_view_new_.sync(viewmat, winmat);
render_view_.sync(viewmat, winmat);
}
/** \} */


@ -48,16 +48,16 @@ class ShadingView {
Framebuffer prepass_fb_;
Framebuffer combined_fb_;
Framebuffer transparent_fb_ = {"transparent"};
TextureFromPool postfx_tx_;
/** Main view is created from the camera (or from the viewport). It is not jittered. */
DRWView *main_view_ = nullptr;
View main_view_ = {"main_view"};
/** Sub views are jittered versions of the main view. This allows jitter updates without trashing
* the visibility culling cache. */
DRWView *sub_view_ = nullptr;
/** Same as sub_view_ but has Depth Of Field jitter applied. */
DRWView *render_view_ = nullptr;
View render_view_new_;
View jitter_view_ = {"jitter_view"};
/** Same as jitter_view_ but has Depth Of Field jitter applied. */
View render_view_;
/** Render size of the view. Can change between scene sample eval. */
int2 extent_ = {-1, -1};
@ -66,7 +66,7 @@ class ShadingView {
public:
ShadingView(Instance &inst, const char *name, const float4x4 &face_matrix)
: inst_(inst), name_(name), face_matrix_(face_matrix), render_view_new_(name){};
: inst_(inst), name_(name), face_matrix_(face_matrix), render_view_(name){};
~ShadingView(){};
@ -76,9 +76,11 @@ class ShadingView {
void render();
private:
void render_transparent_pass(RenderBuffers &rbufs);
GPUTexture *render_postfx(GPUTexture *input_tx);
private:
void update_view();
};


@ -29,21 +29,24 @@ void main()
vec2 noise;
noise.x = interlieved_gradient_noise(vec2(texel), 3.0, 0.0);
noise.y = utility_tx_fetch(utility_tx, texel, UTIL_BLUE_NOISE_LAYER).r;
noise.y = utility_tx_fetch(utility_tx, vec2(texel), UTIL_BLUE_NOISE_LAYER).r;
noise = fract(noise + sampling_rng_2D_get(SAMPLING_AO_U));
vec3 ambient_occlusion = horizon_scan_eval(vP,
vN,
hiz_tx,
noise,
uniform_buf.ao.pixel_size,
uniform_buf.ao.distance,
uniform_buf.ao.thickness,
uniform_buf.ao.angle_bias,
10);
ClosureOcclusion occlusion;
occlusion.N = vN;
/* We can have some float imprecision because of the weighted accumulation. */
ambient_occlusion = saturate(ambient_occlusion * 1.02);
HorizonScanContext ctx;
ctx.occlusion = occlusion;
imageStore(out_ao_img, ivec3(texel, out_ao_img_layer_index), saturate(ambient_occlusion.rrrr));
horizon_scan_eval(vP,
ctx,
noise,
uniform_buf.ao.pixel_size,
uniform_buf.ao.distance,
uniform_buf.ao.thickness,
uniform_buf.ao.angle_bias,
10);
imageStore(
out_ao_img, ivec3(texel, out_ao_img_layer_index), vec4(saturate(ctx.occlusion_result.r)));
}


@ -759,6 +759,23 @@ void film_process_data(ivec2 texel_film, out vec4 out_color, out float out_depth
film_store_value(dst, uniform_buf.film.mist_id, mist_accum, out_color);
}
if (uniform_buf.film.any_render_pass_3) {
vec4 transparent_accum = vec4(0.0);
for (int i = 0; i < uniform_buf.film.samples_len; i++) {
FilmSample src = film_sample_get(i, texel_film);
film_sample_accum(src,
uniform_buf.film.transparent_id,
uniform_buf.render_pass.transparent_id,
rp_color_tx,
transparent_accum);
}
/* Alpha stores transmittance for transparent pass. */
transparent_accum.a = weight_accum - transparent_accum.a;
film_store_color(dst, uniform_buf.film.transparent_id, transparent_accum, out_color);
}
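A worked check of the conversion above, assuming sample weights summing to 1: with two samples of weight 0.5 and transmittances 1.0 (nothing hit) and 0.2 (dense surface), the loop accumulates transparent_accum.a = 0.5 * 1.0 + 0.5 * 0.2 = 0.6, so the stored alpha is weight_accum - 0.6 = 0.4. That is sum(w_i * (1 - t_i)), i.e. 40% coverage.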
for (int aov = 0; aov < uniform_buf.film.aov_color_len; aov++) {
vec4 aov_accum = vec4(0.0);


@ -0,0 +1,188 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma BLENDER_REQUIRE(draw_view_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_utildefines_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_math_vector_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_gbuffer_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_lightprobe_eval_lib.glsl)
float bilateral_depth_weight(vec3 center_N, vec3 center_P, vec3 sample_P)
{
vec4 center_plane_eq = vec4(center_N, -dot(center_N, center_P));
/* Only compare distance to the center plane formed by the normal. */
float depth_delta = dot(center_plane_eq, vec4(sample_P, 1.0));
/* TODO(fclem): Scene parameter. This is dependent on scene scale. */
const float scale = 10000.0;
float weight = exp2(-scale * square(depth_delta));
return weight;
}
float bilateral_spatial_weight(float sigma, vec2 offset_from_center)
{
/* From https://github.com/tranvansang/bilateral-filter/blob/master/fshader.frag */
float fac = -1.0 / square(sigma);
/* Take two standard deviation. */
fac *= 2.0;
float weight = exp2(fac * length_squared(offset_from_center));
return weight;
}
float bilateral_normal_weight(vec3 center_N, vec3 sample_N)
{
float facing_ratio = dot(center_N, sample_N);
float weight = saturate(pow8f(facing_ratio));
return weight;
}
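The three weights multiply into a single rejection kernel; a scalar sketch using the constants from above (plane-distance scale 10000, sigma 1.5, facing^8):

#include <cmath>

static float bilateral_weight(float plane_dist, float pixel_dist, float facing)
{
  const float depth_w = std::exp2(-10000.0f * plane_dist * plane_dist);
  const float spatial_w = std::exp2((-2.0f / (1.5f * 1.5f)) * pixel_dist * pixel_dist);
  const float f2 = facing * facing, f4 = f2 * f2;
  const float normal_w = f4 * f4; /* pow8f(facing) */
  /* Any term near zero rejects the neighbor sample. */
  return depth_w * spatial_w * normal_w;
}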
/* In order to remove some more fireflies, "tone-map" the color samples during the accumulation. */
vec3 to_accumulation_space(vec3 color)
{
/* This 4 factor is to avoid killing too much energy. */
/* TODO(fclem): Parameter? */
color /= 4.0;
color = color / (1.0 + reduce_add(color));
return color;
}
vec3 from_accumulation_space(vec3 color)
{
color = color / (1.0 - reduce_add(color));
color *= 4.0;
return color;
}
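This pair is an invertible Reinhard-style remapping (the 1/4 pre-scale keeps more energy before compression); a quick round-trip check:

#include <cassert>
#include <cmath>

struct Vec3 {
  float x, y, z;
};
static float reduce_add(Vec3 c)
{
  return c.x + c.y + c.z;
}

static Vec3 to_accum(Vec3 c)
{
  c = {c.x / 4.0f, c.y / 4.0f, c.z / 4.0f};
  const float f = 1.0f / (1.0f + reduce_add(c));
  return {c.x * f, c.y * f, c.z * f};
}
static Vec3 from_accum(Vec3 c)
{
  const float f = 1.0f / (1.0f - reduce_add(c));
  c = {c.x * f, c.y * f, c.z * f};
  return {c.x * 4.0f, c.y * 4.0f, c.z * 4.0f};
}

int main()
{
  const Vec3 hdr = {8.0f, 2.0f, 0.5f};
  const Vec3 back = from_accum(to_accum(hdr));
  assert(std::fabs(back.x - hdr.x) < 1e-3f); /* Exact inverse up to float precision. */
  return 0;
}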
vec3 load_normal(ivec2 texel)
{
GBufferData gbuf = gbuffer_read(gbuf_header_tx, gbuf_closure_tx, gbuf_color_tx, texel);
/* TODO(fclem): Load preprocessed Normal. */
vec3 N = vec3(0.0);
if (gbuf.has_diffuse) {
N = gbuf.diffuse.N;
}
if (gbuf.has_reflection) {
N = gbuf.reflection.N;
}
if (gbuf.has_refraction) {
N = gbuf.refraction.N;
}
return N;
}
void main()
{
const uint tile_size = RAYTRACE_GROUP_SIZE;
uvec2 tile_coord = unpackUvec2x16(tiles_coord_buf[gl_WorkGroupID.x]);
ivec2 texel_fullres = ivec2(gl_LocalInvocationID.xy + tile_coord * tile_size);
ivec2 texel = (texel_fullres) / uniform_buf.raytrace.resolution_scale;
ivec2 extent = textureSize(gbuf_header_tx, 0).xy;
if (any(greaterThanEqual(texel_fullres, extent))) {
return;
}
vec2 center_uv = (vec2(texel_fullres) + 0.5) * uniform_buf.raytrace.full_resolution_inv;
float center_depth = texelFetch(depth_tx, texel_fullres, 0).r;
vec3 center_P = drw_point_screen_to_world(vec3(center_uv, center_depth));
if (center_depth == 1.0) {
/* Do not trace for background */
return;
}
GBufferData gbuf = gbuffer_read(gbuf_header_tx, gbuf_closure_tx, gbuf_color_tx, texel_fullres);
uint closure_bits = texelFetch(gbuf_header_tx, texel_fullres, 0).r;
if (!flag_test(closure_bits, uniform_buf.raytrace.closure_active)) {
return;
}
vec3 center_N = gbuf.diffuse.N;
float roughness = 1.0;
if (uniform_buf.raytrace.closure_active == eClosureBits(CLOSURE_REFLECTION)) {
roughness = gbuf.reflection.roughness;
center_N = gbuf.reflection.N;
}
if (uniform_buf.raytrace.closure_active == eClosureBits(CLOSURE_REFRACTION)) {
roughness = 1.0; /* TODO(fclem): Apparent roughness. */
center_N = gbuf.refraction.N;
}
float mix_fac = saturate(roughness * uniform_buf.raytrace.roughness_mask_scale -
uniform_buf.raytrace.roughness_mask_bias);
bool use_raytrace = mix_fac < 1.0;
bool use_horizon = mix_fac > 0.0;
if (use_horizon == false) {
return;
}
vec3 accum_radiance = vec3(0.0);
float accum_occlusion = 0.0;
float accum_weight = 0.0;
for (int x = -1; x <= 1; x++) {
for (int y = -1; y <= 1; y++) {
ivec2 offset = ivec2(x, y);
ivec2 sample_texel = texel + ivec2(x, y);
ivec2 sample_texel_fullres = sample_texel * uniform_buf.raytrace.resolution_scale +
uniform_buf.raytrace.resolution_bias;
ivec2 sample_tile = sample_texel_fullres / RAYTRACE_GROUP_SIZE;
/* Make sure the sample has been processed and does not contain garbage data. */
uint tile_mask = imageLoad(tile_mask_img, sample_tile).r;
bool unprocessed_tile = !flag_test(tile_mask, 1u << 1u);
if (unprocessed_tile) {
continue;
}
float sample_depth = texelFetch(depth_tx, sample_texel_fullres, 0).r;
vec2 sample_uv = (vec2(sample_texel_fullres) + 0.5) *
uniform_buf.raytrace.full_resolution_inv;
vec3 sample_P = drw_point_screen_to_world(vec3(sample_uv, sample_depth));
/* Background case. */
if (sample_depth == 0.0) {
continue;
}
vec3 sample_N = load_normal(sample_texel_fullres);
float depth_weight = bilateral_depth_weight(center_N, center_P, sample_P);
float spatial_weight = bilateral_spatial_weight(1.5, vec2(offset));
float normal_weight = bilateral_normal_weight(center_N, sample_N);
float weight = depth_weight * spatial_weight * normal_weight;
vec3 radiance = imageLoad(horizon_radiance_img, sample_texel).rgb;
/* Do not gather unprocessed pixels. */
if (all(equal(radiance, FLT_11_11_10_MAX))) {
continue;
}
float occlusion = imageLoad(horizon_occlusion_img, sample_texel).r;
accum_radiance += to_accumulation_space(radiance) * weight;
accum_occlusion += occlusion * weight;
accum_weight += weight;
}
}
float occlusion = accum_occlusion * safe_rcp(accum_weight);
vec3 radiance = from_accumulation_space(accum_radiance * safe_rcp(accum_weight));
vec3 P = center_P;
vec3 N = center_N;
vec3 Ng = center_N;
vec3 V = drw_world_incident_vector(P);
/* Fallback to nearest light-probe. */
LightProbeSample samp = lightprobe_load(P, Ng, V);
vec3 radiance_probe = spherical_harmonics_evaluate_lambert(N, samp.volume_irradiance);
/* Apply missing distant lighting. */
radiance += occlusion * radiance_probe;
vec4 radiance_horizon = vec4(radiance, 0.0);
vec4 radiance_raytrace = use_raytrace ? imageLoad(radiance_img, texel_fullres) : vec4(0.0);
vec4 radiance_mixed = mix(radiance_raytrace, radiance_horizon, mix_fac);
imageStore(radiance_img, texel_fullres, radiance_mixed);
}


@ -0,0 +1,98 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma BLENDER_REQUIRE(draw_view_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_utildefines_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_math_vector_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_gbuffer_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_horizon_scan_eval_lib.glsl)
void main()
{
const uint tile_size = RAYTRACE_GROUP_SIZE;
uvec2 tile_coord = unpackUvec2x16(tiles_coord_buf[gl_WorkGroupID.x]);
ivec2 texel = ivec2(gl_LocalInvocationID.xy + tile_coord * tile_size);
ivec2 texel_fullres = texel * uniform_buf.raytrace.resolution_scale +
uniform_buf.raytrace.resolution_bias;
ivec2 extent = textureSize(gbuf_header_tx, 0).xy;
if (any(greaterThanEqual(texel_fullres, extent))) {
return;
}
vec2 uv = (vec2(texel_fullres) + 0.5) * uniform_buf.raytrace.full_resolution_inv;
float depth = texelFetch(hiz_tx, texel_fullres, 0).r;
if (depth == 1.0) {
/* Do not trace for background */
imageStore(horizon_radiance_img, texel, vec4(FLT_11_11_10_MAX, 0.0));
return;
}
GBufferData gbuf = gbuffer_read(gbuf_header_tx, gbuf_closure_tx, gbuf_color_tx, texel_fullres);
HorizonScanContext ctx;
#ifdef HORIZON_DIFFUSE
if (gbuf.has_diffuse == false) {
imageStore(horizon_radiance_img, texel, vec4(0.0));
return;
}
vec3 Ng = gbuf.diffuse.N;
ctx.diffuse = gbuf.diffuse;
ctx.diffuse.N = drw_normal_world_to_view(ctx.diffuse.N);
#endif
#ifdef HORIZON_REFLECT
if (gbuf.has_reflection == false) {
imageStore(horizon_radiance_img, texel, vec4(0.0));
return;
}
vec3 Ng = gbuf.reflection.N;
ctx.reflection = gbuf.reflection;
ctx.reflection.roughness = max(ctx.reflection.roughness, 0.1);
ctx.reflection.N = drw_normal_world_to_view(ctx.reflection.N);
#endif
#ifdef HORIZON_REFRACT
if (gbuf.has_refraction == false) {
imageStore(horizon_radiance_img, texel, vec4(0.0));
return;
}
vec3 Ng = gbuf.refraction.N;
ctx.refraction = gbuf.refraction;
ctx.refraction.N = drw_normal_world_to_view(ctx.refraction.N);
#endif
vec3 vP = drw_point_screen_to_view(vec3(uv, depth));
vec2 noise = utility_tx_fetch(utility_tx, vec2(texel), UTIL_BLUE_NOISE_LAYER).rg;
noise = fract(noise + sampling_rng_2D_get(SAMPLING_AO_U));
horizon_scan_eval(vP,
ctx,
noise,
uniform_buf.ao.pixel_size,
1.0e16,
uniform_buf.ao.thickness,
uniform_buf.ao.angle_bias,
8);
float occlusion = 0.0;
vec4 radiance = vec4(1.0, 0.0, 1.0, 1.0);
#ifdef HORIZON_DIFFUSE
radiance.rgb = ctx.diffuse_result.rgb;
occlusion = ctx.diffuse_result.a;
#endif
#ifdef HORIZON_REFLECT
radiance.rgb = ctx.reflection_result.rgb;
occlusion = ctx.reflection_result.a;
#endif
#ifdef HORIZON_REFRACT
radiance.rgb = ctx.refraction_result.rgb;
occlusion = ctx.refraction_result.a;
#endif
imageStore(horizon_radiance_img, texel, radiance);
imageStore(horizon_occlusion_img, texel, vec4(occlusion));
}


@ -8,12 +8,239 @@
* This mostly follows the paper:
* "Screen Space Indirect Lighting with Visibility Bitmask"
* by Olivier Therrien, Yannick Levesque, Guillaume Gilet
*
* Expects `screen_radiance_tx` and `screen_normal_tx` to be bound if `HORIZON_OCCLUSION` is not
* defined.
*/
#pragma BLENDER_REQUIRE(common_shape_lib.glsl)
#pragma BLENDER_REQUIRE(draw_view_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_horizon_scan_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_ray_types_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_codegen_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_bxdf_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_spherical_harmonics_lib.glsl)
#ifdef RAYTRACE_DIFFUSE
# define HORIZON_DIFFUSE
#endif
#ifdef RAYTRACE_REFLECT
# define HORIZON_REFLECT
#endif
#ifdef RAYTRACE_REFRACT
# define HORIZON_REFRACT
#endif
vec3 horizon_scan_sample_radiance(vec2 uv)
{
#ifndef HORIZON_OCCLUSION
return texture(screen_radiance_tx, uv).rgb;
#else
return vec3(0.0);
#endif
}
vec3 horizon_scan_sample_normal(vec2 uv)
{
#ifndef HORIZON_OCCLUSION
return texture(screen_normal_tx, uv).rgb * 2.0 - 1.0;
#else
return vec3(0.0);
#endif
}
/* Note: Expects all normals to be in view-space. */
struct HorizonScanContextCommon {
float N_angle;
float N_length;
uint bitmask;
float weight_slice;
float weight_accum;
vec3 light_slice;
vec4 light_accum;
};
struct HorizonScanContext {
#ifdef HORIZON_OCCLUSION
ClosureOcclusion occlusion;
HorizonScanContextCommon occlusion_common;
vec4 occlusion_result;
#endif
#ifdef HORIZON_DIFFUSE
ClosureDiffuse diffuse;
HorizonScanContextCommon diffuse_common;
vec4 diffuse_result;
#endif
#ifdef HORIZON_REFLECT
ClosureReflection reflection;
HorizonScanContextCommon reflection_common;
vec4 reflection_result;
#endif
#ifdef HORIZON_REFRACT
ClosureRefraction refraction;
HorizonScanContextCommon refraction_common;
vec4 refraction_result;
#endif
};
void horizon_scan_context_accumulation_reset(inout HorizonScanContext context)
{
#ifdef HORIZON_OCCLUSION
context.occlusion_common.light_accum = vec4(0.0);
context.occlusion_common.weight_accum = 0.0;
#endif
#ifdef HORIZON_DIFFUSE
context.diffuse_common.light_accum = vec4(0.0);
context.diffuse_common.weight_accum = 0.0;
#endif
#ifdef HORIZON_REFLECT
context.reflection_common.light_accum = vec4(0.0);
context.reflection_common.weight_accum = 0.0;
#endif
#ifdef HORIZON_REFRACT
context.refraction_common.light_accum = vec4(0.0);
context.refraction_common.weight_accum = 0.0;
#endif
}
void horizon_scan_context_slice_start(
inout HorizonScanContextCommon context, vec3 vN, vec3 vV, vec3 vT, vec3 vB)
{
context.bitmask = 0u;
context.weight_slice = 0.0;
context.light_slice = vec3(0.0);
horizon_scan_projected_normal_to_plane_angle_and_length(
vN, vV, vT, vB, context.N_length, context.N_angle);
}
void horizon_scan_context_slice_start(inout HorizonScanContext context, vec3 vV, vec3 vT, vec3 vB)
{
#ifdef HORIZON_OCCLUSION
horizon_scan_context_slice_start(context.occlusion_common, context.occlusion.N, vV, vT, vB);
#endif
#ifdef HORIZON_DIFFUSE
horizon_scan_context_slice_start(context.diffuse_common, context.diffuse.N, vV, vT, vB);
#endif
#ifdef HORIZON_REFLECT
horizon_scan_context_slice_start(context.reflection_common, context.reflection.N, vV, vT, vB);
#endif
#ifdef HORIZON_REFRACT
horizon_scan_context_slice_start(context.refraction_common, context.refraction.N, vV, vT, vB);
#endif
}
void horizon_scan_context_sample_finish(inout HorizonScanContextCommon context,
vec3 sample_radiance,
float sample_weight,
vec2 sample_theta,
float angle_bias)
{
/* Angular bias shrinks the visibility bitmask around the projected normal. */
sample_theta = (sample_theta - context.N_angle) * angle_bias;
uint sample_bitmask = horizon_scan_angles_to_bitmask(sample_theta);
sample_weight *= horizon_scan_bitmask_to_visibility_uniform(sample_bitmask & ~context.bitmask);
context.weight_slice += sample_weight;
context.light_slice += sample_radiance * sample_weight;
context.bitmask |= sample_bitmask;
}
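The bitmask helpers referenced here are outside this diff; assuming a 32-bit visibility mask whose bits cover equal angular sectors of the slice, the uniform-visibility weighting reduces to a set-bit ratio. A hypothetical sketch (bit width and mapping assumed):

#include <bit>
#include <cstdint>

/* Assumed: each of the 32 bits covers an equal sector, so uniform visibility
 * is the fraction of sectors passed in. */
static float visibility_uniform(uint32_t bits)
{
  return float(std::popcount(bits)) / 32.0f;
}

/* As in the sample weighting above: only sectors not already occluded by
 * previously accumulated samples contribute. */
static float sample_visibility(uint32_t sample_bitmask, uint32_t accum_bitmask)
{
  return visibility_uniform(sample_bitmask & ~accum_bitmask);
}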
float bxdf_eval(ClosureDiffuse closure, vec3 L, vec3 V)
{
return bsdf_lambert(closure.N, L);
}
float bxdf_eval(ClosureReflection closure, vec3 L, vec3 V)
{
return bsdf_ggx(closure.N, L, V, closure.roughness);
}
float bxdf_eval(ClosureRefraction closure, vec3 L, vec3 V)
{
return btdf_ggx(closure.N, L, V, closure.roughness, closure.ior);
}
void horizon_scan_context_sample_finish(
inout HorizonScanContext ctx, vec3 L, vec3 V, vec2 sample_uv, vec2 theta, float bias)
{
vec3 sample_radiance = horizon_scan_sample_radiance(sample_uv);
/* Take emitter surface normal into consideration. */
vec3 sample_normal = horizon_scan_sample_normal(sample_uv);
/* Discard backfacing samples.
* The paper suggests a smooth test which is not physically correct since we
* already consider the sample reflected radiance.
* Set the weight to allow energy conservation. If we modulate the radiance, we lose energy. */
float weight = step(dot(sample_normal, -L), 0.0);
#ifdef HORIZON_OCCLUSION
horizon_scan_context_sample_finish(ctx.occlusion_common, sample_radiance, 1.0, theta, bias);
#endif
#ifdef HORIZON_DIFFUSE
weight = bxdf_eval(ctx.diffuse, L, V);
horizon_scan_context_sample_finish(ctx.diffuse_common, sample_radiance, weight, theta, bias);
#endif
#ifdef HORIZON_REFLECT
weight = bxdf_eval(ctx.reflection, L, V);
horizon_scan_context_sample_finish(ctx.reflection_common, sample_radiance, weight, theta, bias);
#endif
#ifdef HORIZON_REFRACT
/* TODO(fclem): Broken: Black. */
weight = bxdf_eval(ctx.refraction, L, V);
horizon_scan_context_sample_finish(ctx.refraction_common, sample_radiance, weight, theta, bias);
#endif
}
void horizon_scan_context_slice_finish(inout HorizonScanContextCommon context)
{
/* Use uniform visibility since this is what we use for near field lighting.
* Also, the lighting we are going to mask already contains the cosine lobe. */
float slice_occlusion = horizon_scan_bitmask_to_visibility_uniform(~context.bitmask);
/* Normalize radiance since BxDF is applied when merging direct and indirect light. */
context.light_slice *= safe_rcp(context.weight_slice) * (1.0 - slice_occlusion);
/* Correct normal not on plane (Eq. 8 of GTAO paper). */
context.light_accum += vec4(context.light_slice, slice_occlusion) * context.N_length;
context.weight_accum += context.N_length;
}
void horizon_scan_context_slice_finish(inout HorizonScanContext context)
{
#ifdef HORIZON_OCCLUSION
float occlusion = horizon_scan_bitmask_to_occlusion_cosine(context.occlusion_common.bitmask);
context.occlusion_common.light_accum += vec4(occlusion) * context.occlusion_common.N_length;
context.occlusion_common.weight_accum += context.occlusion_common.N_length;
#endif
#ifdef HORIZON_DIFFUSE
horizon_scan_context_slice_finish(context.diffuse_common);
#endif
#ifdef HORIZON_REFLECT
horizon_scan_context_slice_finish(context.reflection_common);
#endif
#ifdef HORIZON_REFRACT
horizon_scan_context_slice_finish(context.refraction_common);
#endif
}
void horizon_scan_context_accumulation_finish(HorizonScanContextCommon context, out vec4 result)
{
result = context.light_accum * safe_rcp(context.weight_accum);
}
void horizon_scan_context_accumulation_finish(inout HorizonScanContext context)
{
#ifdef HORIZON_OCCLUSION
horizon_scan_context_accumulation_finish(context.occlusion_common, context.occlusion_result);
#endif
#ifdef HORIZON_DIFFUSE
horizon_scan_context_accumulation_finish(context.diffuse_common, context.diffuse_result);
#endif
#ifdef HORIZON_REFLECT
horizon_scan_context_accumulation_finish(context.reflection_common, context.reflection_result);
#endif
#ifdef HORIZON_REFRACT
horizon_scan_context_accumulation_finish(context.refraction_common, context.refraction_result);
#endif
}
/**
* Returns the start and end point of a ray clipped to its intersection
@ -47,11 +274,10 @@ void horizon_scan_occluder_intersection_ray_sphere_clip(Ray ray,
/**
* Scans the horizon in many directions and returns the indirect lighting radiance.
* Returned lighting depends on configuration.
* Returned lighting is stored inside the context in `_accum` members already normalized.
*/
vec3 horizon_scan_eval(vec3 vP,
vec3 vN,
sampler2D depth_tx,
void horizon_scan_eval(vec3 vP,
inout HorizonScanContext context,
vec2 noise,
vec2 pixel_size,
float search_distance,
@ -61,29 +287,21 @@ vec3 horizon_scan_eval(vec3 vP,
{
vec3 vV = drw_view_incident_vector(vP);
/* Only a quarter of a turn because we integrate using 2 slices.
* We use this instead of using full circle noise to improve cache hits
* since all tracing directions will be in the same quadrant. */
vec2 v_dir = sample_circle(noise.x * 0.25);
const int slice_len = 2;
vec2 v_dir = sample_circle(noise.x * (0.5 / float(slice_len)));
vec3 accum_light = vec3(0.0);
float accum_weight = 0.0;
horizon_scan_context_accumulation_reset(context);
for (int slice = 0; slice < slice_len; slice++) {
#if 0 /* For debug purpose. For when slice_len is greater than 2. */
vec2 v_dir = sample_circle(((float(slice) + noise.x) / float(slice_len)));
#endif
for (int i = 0; i < 2; i++) {
/* Setup integration domain around V. */
vec3 vB = normalize(cross(vV, vec3(v_dir, 0.0)));
vec3 vT = cross(vB, vV);
/* Projected view normal onto the integration plane. */
float vN_proj_len;
vec3 vN_proj = normalize_and_get_length(vN - vB * dot(vN, vB), vN_proj_len);
float vN_sin = dot(vN_proj, vT);
float vN_cos = saturate(dot(vN_proj, vV));
/* Angle between normalized projected normal and view vector. */
float vN_angle = sign(vN_sin) * acos_fast(vN_cos);
vec3 slice_light = vec3(0.0);
uint slice_bitmask = 0u;
horizon_scan_context_slice_start(context, vV, vT, vB);
/* For both sides of the view vector. */
for (int side = 0; side < 2; side++) {
@ -100,18 +318,18 @@ vec3 horizon_scan_eval(vec3 vP,
/* Always cross at least one pixel. */
float time = 1.0 + square((float(j) + noise.y) / float(sample_count)) * ssray.max_time;
float lod = float(j >> 2) / (1.0 + uniform_buf.ao.quality);
float lod = 1.0 + (float(j >> 2) / (1.0 + uniform_buf.ao.quality));
vec2 sample_uv = ssray.origin.xy + ssray.direction.xy * time;
float sample_depth =
textureLod(depth_tx, sample_uv * uniform_buf.hiz.uv_scale, floor(lod)).r;
float sample_depth = textureLod(hiz_tx, sample_uv * uniform_buf.hiz.uv_scale, lod).r;
if (sample_depth == 1.0) {
/* Skip background. Avoids making shadow on the geometry near the far plane. */
continue;
}
bool front_facing = vN.z > 0.0;
/* TODO(fclem): Re-introduce bias. But this is difficult to do per closure. */
bool front_facing = true; // vN.z > 0.0;
/* Bias depth a bit to avoid self shadowing issues. */
const float bias = 2.0 * 2.4e-7;
@ -137,35 +355,16 @@ vec3 horizon_scan_eval(vec3 vP,
vec2 theta = acos_fast(vec2(dot(vL_front, vV), dot(vL_back, vV)));
/* If we are tracing backward, the angles are negative. Swizzle to keep correct order. */
theta = (side == 0) ? theta.xy : -theta.yx;
theta -= vN_angle;
/* Angular bias. Shrink the visibility bitmask around the projected normal. */
theta *= angle_bias;
uint sample_bitmask = horizon_scan_angles_to_bitmask(theta);
#ifdef USE_RADIANCE_ACCUMULATION
float sample_visibility = horizon_scan_bitmask_to_visibility_uniform(sample_bitmask &
~slice_bitmask);
if (sample_visibility > 0.0) {
vec3 sample_radiance = horizon_scan_sample_radiance(sample_uv);
# ifdef USE_NORMAL_MASKING
vec3 sample_normal = horizon_scan_sample_normal(sample_uv);
sample_visibility *= dot(sample_normal, -vL_front);
# endif
slice_light += sample_radiance * (bsdf_eval(vN, vL_front) * sample_visibility);
}
#endif
slice_bitmask |= sample_bitmask;
horizon_scan_context_sample_finish(context, vL_front, vV, sample_uv, theta, angle_bias);
}
}
/* Add distant lighting. */
slice_light = vec3(horizon_scan_bitmask_to_occlusion_cosine(slice_bitmask));
/* Correct normal not on plane (Eq. 8 of GTAO paper). */
accum_light += slice_light * vN_proj_len;
accum_weight += vN_proj_len;
horizon_scan_context_slice_finish(context);
/* Rotate 90 degrees. */
v_dir = orthogonal(v_dir);
}
return accum_light * safe_rcp(accum_weight);
horizon_scan_context_accumulation_finish(context);
}
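To make the direction scheme concrete, a sketch of the slice directions; sample_circle and orthogonal are assumed to behave like their GLSL counterparts:

#include <cmath>

struct Vec2 {
  float x, y;
};

static Vec2 sample_circle(float t) /* t in [0..1) maps to a full turn. */
{
  const float a = 6.2831853f * t;
  return {std::cos(a), std::sin(a)};
}
static Vec2 orthogonal(Vec2 v) /* 90 degree rotation. */
{
  return {-v.y, v.x};
}

/* noise_x in [0..1) only spans the first quadrant of start angles; the second
 * slice is the 90 degree rotation, and the two signed sides of each slice
 * cover the opposite directions, so the full circle is still integrated. */
static void slice_directions(float noise_x, Vec2 r_dir[2])
{
  r_dir[0] = sample_circle(noise_x * 0.25f);
  r_dir[1] = orthogonal(r_dir[0]);
}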


@ -12,6 +12,7 @@
#pragma BLENDER_REQUIRE(gpu_shader_utildefines_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_math_vector_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_math_fast_lib.glsl)
/**
* Returns the bitmask for a given ordered pair of angle in [-pi/2..pi/2] range.
@ -77,7 +78,24 @@ float horizon_scan_bitmask_to_occlusion_cosine(uint bitmask)
#endif
}
float bsdf_eval(vec3 N, vec3 L)
float bsdf_eval(vec3 N, vec3 L, vec3 V)
{
return dot(N, L);
}
/**
* Projects the normal `N` onto a plane defined by `V` and `T`.
* V, T, B forms an orthonormal basis around V.
* Returns the angle of the normal projected normal with `V` and its length.
*/
void horizon_scan_projected_normal_to_plane_angle_and_length(
vec3 N, vec3 V, vec3 T, vec3 B, out float N_proj_len, out float N_angle)
{
/* Projected view normal onto the integration plane. */
vec3 N_proj = normalize_and_get_length(N - B * dot(N, B), N_proj_len);
float N_sin = dot(N_proj, T);
float N_cos = dot(N_proj, V);
/* Angle between normalized projected normal and view vector. */
N_angle = sign(N_sin) * acos_fast(N_cos);
}
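
This helper factors out the per-slice setup that horizon_scan_eval() performs for each direction. A hypothetical call site follows; the basis construction here is an assumption for illustration, not a quote of the actual caller.

/* Hypothetical usage of the helper above. The exact basis construction in the
 * caller may differ; here vT/vB are simply built around the view vector vV
 * from a screen-space slice direction v_dir. */
void example_slice_setup(vec3 vN, vec3 vV, vec2 v_dir)
{
  vec3 vB = normalize(cross(vV, vec3(v_dir, 0.0)));
  vec3 vT = cross(vB, vV);
  float vN_proj_len;
  float vN_angle;
  horizon_scan_projected_normal_to_plane_angle_and_length(
      vN, vV, vT, vB, vN_proj_len, vN_angle);
  /* vN_angle recenters the horizon angles on the surface normal, and
   * vN_proj_len weights the slice contribution (Eq. 8 of the GTAO paper). */
}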

View File

@ -0,0 +1,47 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/**
* This pass reprojects the input radiance if needed, downsamples it and outputs the matching normal.
*
* Dispatched as one thread for each trace resolution pixel.
*/
#pragma BLENDER_REQUIRE(draw_view_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_gbuffer_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_math_matrix_lib.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
ivec2 texel_fullres = texel * uniform_buf.raytrace.resolution_scale +
uniform_buf.raytrace.resolution_bias;
/* Load Gbuffer. */
GBufferData gbuf = gbuffer_read(gbuf_header_tx, gbuf_closure_tx, gbuf_color_tx, texel_fullres);
/* Export normal. */
/* TODO(fclem): Export the most visible normal. */
vec3 N = gbuf.has_diffuse ? gbuf.diffuse.N : gbuf.reflection.N;
if (is_zero(N)) {
/* Avoid NaN. The zero normal should be fixed upstream in any case. */
N = vec3(1.0, 0.0, 0.0);
}
vec3 vN = drw_normal_world_to_view(N);
imageStore(out_normal_img, texel, vec4(vN * 0.5 + 0.5, 0.0));
/* Re-project radiance. */
vec2 uv = (vec2(texel_fullres) + 0.5) / vec2(textureSize(depth_tx, 0).xy);
float depth = texelFetch(depth_tx, texel_fullres, 0).r;
vec3 P = drw_point_screen_to_world(vec3(uv, depth));
vec3 ssP_prev = drw_ndc_to_screen(project_point(uniform_buf.raytrace.radiance_persmat, P));
vec4 radiance = texture(in_radiance_tx, ssP_prev.xy);
float luma = max(1e-8, reduce_max(radiance.rgb));
radiance *= 1.0 - max(0.0, luma - uniform_buf.raytrace.brightness_clamp) / luma;
imageStore(out_radiance_img, texel, radiance);
}
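
The last two lines implement a hue-preserving firefly clamp: the color is rescaled so that its brightest channel never exceeds brightness_clamp. A standalone restatement with a worked example:

/* Restatement of the clamp above: rescale so the brightest channel does not
 * exceed clamp_value while preserving hue. For rgb = (8, 2, 1) and
 * clamp_value = 4: luma = 8, scale = 1 - (8 - 4) / 8 = 0.5, result (4, 1, 0.5). */
vec3 clamp_brightness(vec3 color, float clamp_value)
{
  /* reduce_max() in the pass above is the maximum component. */
  float luma = max(1e-8, max(color.r, max(color.g, color.b)));
  float scale = 1.0 - max(0.0, luma - clamp_value) / luma;
  return color * scale;
}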

View File

@ -53,11 +53,11 @@ float bilateral_normal_weight(vec3 center_N, vec3 sample_N)
/* To remove some more fireflies, "tone-map" the color samples during accumulation. */
vec3 to_accumulation_space(vec3 color)
{
return color / (1.0 + dot(color, vec3(1.0)));
return color / (1.0 + reduce_add(color));
}
vec3 from_accumulation_space(vec3 color)
{
return color / (1.0 - dot(color, vec3(1.0)));
return color / (1.0 - reduce_add(color));
}
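
This remap is exactly invertible: the component sum of the compressed color is always below 1, so the division in from_accumulation_space() recovers the original. A round-trip sketch of how a weighted filter would use the pair, assuming reduce_add(c) is c.r + c.g + c.b:

/* Sketch: average two samples in accumulation space so a single very bright
 * sample cannot dominate the filter footprint, then map back. */
vec3 filter_two_samples_sketch(vec3 c0, float w0, vec3 c1, float w1)
{
  vec3 a0 = c0 / (1.0 + (c0.r + c0.g + c0.b));
  vec3 a1 = c1 / (1.0 + (c1.r + c1.g + c1.b));
  vec3 avg = (a0 * w0 + a1 * w1) / (w0 + w1);
  /* Inverse transform: the component sum of `avg` is strictly below 1. */
  return avg / (1.0 - (avg.r + avg.g + avg.b));
}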
void gbuffer_load_closure_data(sampler2DArray gbuf_closure_tx,
@ -101,7 +101,7 @@ void main()
const uint tile_size = RAYTRACE_GROUP_SIZE;
uvec2 tile_coord = unpackUvec2x16(tiles_coord_buf[gl_WorkGroupID.x]);
ivec2 texel_fullres = ivec2(gl_LocalInvocationID.xy + tile_coord * tile_size);
vec2 center_uv = vec2(texel_fullres) * uniform_buf.raytrace.full_resolution_inv;
vec2 center_uv = (vec2(texel_fullres) + 0.5) * uniform_buf.raytrace.full_resolution_inv;
float center_depth = texelFetch(depth_tx, texel_fullres, 0).r;
vec3 center_P = drw_point_screen_to_world(vec3(center_uv, center_depth));
@ -157,13 +157,14 @@ void main()
ivec2 sample_texel = texel_fullres + offset;
ivec2 sample_tile = sample_texel / RAYTRACE_GROUP_SIZE;
/* Make sure the sample has been processed and does not contain garbage data. */
bool unprocessed_tile = imageLoad(tile_mask_img, sample_tile).r == 0;
uint tile_mask = imageLoad(tile_mask_img, sample_tile).r;
bool unprocessed_tile = !flag_test(tile_mask, 1u << 0u);
if (unprocessed_tile) {
continue;
}
float sample_depth = texelFetch(depth_tx, sample_texel, 0).r;
vec2 sample_uv = vec2(sample_texel) * uniform_buf.raytrace.full_resolution_inv;
vec2 sample_uv = (vec2(sample_texel) + 0.5) * uniform_buf.raytrace.full_resolution_inv;
vec3 sample_P = drw_point_screen_to_world(vec3(sample_uv, sample_depth));
/* Background case. */
@ -181,7 +182,7 @@ void main()
vec3 radiance = imageLoad(in_radiance_img, sample_texel).rgb;
/* Do not gather unprocessed pixels. */
if (all(equal(in_radiance, FLT_11_11_10_MAX))) {
if (all(equal(radiance, FLT_11_11_10_MAX))) {
continue;
}
accum_radiance += to_accumulation_space(radiance) * weight;

View File

@ -63,7 +63,8 @@ void main()
continue;
}
bool tile_is_unused = imageLoad(tile_mask_img, tile_coord_neighbor).r == 0;
uint tile_mask = imageLoad(tile_mask_img, tile_coord_neighbor).r;
bool tile_is_unused = !flag_test(tile_mask, 1u << 0u);
if (tile_is_unused) {
ivec2 texel_fullres_neighbor = texel_fullres + ivec2(x, y) * int(tile_size);

View File

@ -12,13 +12,13 @@
#pragma BLENDER_REQUIRE(gpu_shader_codegen_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_gbuffer_lib.glsl)
shared uint tile_contains_glossy_rays;
shared uint tile_contains_ray_tracing;
shared uint tile_contains_horizon_scan;
/* Returns a blend factor between different irradiance fetching methods for reflections. */
float ray_glossy_factor(float roughness)
/* Returns a blend factor between different tracing methods. */
float ray_roughness_factor(RayTraceData raytrace, float roughness)
{
/* TODO */
return 1.0;
return saturate(roughness * raytrace.roughness_mask_scale - raytrace.roughness_mask_bias);
}
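
The factor ramps from 0 (pure ray tracing) to 1 (pure horizon scan) across a roughness window encoded by the scale/bias pair. A sketch of how such a pair could be derived from a user-facing range; the fade_start/fade_end names are assumptions, not the actual parameters:

/* Sketch: derive roughness_mask_scale / roughness_mask_bias from a roughness
 * range [fade_start, fade_end] (hypothetical names). Inside the range the
 * factor ramps 0 -> 1, selecting horizon scan over ray tracing for rough
 * closures. */
void roughness_mask_from_range(float fade_start, float fade_end,
                               out float mask_scale, out float mask_bias)
{
  mask_scale = 1.0 / max(1e-6, fade_end - fade_start);
  mask_bias = fade_start * mask_scale;
  /* saturate(r * mask_scale - mask_bias)
   *   == saturate((r - fade_start) / (fade_end - fade_start)). */
}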
void main()
@ -27,15 +27,22 @@ void main()
/* Clear num_groups_x to 0 so that we can use it as a counter in the compaction phase.
* Note that these writes are subject to a race condition, but we write the same value
* from all work-groups. */
denoise_dispatch_buf.num_groups_x = 0u;
denoise_dispatch_buf.num_groups_y = 1u;
denoise_dispatch_buf.num_groups_z = 1u;
ray_denoise_dispatch_buf.num_groups_x = 0u;
ray_denoise_dispatch_buf.num_groups_y = 1u;
ray_denoise_dispatch_buf.num_groups_z = 1u;
ray_dispatch_buf.num_groups_x = 0u;
ray_dispatch_buf.num_groups_y = 1u;
ray_dispatch_buf.num_groups_z = 1u;
horizon_dispatch_buf.num_groups_x = 0u;
horizon_dispatch_buf.num_groups_y = 1u;
horizon_dispatch_buf.num_groups_z = 1u;
horizon_denoise_dispatch_buf.num_groups_x = 0u;
horizon_denoise_dispatch_buf.num_groups_y = 1u;
horizon_denoise_dispatch_buf.num_groups_z = 1u;
/* Init shared variables. */
tile_contains_glossy_rays = 0;
tile_contains_ray_tracing = 0;
tile_contains_horizon_scan = 0;
}
barrier();
@ -48,13 +55,22 @@ void main()
if (flag_test(closure_bits, uniform_buf.raytrace.closure_active)) {
GBufferData gbuf = gbuffer_read(gbuf_header_tx, gbuf_closure_tx, gbuf_color_tx, texel);
float roughness = (uniform_buf.raytrace.closure_active == CLOSURE_REFRACTION) ?
gbuf.refraction.roughness :
gbuf.reflection.roughness;
float roughness = 1.0;
if (uniform_buf.raytrace.closure_active == eClosureBits(CLOSURE_REFLECTION)) {
roughness = gbuf.reflection.roughness;
}
if (uniform_buf.raytrace.closure_active == eClosureBits(CLOSURE_REFRACTION)) {
roughness = 0.0; /* TODO(fclem): Apparent roughness. For now, always raytrace. */
}
if (ray_glossy_factor(roughness) > 0.0) {
float ray_roughness_fac = ray_roughness_factor(uniform_buf.raytrace, roughness);
if (ray_roughness_fac > 0.0) {
/* We don't care about race condition here. */
tile_contains_glossy_rays = 1;
tile_contains_horizon_scan = 1;
}
if (ray_roughness_fac < 1.0) {
/* We don't care about race condition here. */
tile_contains_ray_tracing = 1;
}
}
@ -64,8 +80,11 @@ void main()
ivec2 tile_co = ivec2(gl_WorkGroupID.xy);
uint tile_mask = 0u;
if (tile_contains_glossy_rays > 0) {
tile_mask = 1u;
if (tile_contains_ray_tracing > 0) {
tile_mask |= 1u << 0u;
}
if (tile_contains_horizon_scan > 0) {
tile_mask |= 1u << 1u;
}
imageStore(tile_mask_img, tile_co, uvec4(tile_mask));
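
For readers following the mask across passes: bit 0 marks tiles that need ray tracing and bit 1 marks tiles that need horizon scan, matching the flag_test() calls in the denoise and dispatch shaders above. A small restatement of the convention; the #define names are illustrative and not part of the patch:

/* Tile mask bit convention used across these passes (names are illustrative). */
#define TILE_MASK_RAY_TRACING (1u << 0u)
#define TILE_MASK_HORIZON_SCAN (1u << 1u)

bool tile_needs_ray_tracing(uint tile_mask)
{
  return (tile_mask & TILE_MASK_RAY_TRACING) != 0u;
}

bool tile_needs_horizon_scan(uint tile_mask)
{
  return (tile_mask & TILE_MASK_HORIZON_SCAN) != 0u;
}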

View File

@ -19,9 +19,13 @@ void main()
ivec2 tile = ivec2(gl_GlobalInvocationID.xy);
/* True if an adjacent tile is tracing and will need this tile's data for denoising. */
bool tile_is_sampled = false;
bool tile_is_ray_sampled = false;
/* True if this tile is shooting and tracing rays. */
bool tile_is_tracing = false;
bool tile_is_ray_tracing = false;
/* True if this tile is using horizon scan. */
bool tile_is_horizon_tracing = false;
/* True if an adjacent tile is tracing and will need this tile's data for denoising (horizon). */
bool tile_is_horizon_sampled = false;
/* Could be optimized if that becomes an issue. */
for (int x_tile = -1; x_tile <= 1; x_tile++) {
for (int y_tile = -1; y_tile <= 1; y_tile++) {
@ -32,17 +36,32 @@ void main()
if (any(greaterThanEqual(full_res_tile, imageSize(tile_mask_img)))) {
continue;
}
bool denoise_tile_is_used = imageLoad(tile_mask_img, full_res_tile).r != 0u;
if (denoise_tile_is_used) {
uint tile_mask = imageLoad(tile_mask_img, full_res_tile).r;
bool tile_uses_ray_tracing = flag_test(tile_mask, 1u << 0u);
bool tile_uses_horizon_scan = flag_test(tile_mask, 1u << 1u);
if (tile_uses_ray_tracing) {
if (x_tile == 0 && y_tile == 0) {
/* Dispatch full resolution denoise tile. */
uint tile_index = atomicAdd(denoise_dispatch_buf.num_groups_x, 1u);
denoise_tiles_buf[tile_index] = packUvec2x16(uvec2(full_res_tile));
tile_is_tracing = true;
uint tile_index = atomicAdd(ray_denoise_dispatch_buf.num_groups_x, 1u);
ray_denoise_tiles_buf[tile_index] = packUvec2x16(uvec2(full_res_tile));
tile_is_ray_tracing = true;
}
else {
/* This denoise tile will sample the target tracing tile. Make sure it is cleared. */
tile_is_sampled = true;
tile_is_ray_sampled = true;
}
}
if (tile_uses_horizon_scan) {
if (x_tile == 0 && y_tile == 0) {
/* Dispatch full resolution horizon scan. */
uint tile_horizon_index = atomicAdd(horizon_denoise_dispatch_buf.num_groups_x, 1u);
horizon_denoise_tiles_buf[tile_horizon_index] = packUvec2x16(uvec2(full_res_tile));
tile_is_horizon_tracing = true;
}
else {
/* This denoise tile will sample the target tracing tile. Make sure it is cleared. */
tile_is_horizon_sampled = true;
}
}
}
@ -51,9 +70,16 @@ void main()
}
/* TODO(fclem): we might want to dispatch another type of shader only for clearing. */
if (tile_is_tracing || tile_is_sampled) {
if (tile_is_ray_tracing || tile_is_ray_sampled) {
/* Dispatch trace resolution tracing tile. */
uint tile_index = atomicAdd(ray_dispatch_buf.num_groups_x, 1u);
ray_tiles_buf[tile_index] = packUvec2x16(uvec2(tile));
}
/* TODO(fclem): we might want to dispatch another type of shader only for clearing. */
if (tile_is_horizon_tracing || tile_is_horizon_sampled) {
/* Dispatch trace resolution horizon scan tile. */
uint tile_index = atomicAdd(horizon_dispatch_buf.num_groups_x, 1u);
horizon_tiles_buf[tile_index] = packUvec2x16(uvec2(tile));
}
}
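
Both branches use the same stream-compaction idiom: atomicAdd() on num_groups_x reserves a slot in the tile list and simultaneously grows the indirect dispatch by one work-group. A generic self-contained sketch of the pattern; buffer names, bindings and the packing layout are assumptions:

/* Hypothetical buffers mirroring the pattern above. */
layout(std430, binding = 0) buffer DispatchBuf {
  uint num_groups_x, num_groups_y, num_groups_z;
} dispatch_buf;

layout(std430, binding = 1) buffer TilesBuf {
  uint tiles_buf[];
};

void append_tile(uvec2 tile)
{
  /* Reserve a slot; this also grows the indirect dispatch by one work-group. */
  uint index = atomicAdd(dispatch_buf.num_groups_x, 1u);
  /* One possible packing, analogous to packUvec2x16(): two 16-bit
   * coordinates in one uint. */
  tiles_buf[index] = (tile.y << 16u) | (tile.x & 0xFFFFu);
}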

View File

@ -3,6 +3,7 @@
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma BLENDER_REQUIRE(draw_view_lib.glsl)
#pragma BLENDER_REQUIRE(draw_math_geom_lib.glsl)
/**
* General purpose 3D ray.

View File

@ -90,6 +90,12 @@ void main(void)
vec2 sample_scale = vec2(ProjectionMatrix[0][0], ProjectionMatrix[1][1]) *
(0.5 * max_radius / homcoord);
float pixel_footprint = sample_scale.x * textureSize(depth_tx, 0).x;
if (pixel_footprint <= 1.0) {
/* Early out, avoid divisions by zero. */
return;
}
/* Avoid too small radii that have float imprecision. */
vec3 clamped_sss_radius = max(vec3(1e-4), gbuf.diffuse.sss_radius / max_radius) * max_radius;
/* Scale albedo because we can have HDR value caused by BSDF sampling. */

View File

@ -144,7 +144,5 @@ void main()
/* Only output emission during the gbuffer pass. */
out_radiance = vec4(g_emission, 0.0);
out_radiance.rgb *= 1.0 - g_holdout;
out_transmittance.rgb = g_transmittance;
out_transmittance.a = saturate(average(g_transmittance));
out_radiance.a = g_holdout;
}

Some files were not shown because too many files have changed in this diff.