Compare commits

301 Commits: temp-inter...blender-v3

SHA1:

dc2d180181 ddf92d719b add07576a0 63b9e5378b 822501d86d b3fe135854 0039432cfc
9600f36cfc 688713db24 5079a460a7 bcca7bf975 d7a1fc0868 40c5786df3 71bc9d4760
1d462e1729 44ac03785c aafbd74646 edc85e182e 9d6680e7f9 15e4d0f25d e53f3954a4
3b686b8233 4c8740a452 9a7b1d2245 72aefef9d2 ed397ff507 7d26cf01f7 c3457af23f
3707a78471 e84625dcbc 29c4c78a38 cd804fb2e9 69c56d2819 7339663bbc 6bd924c748
e8a8e953b3 784c04bcbd 50c39ff8fe 512014f042 3d5dbc1c44 5d80b64d28 08226693cf
b2e15cb19d 6514e4c418 66addab27a 606f6e73b0 56d45a2974 fa0173f728 52905c02ae
50ecf9dcf5 0564b19ff4 ff5630b7fa 0a6b6eb13b b9f5e6c0b4 62ce0c60cd 58ee4852b6
ad4d66580e 21f02dd85d 7d5fa51666 61fe0d6264 99efb95441 dc7ff75ef8 e83df74008
55ecdf3195 5beadc31d6 a8a9a08bf7 88f8b01e66 10d65b821b 56bd7adce7 b4c9f8da88
0b7dbff04a b93127c57b d009056b01 669577a973 e29026bb4b 1fd824345d fd4c343dcd
46f5b305e4 4c8b93c5c2 256c1b82f6 580c603df0 c37cd35469 490f1648a7 256a2d1e98
77694b571f 8b44b756d8 24a79289b0 2af6cb9dce 8ca4d20878 e78a21afb6 d02eecc0ca
f17593ff26 f1cca30557 0988711575 cef8f5ff50 1e98a0cee5 431255e5e8 7e60d8a713
2fd657db5b 61e92eeb3e 67c490daaf 68e3755209 594656e7a3 9cec9b4d6e 24b84e4688
b3d101ac29 3788003cda de7f1e8e07 4b971bb87c 2e53f8b4b1 d8edc2c634 c12d8a72ce
e7ae9f493a aa7051c8f2 dae9917915 2206b6b9a0 03c9563582 b31250feba d2e6087335
2fb8c6805a e6a41e1c80 b2bb3e4b72 5a11c6e558 5514ca58a4 94e8db1e86 c91d196159
e253fb2143 3bf10e5d0a ffddf9e5c9 845716e600 82808e18e6 a0acb9bd0c e6cd4761e7
726bc3a46b ce5561b815 40d28b40df b41c72b710 8f2db94627 a9642f8d61 752c6d668b
7a7ae4df43 71c39a9e2e cae3b581b0 01ab36ebc1 a07089dcb1 56b068a664 64d9291d26
2cc56495f3 5a50b46376 cd818fd081 785503a7e4 4b259edb0a 60c0b79256 3844e9dbe7
9e5aae4215 792badcfef fb4851fbbc cf266ecaa6 a6b7f32112 70424195a8 2cbb9d7a76
3bb8d173e7 1a7c32a0ab ceb25cbeba 5efddc4347 436ce22194 dab04bc053 0479a66313
611e4ffaab b7c98c87ac bba6fe83e2 6ab3349bd4 3e65bb86f9 cd2849c89b b02ac2d8be
84be741329 6987060f70 f749506163 481f032f5c 34615cd269 e2b736aa40 ee0277271c
8a84a61f6b 336ca6796a 25c83c217b 875f24352a 819b9bdfa1 7b09213f2f 0b246ed813
6c16bb2706 6eaa69c66c 1b2ee3cf20 f2bb42a095 b20997cb34 092df87534 fb0ea94c63
00e4d665f4 1b686c60b5 0f1a200a67 1a1ddcb5e2 06ead314b6 33c5e7bcd5 d6ea881a74
04ec36f677 a20e703d1a fa6a913ef1 83e245023c de3fda29c7 4ea6b4ba84 4d09a692e2
fd2a155d06 7c4e4d605c 12fc395436 f0be276514 ed91e759d1 8d1357ea6b 31afa1bb9a
0624acf088 b926f54f3c f71813204c 3ad2bf1327 bd2e3bb7bd e5774282b9 8c0370ef7b
a182b05f07 daaa43232d d8fd575af9 b071083496 00a9617f92 51b8e34fb7 6e6123b40f
9bdf3fa5f0 f829b86039 1e4d1eb398 b496c1c721 3189171a94 f30e1fd2f0 25d30e6c99
cfd0e96e47 7293c1b357 1572c4d3d3 bd37553850 0335df9384 b3529ecf0e 72ee62e0da
07af45eec5 ce0d817bb7 c7a1e115b5 faa8aa3bb9 052c22199d da14a482f2 7d985d6b69
a040d2a93a 7e148c45c8 d3c45e1c39 ef8240e64c 62da41d63d a5c59fb90e 622e6f05f1
0a6f428be7 eed48a7322 6b4ca78108 8d3a771574 888b879f5f 71131b4969 b4d9b8b7f8
ef0b8d6306 9d0d4b8601 2b394e1108 896d3f1ce5 76105eb752 d48523cb4d b4cfe80547
5f7d5c0809 0ea60cf6b8 de8a46c6ad 2b633f12ad 456876208b 1061f5a1ba 1a7757b0bc
f133c6b094 d612d92630 bd734cc441 f3bdabbe24 393879f30c 3d9c8397fc 4bc08b79aa
7aa39b40f4 d26d3cfe19 9be6880d02 b7e2408ea4 bb64155c63 d753ebd40a ddf0bacaa9
3929db265f 22ffd69a91 040630bb9a 7689f501e2 e507a789b3 6b0008129e c8e93da0a7
```diff
@@ -440,7 +440,11 @@ mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
 mark_as_advanced(WITH_CUDA_DYNLOAD)
 
 # AMD HIP
-option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" OFF)
+if(WIN32)
+  option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
+else()
+  option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" OFF)
+endif()
 option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
 set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 CACHE STRING "AMD HIP architectures to build binaries for")
 mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
```
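With the hunk above, Windows builds get the Cycles HIP device by default while other platforms keep it off. A build on Linux or macOS could still opt in by overriding the cache options at configure time; the following is a hypothetical invocation and build directory, not a documented preset:

```sh
# Sketch: force-enable the Cycles HIP device and kernel binaries on any
# platform; the architecture names come from CYCLES_HIP_BINARIES_ARCH above.
cmake -S . -B ../build_hip \
  -DWITH_CYCLES_DEVICE_HIP=ON \
  -DWITH_CYCLES_HIP_BINARIES=ON \
  -DCYCLES_HIP_BINARIES_ARCH="gfx1010;gfx1030"
```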
```diff
@@ -490,7 +494,8 @@ endif()
 
 # This should be turned off when Blender enter beta/rc/release
 if("${BLENDER_VERSION_CYCLE}" STREQUAL "release" OR
-   "${BLENDER_VERSION_CYCLE}" STREQUAL "rc")
+   "${BLENDER_VERSION_CYCLE}" STREQUAL "rc" OR
+   "${BLENDER_VERSION_CYCLE}" STREQUAL "beta")
   set(WITH_EXPERIMENTAL_FEATURES OFF)
 else()
   set(WITH_EXPERIMENTAL_FEATURES ON)
```
```diff
@@ -42,6 +42,7 @@ ExternalProject_Add(nanovdb
   URL_HASH ${NANOVDB_HASH_TYPE}=${NANOVDB_HASH}
   PREFIX ${BUILD_DIR}/nanovdb
   SOURCE_SUBDIR nanovdb
+  PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/nanovdb/src/nanovdb < ${PATCH_DIR}/nanovdb.diff
   CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/nanovdb ${DEFAULT_CMAKE_FLAGS} ${NANOVDB_EXTRA_ARGS}
   INSTALL_DIR ${LIBDIR}/nanovdb
 )
```
build_files/build_environment/patches/nanovdb.diff (new file, 374 lines)

```diff
Index: nanovdb/nanovdb/NanoVDB.h
===================================================================
--- a/nanovdb/nanovdb/NanoVDB.h (revision 62751)
+++ b/nanovdb/nanovdb/NanoVDB.h (working copy)
@@ -152,8 +152,8 @@
 
 #endif // __CUDACC_RTC__
 
-#ifdef __CUDACC__
-// Only define __hostdev__ when using NVIDIA CUDA compiler
+#if defined(__CUDACC__) || defined(__HIP__)
+// Only define __hostdev__ when using NVIDIA CUDA or HIP compiler
 #define __hostdev__ __host__ __device__
 #else
 #define __hostdev__
@@ -461,7 +461,7 @@
 /// Maximum floating-point values
 template<typename T>
 struct Maximum;
-#ifdef __CUDA_ARCH__
+#if defined(__CUDA_ARCH__) || defined(__HIP__)
 template<>
 struct Maximum<int>
 {
@@ -1006,10 +1006,10 @@
 using Vec3i = Vec3<int>;
 
 /// @brief Return a single precision floating-point vector of this coordinate
-Vec3f Coord::asVec3s() const { return Vec3f(float(mVec[0]), float(mVec[1]), float(mVec[2])); }
+inline __hostdev__ Vec3f Coord::asVec3s() const { return Vec3f(float(mVec[0]), float(mVec[1]), float(mVec[2])); }
 
 /// @brief Return a double precision floating-point vector of this coordinate
-Vec3d Coord::asVec3d() const { return Vec3d(double(mVec[0]), double(mVec[1]), double(mVec[2])); }
+inline __hostdev__ Vec3d Coord::asVec3d() const { return Vec3d(double(mVec[0]), double(mVec[1]), double(mVec[2])); }
 
 // ----------------------------> Vec4 <--------------------------------------
 
@@ -1820,7 +1820,7 @@
 }; // Map
 
 template<typename Mat4T>
-void Map::set(const Mat4T& mat, const Mat4T& invMat, double taper)
+__hostdev__ void Map::set(const Mat4T& mat, const Mat4T& invMat, double taper)
 {
     float * mf = mMatF, *vf = mVecF;
     float* mif = mInvMatF;
@@ -2170,7 +2170,7 @@
 }; // Class Grid
 
 template<typename TreeT>
-int Grid<TreeT>::findBlindDataForSemantic(GridBlindDataSemantic semantic) const
+__hostdev__ int Grid<TreeT>::findBlindDataForSemantic(GridBlindDataSemantic semantic) const
 {
     for (uint32_t i = 0, n = blindDataCount(); i < n; ++i)
         if (blindMetaData(i).mSemantic == semantic)
@@ -2328,7 +2328,7 @@
 }; // Tree class
 
 template<typename RootT>
-void Tree<RootT>::extrema(ValueType& min, ValueType& max) const
+__hostdev__ void Tree<RootT>::extrema(ValueType& min, ValueType& max) const
 {
     min = this->root().valueMin();
     max = this->root().valueMax();
@@ -2336,7 +2336,7 @@
 
 template<typename RootT>
 template<typename NodeT>
-const NodeT* Tree<RootT>::getNode(uint32_t i) const
+__hostdev__ const NodeT* Tree<RootT>::getNode(uint32_t i) const
 {
     static_assert(is_same<TreeNodeT<NodeT::LEVEL>, NodeT>::value, "Tree::getNode: unvalid node type");
     NANOVDB_ASSERT(i < DataType::mCount[NodeT::LEVEL]);
@@ -2345,7 +2345,7 @@
 
 template<typename RootT>
 template<int LEVEL>
-const typename TreeNode<Tree<RootT>, LEVEL>::type* Tree<RootT>::getNode(uint32_t i) const
+__hostdev__ const typename TreeNode<Tree<RootT>, LEVEL>::type* Tree<RootT>::getNode(uint32_t i) const
 {
     NANOVDB_ASSERT(i < DataType::mCount[LEVEL]);
     return reinterpret_cast<const TreeNodeT<LEVEL>*>(reinterpret_cast<const uint8_t*>(this) + DataType::mBytes[LEVEL]) + i;
@@ -2353,7 +2353,7 @@
 
 template<typename RootT>
 template<typename NodeT>
-NodeT* Tree<RootT>::getNode(uint32_t i)
+__hostdev__ NodeT* Tree<RootT>::getNode(uint32_t i)
 {
     static_assert(is_same<TreeNodeT<NodeT::LEVEL>, NodeT>::value, "Tree::getNode: invalid node type");
     NANOVDB_ASSERT(i < DataType::mCount[NodeT::LEVEL]);
@@ -2362,7 +2362,7 @@
 
 template<typename RootT>
 template<int LEVEL>
-typename TreeNode<Tree<RootT>, LEVEL>::type* Tree<RootT>::getNode(uint32_t i)
+__hostdev__ typename TreeNode<Tree<RootT>, LEVEL>::type* Tree<RootT>::getNode(uint32_t i)
 {
     NANOVDB_ASSERT(i < DataType::mCount[LEVEL]);
     return reinterpret_cast<TreeNodeT<LEVEL>*>(reinterpret_cast<uint8_t*>(this) + DataType::mBytes[LEVEL]) + i;
@@ -2370,7 +2370,7 @@
 
 template<typename RootT>
 template<typename NodeT>
-uint32_t Tree<RootT>::getNodeID(const NodeT& node) const
+__hostdev__ uint32_t Tree<RootT>::getNodeID(const NodeT& node) const
 {
     static_assert(is_same<TreeNodeT<NodeT::LEVEL>, NodeT>::value, "Tree::getNodeID: invalid node type");
     const NodeT* first = reinterpret_cast<const NodeT*>(reinterpret_cast<const uint8_t*>(this) + DataType::mBytes[NodeT::LEVEL]);
@@ -2380,7 +2380,7 @@
 
 template<typename RootT>
 template<typename NodeT>
-uint32_t Tree<RootT>::getLinearOffset(const NodeT& node) const
+__hostdev__ uint32_t Tree<RootT>::getLinearOffset(const NodeT& node) const
 {
     return this->getNodeID(node) + DataType::mPFSum[NodeT::LEVEL];
 }
@@ -3366,7 +3366,7 @@
 }; // LeafNode class
 
 template<typename ValueT, typename CoordT, template<uint32_t> class MaskT, uint32_t LOG2DIM>
-inline void LeafNode<ValueT, CoordT, MaskT, LOG2DIM>::updateBBox()
+inline __hostdev__ void LeafNode<ValueT, CoordT, MaskT, LOG2DIM>::updateBBox()
 {
     static_assert(LOG2DIM == 3, "LeafNode::updateBBox: only supports LOGDIM = 3!");
     if (!this->isActive()) return;
Index: nanovdb/nanovdb/util/SampleFromVoxels.h
===================================================================
--- a/nanovdb/nanovdb/util/SampleFromVoxels.h (revision 62751)
+++ b/nanovdb/nanovdb/util/SampleFromVoxels.h (working copy)
@@ -22,7 +22,7 @@
 #define NANOVDB_SAMPLE_FROM_VOXELS_H_HAS_BEEN_INCLUDED
 
 // Only define __hostdev__ when compiling as NVIDIA CUDA
-#ifdef __CUDACC__
+#if defined(__CUDACC__) || defined(__HIP__)
 #define __hostdev__ __host__ __device__
 #else
 #include <cmath> // for floor
@@ -136,7 +136,7 @@
 
 template<typename TreeOrAccT>
 template<typename Vec3T>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(const Vec3T& xyz) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(const Vec3T& xyz) const
 {
     const CoordT ijk = Round<CoordT>(xyz);
     if (ijk != mPos) {
@@ -147,7 +147,7 @@
 }
 
 template<typename TreeOrAccT>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(const CoordT& ijk) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, true>::operator()(const CoordT& ijk) const
 {
     if (ijk != mPos) {
         mPos = ijk;
@@ -158,7 +158,7 @@
 
 template<typename TreeOrAccT>
 template<typename Vec3T>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, false>::operator()(const Vec3T& xyz) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 0, false>::operator()(const Vec3T& xyz) const
 {
     return mAcc.getValue(Round<CoordT>(xyz));
 }
@@ -195,7 +195,7 @@
 }; // TrilinearSamplerBase
 
 template<typename TreeOrAccT>
-void TrilinearSampler<TreeOrAccT>::stencil(CoordT& ijk, ValueT (&v)[2][2][2]) const
+__hostdev__ void TrilinearSampler<TreeOrAccT>::stencil(CoordT& ijk, ValueT (&v)[2][2][2]) const
 {
     v[0][0][0] = mAcc.getValue(ijk); // i, j, k
 
@@ -224,7 +224,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-typename TreeOrAccT::ValueType TrilinearSampler<TreeOrAccT>::sample(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2])
+__hostdev__ typename TreeOrAccT::ValueType TrilinearSampler<TreeOrAccT>::sample(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2])
 {
 #if 0
     auto lerp = [](ValueT a, ValueT b, ValueT w){ return fma(w, b-a, a); };// = w*(b-a) + a
@@ -239,7 +239,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-Vec3T<typename TreeOrAccT::ValueType> TrilinearSampler<TreeOrAccT>::gradient(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2])
+__hostdev__ Vec3T<typename TreeOrAccT::ValueType> TrilinearSampler<TreeOrAccT>::gradient(const Vec3T<RealT> &uvw, const ValueT (&v)[2][2][2])
 {
     static_assert(std::is_floating_point<ValueT>::value, "TrilinearSampler::gradient requires a floating-point type");
 #if 0
@@ -270,7 +270,7 @@
 }
 
 template<typename TreeOrAccT>
-bool TrilinearSampler<TreeOrAccT>::zeroCrossing(const ValueT (&v)[2][2][2])
+__hostdev__ bool TrilinearSampler<TreeOrAccT>::zeroCrossing(const ValueT (&v)[2][2][2])
 {
     static_assert(std::is_floating_point<ValueT>::value, "TrilinearSampler::zeroCrossing requires a floating-point type");
     const bool less = v[0][0][0] < ValueT(0);
@@ -363,7 +363,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(Vec3T<RealT> xyz) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(Vec3T<RealT> xyz) const
 {
     this->cache(xyz);
     return BaseT::sample(xyz, mVal);
@@ -370,7 +370,7 @@
 }
 
 template<typename TreeOrAccT>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(const CoordT &ijk) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, true>::operator()(const CoordT &ijk) const
 {
     return ijk == mPos ? mVal[0][0][0] : BaseT::mAcc.getValue(ijk);
 }
@@ -377,7 +377,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, true>::gradient(Vec3T<RealT> xyz) const
+__hostdev__ Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, true>::gradient(Vec3T<RealT> xyz) const
 {
     this->cache(xyz);
     return BaseT::gradient(xyz, mVal);
@@ -393,7 +393,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-void SampleFromVoxels<TreeOrAccT, 1, true>::cache(Vec3T<RealT>& xyz) const
+__hostdev__ void SampleFromVoxels<TreeOrAccT, 1, true>::cache(Vec3T<RealT>& xyz) const
 {
     CoordT ijk = Floor<CoordT>(xyz);
     if (ijk != mPos) {
@@ -406,7 +406,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
 {
     ValueT val[2][2][2];
     CoordT ijk = Floor<CoordT>(xyz);
@@ -418,7 +418,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 1, false>::operator()(Vec3T<RealT> xyz) const
 {
     auto lerp = [](ValueT a, ValueT b, RealT w) { return a + ValueT(w) * (b - a); };
 
@@ -463,7 +463,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-inline Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, false>::gradient(Vec3T<RealT> xyz) const
+inline __hostdev__ Vec3T<typename TreeOrAccT::ValueType> SampleFromVoxels<TreeOrAccT, 1, false>::gradient(Vec3T<RealT> xyz) const
 {
     ValueT val[2][2][2];
     CoordT ijk = Floor<CoordT>(xyz);
@@ -473,7 +473,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-bool SampleFromVoxels<TreeOrAccT, 1, false>::zeroCrossing(Vec3T<RealT> xyz) const
+__hostdev__ bool SampleFromVoxels<TreeOrAccT, 1, false>::zeroCrossing(Vec3T<RealT> xyz) const
 {
     ValueT val[2][2][2];
     CoordT ijk = Floor<CoordT>(xyz);
@@ -510,7 +510,7 @@
 }; // TriquadraticSamplerBase
 
 template<typename TreeOrAccT>
-void TriquadraticSampler<TreeOrAccT>::stencil(const CoordT &ijk, ValueT (&v)[3][3][3]) const
+__hostdev__ void TriquadraticSampler<TreeOrAccT>::stencil(const CoordT &ijk, ValueT (&v)[3][3][3]) const
 {
     CoordT p(ijk[0] - 1, 0, 0);
     for (int dx = 0; dx < 3; ++dx, ++p[0]) {
@@ -526,7 +526,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-typename TreeOrAccT::ValueType TriquadraticSampler<TreeOrAccT>::sample(const Vec3T<RealT> &uvw, const ValueT (&v)[3][3][3])
+__hostdev__ typename TreeOrAccT::ValueType TriquadraticSampler<TreeOrAccT>::sample(const Vec3T<RealT> &uvw, const ValueT (&v)[3][3][3])
 {
     auto kernel = [](const ValueT* value, double weight)->ValueT {
         return weight * (weight * (0.5f * (value[0] + value[2]) - value[1]) +
@@ -545,7 +545,7 @@
 }
 
 template<typename TreeOrAccT>
-bool TriquadraticSampler<TreeOrAccT>::zeroCrossing(const ValueT (&v)[3][3][3])
+__hostdev__ bool TriquadraticSampler<TreeOrAccT>::zeroCrossing(const ValueT (&v)[3][3][3])
 {
     static_assert(std::is_floating_point<ValueT>::value, "TrilinearSampler::zeroCrossing requires a floating-point type");
     const bool less = v[0][0][0] < ValueT(0);
@@ -624,7 +624,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(Vec3T<RealT> xyz) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(Vec3T<RealT> xyz) const
 {
     this->cache(xyz);
     return BaseT::sample(xyz, mVal);
@@ -631,7 +631,7 @@
 }
 
 template<typename TreeOrAccT>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(const CoordT &ijk) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, true>::operator()(const CoordT &ijk) const
 {
     return ijk == mPos ? mVal[1][1][1] : BaseT::mAcc.getValue(ijk);
 }
@@ -646,7 +646,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-void SampleFromVoxels<TreeOrAccT, 2, true>::cache(Vec3T<RealT>& xyz) const
+__hostdev__ void SampleFromVoxels<TreeOrAccT, 2, true>::cache(Vec3T<RealT>& xyz) const
 {
     CoordT ijk = Floor<CoordT>(xyz);
     if (ijk != mPos) {
@@ -657,7 +657,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, false>::operator()(Vec3T<RealT> xyz) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 2, false>::operator()(Vec3T<RealT> xyz) const
 {
     ValueT val[3][3][3];
     CoordT ijk = Floor<CoordT>(xyz);
@@ -667,7 +667,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-bool SampleFromVoxels<TreeOrAccT, 2, false>::zeroCrossing(Vec3T<RealT> xyz) const
+__hostdev__ bool SampleFromVoxels<TreeOrAccT, 2, false>::zeroCrossing(Vec3T<RealT> xyz) const
 {
     ValueT val[3][3][3];
     CoordT ijk = Floor<CoordT>(xyz);
@@ -710,7 +710,7 @@
 }; // TricubicSampler
 
 template<typename TreeOrAccT>
-void TricubicSampler<TreeOrAccT>::stencil(const CoordT& ijk, ValueT (&C)[64]) const
+__hostdev__ void TricubicSampler<TreeOrAccT>::stencil(const CoordT& ijk, ValueT (&C)[64]) const
 {
     auto fetch = [&](int i, int j, int k) -> ValueT& { return C[((i + 1) << 4) + ((j + 1) << 2) + k + 1]; };
 
@@ -929,7 +929,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 3, true>::operator()(Vec3T<RealT> xyz) const
+__hostdev__ typename TreeOrAccT::ValueType SampleFromVoxels<TreeOrAccT, 3, true>::operator()(Vec3T<RealT> xyz) const
 {
     this->cache(xyz);
     return BaseT::sample(xyz, mC);
@@ -937,7 +937,7 @@
 
 template<typename TreeOrAccT>
 template<typename RealT, template<typename...> class Vec3T>
-void SampleFromVoxels<TreeOrAccT, 3, true>::cache(Vec3T<RealT>& xyz) const
+__hostdev__ void SampleFromVoxels<TreeOrAccT, 3, true>::cache(Vec3T<RealT>& xyz) const
 {
     CoordT ijk = Floor<CoordT>(xyz);
     if (ijk != mPos) {
```
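The patch above is mechanical: every NanoVDB function that Cycles calls from GPU kernels gains the `__hostdev__` qualifier, and the qualifier itself is extended to expand under the HIP compiler as well as nvcc. A minimal sketch of the convention (illustrative only, not part of the patch):

```cpp
// Sketch of the __hostdev__ convention the patch extends to HIP: the same
// inline function compiles for the CPU, for CUDA, and for HIP device code.
#if defined(__CUDACC__) || defined(__HIP__)
#define __hostdev__ __host__ __device__
#else
#define __hostdev__
#endif

inline __hostdev__ float lerp(float a, float b, float w)
{
  return a + w * (b - a);  // callable from host and device translation units
}
```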
```diff
@@ -81,4 +81,5 @@ if(NOT APPLE)
   set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE)
   set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
   set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
+  set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE)
 endif()
```
```diff
@@ -5,38 +5,38 @@
 update-code:
   git:
     submodules:
-      - branch: master
+      - branch: blender-v3.0-release
         commit_id: HEAD
         path: release/scripts/addons
-      - branch: master
+      - branch: blender-v3.0-release
         commit_id: HEAD
         path: release/scripts/addons_contrib
-      - branch: master
+      - branch: blender-v3.0-release
         commit_id: HEAD
         path: release/datafiles/locale
-      - branch: master
+      - branch: blender-v3.0-release
         commit_id: HEAD
         path: source/tools
   svn:
     libraries:
       darwin-arm64:
-        branch: trunk
+        branch: tags/blender-3.0-release
         commit_id: HEAD
         path: lib/darwin_arm64
       darwin-x86_64:
-        branch: trunk
+        branch: tags/blender-3.0-release
         commit_id: HEAD
         path: lib/darwin
       linux-x86_64:
-        branch: trunk
+        branch: tags/blender-3.0-release
         commit_id: HEAD
         path: lib/linux_centos7_x86_64
       windows-amd64:
-        branch: trunk
+        branch: tags/blender-3.0-release
         commit_id: HEAD
         path: lib/win64_vc15
     tests:
-      branch: trunk
+      branch: tags/blender-3.0-release
       commit_id: HEAD
       path: lib/tests
     benchmarks:
```
```diff
@@ -38,7 +38,7 @@ PROJECT_NAME = Blender
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER = V3.1
+PROJECT_NUMBER = V3.0
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
```
```diff
@@ -42,8 +42,13 @@ class SimpleMouseOperator(bpy.types.Operator):
         self.y = event.mouse_y
         return self.execute(context)
 
 
+# Only needed if you want to add into a dynamic menu
+def menu_func(self, context):
+    self.layout.operator(SimpleMouseOperator.bl_idname, text="Simple Mouse Operator")
+
+# Register and add to the view menu (required to also use F3 search "Simple Mouse Operator" for quick access)
 bpy.utils.register_class(SimpleMouseOperator)
+bpy.types.VIEW3D_MT_view.append(menu_func)
 
 # Test call to the newly defined operator.
 # Here we call the operator and invoke it, meaning that the settings are taken
```
```diff
@@ -43,7 +43,7 @@ def menu_func(self, context):
     self.layout.operator(ExportSomeData.bl_idname, text="Text Export Operator")
 
 
-# Register and add to the file selector
+# Register and add to the file selector (required to also use F3 search "Text Export Operator" for quick access)
 bpy.utils.register_class(ExportSomeData)
 bpy.types.TOPBAR_MT_file_export.append(menu_func)
```
```diff
@@ -27,8 +27,14 @@ class DialogOperator(bpy.types.Operator):
         wm = context.window_manager
         return wm.invoke_props_dialog(self)
 
 
+# Only needed if you want to add into a dynamic menu
+def menu_func(self, context):
+    self.layout.operator(DialogOperator.bl_idname, text="Dialog Operator")
+
+
+# Register and add to the object menu (required to also use F3 search "Dialog Operator" for quick access)
 bpy.utils.register_class(DialogOperator)
+bpy.types.VIEW3D_MT_object.append(menu_func)
 
 # Test call.
 bpy.ops.object.dialog_operator('INVOKE_DEFAULT')
```
```diff
@@ -41,8 +41,13 @@ class CustomDrawOperator(bpy.types.Operator):
 
         col.prop(self, "my_string")
 
+
+# Only needed if you want to add into a dynamic menu
+def menu_func(self, context):
+    self.layout.operator(CustomDrawOperator.bl_idname, text="Custom Draw Operator")
+
+# Register and add to the object menu (required to also use F3 search "Custom Draw Operator" for quick access)
 bpy.utils.register_class(CustomDrawOperator)
+bpy.types.VIEW3D_MT_object.append(menu_func)
 
 # test call
 bpy.ops.object.custom_draw('INVOKE_DEFAULT')
```
```diff
@@ -55,8 +55,13 @@ class ModalOperator(bpy.types.Operator):
         context.window_manager.modal_handler_add(self)
         return {'RUNNING_MODAL'}
 
+
+# Only needed if you want to add into a dynamic menu
+def menu_func(self, context):
+    self.layout.operator(ModalOperator.bl_idname, text="Modal Operator")
+
+# Register and add to the object menu (required to also use F3 search "Modal Operator" for quick access)
 bpy.utils.register_class(ModalOperator)
+bpy.types.VIEW3D_MT_object.append(menu_func)
 
 # test call
 bpy.ops.object.modal_operator('INVOKE_DEFAULT')
```
```diff
@@ -31,8 +31,13 @@ class SearchEnumOperator(bpy.types.Operator):
         context.window_manager.invoke_search_popup(self)
         return {'RUNNING_MODAL'}
 
+
+# Only needed if you want to add into a dynamic menu
+def menu_func(self, context):
+    self.layout.operator(SearchEnumOperator.bl_idname, text="Search Enum Operator")
+
+# Register and add to the object menu (required to also use F3 search "Search Enum Operator" for quick access)
 bpy.utils.register_class(SearchEnumOperator)
+bpy.types.VIEW3D_MT_object.append(menu_func)
 
 # test call
 bpy.ops.object.search_enum_operator('INVOKE_DEFAULT')
```
```diff
@@ -22,8 +22,13 @@ class HelloWorldOperator(bpy.types.Operator):
         print("Hello World")
         return {'FINISHED'}
 
+
+# Only needed if you want to add into a dynamic menu
+def menu_func(self, context):
+    self.layout.operator(HelloWorldOperator.bl_idname, text="Hello World Operator")
+
+# Register and add to the view menu (required to also use F3 search "Hello World Operator" for quick access)
 bpy.utils.register_class(HelloWorldOperator)
+bpy.types.VIEW3D_MT_view.append(menu_func)
 
 # test call to the newly defined operator
 bpy.ops.wm.hello_world()
```
```diff
@@ -106,24 +106,6 @@ including advanced features.
    floating-point values. These values are interpreted as a plane equation.
 
 
-.. function:: glColor (red, green, blue, alpha):
-
-   B{glColor3b, glColor3d, glColor3f, glColor3i, glColor3s, glColor3ub, glColor3ui, glColor3us,
-   glColor4b, glColor4d, glColor4f, glColor4i, glColor4s, glColor4ub, glColor4ui, glColor4us,
-   glColor3bv, glColor3dv, glColor3fv, glColor3iv, glColor3sv, glColor3ubv, glColor3uiv,
-   glColor3usv, glColor4bv, glColor4dv, glColor4fv, glColor4iv, glColor4sv, glColor4ubv,
-   glColor4uiv, glColor4usv}
-
-   Set a new color.
-
-   .. seealso:: `OpenGL Docs <https://khronos.org/registry/OpenGL-Refpages/gl4/html/glColor.xhtml>`__
-
-   :type red, green, blue, alpha: Depends on function prototype.
-   :arg red, green, blue: Specify new red, green, and blue values for the current color.
-   :arg alpha: Specifies a new alpha value for the current color. Included only in the
-      four-argument glColor4 commands. (With '4' colors only)
-
-
 .. function:: glColorMask(red, green, blue, alpha):
 
    Enable and disable writing of frame buffer color components
```
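The removed `glColor` entry documented legacy fixed-function state that the `bgl` wrapper no longer exposes; scripts are generally expected to use a uniform-color shader from the `gpu` module instead. A hedged sketch (builtin shader name as in the 3.0-era `gpu` docs; verify against your build):

```python
# Sketch: set a draw color via the gpu module instead of the removed glColor.
import gpu
from gpu_extras.batch import batch_for_shader

shader = gpu.shader.from_builtin('2D_UNIFORM_COLOR')
batch = batch_for_shader(shader, 'LINES', {"pos": [(0, 0), (100, 100)]})

def draw():
    shader.bind()
    shader.uniform_float("color", (1.0, 0.0, 0.0, 1.0))  # replaces glColor4f
    batch.draw(shader)
```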
```diff
@@ -1123,7 +1123,7 @@ context_type_map = {
     "soft_body": ("SoftBodyModifier", False),
     "speaker": ("Speaker", False),
     "texture": ("Texture", False),
-    "texture_slot": ("MaterialTextureSlot", False),
+    "texture_slot": ("TextureSlot", False),
     "texture_user": ("ID", False),
     "texture_user_property": ("Property", False),
     "ui_list": ("UIList", False),
```
```diff
@@ -1224,10 +1224,7 @@ def pycontext2sphinx(basepath):
     while char_array[i] is not None:
         member = ctypes.string_at(char_array[i]).decode(encoding="ascii")
         fw(".. data:: %s\n\n" % member)
-        try:
-            member_type, is_seq = context_type_map[member]
-        except KeyError:
-            raise SystemExit("Error: context key %r not found in context_type_map; update %s" % (member, __file__)) from None
+        member_type, is_seq = context_type_map[member]
         fw("   :type: %s :class:`bpy.types.%s`\n\n" % ("sequence of " if is_seq else "", member_type))
         unique.add(member)
         i += 1
```
```diff
@@ -2254,7 +2251,7 @@ def main():
     # First monkey patch to load in fake members.
     setup_monkey_patch()
 
-    # Perform changes to Blender itself.
+    # Perform changes to Blender it's self.
     setup_data = setup_blender()
 
     # eventually, create the dirs
```
extern/hipew/README (new file, 12 lines, vendored)

```
The HIP Extension Wrangler Library (HIPEW) is a cross-platform open-source
C/C++ library to dynamically load the HIP library.

HIP (Heterogeneous-Compute Interface for Portability) is an API for C++
programming on AMD GPUs.

It is maintained as part of the Blender project, but included in extern/
for consistency with CUEW and CLEW libraries.

LICENSE

HIPEW is released under the Apache 2.0 license.
```
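As a usage sketch, a HIPEW client initializes the wrangler once and only touches `hip*` entry points on success; the `hipewInit`/`HIPEW_INIT_HIP` names below are assumptions modeled on the CUEW convention and should be checked against `hipew.h`:

```c
/* Sketch: guard all HIP usage behind wrangler initialization.
 * hipewInit() and HIPEW_INIT_HIP are assumptions modeled on CUEW. */
#include "hipew.h"
#include <stdio.h>

int main(void)
{
  if (hipewInit(HIPEW_INIT_HIP) != HIPEW_SUCCESS) {
    fprintf(stderr, "HIP runtime not found (or driver too old)\n");
    return 1;
  }
  int count = 0;
  if (hipGetDeviceCount(&count) == hipSuccess) {
    printf("%d HIP device(s) available\n", count);
  }
  return 0;
}
```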
extern/hipew/README.blender (new file, 5 lines, vendored)

```
Project: Blender
URL: https://git.blender.org/blender.git
License: Apache 2.0
Upstream version: N/A
Local modifications: None
```
extern/hipew/include/hipew.h (vendored, 43 changed lines)

```diff
@@ -804,31 +804,29 @@ typedef enum hipDeviceP2PAttr {
 } hipDeviceP2PAttr;
 
 typedef struct HIP_MEMCPY3D {
-  size_t srcXInBytes;
-  size_t srcY;
-  size_t srcZ;
-  size_t srcLOD;
+  unsigned int srcXInBytes;
+  unsigned int srcY;
+  unsigned int srcZ;
+  unsigned int srcLOD;
   hipMemoryType srcMemoryType;
   const void* srcHost;
   hipDeviceptr_t srcDevice;
-  hArray * srcArray;
-  void* reserved0;
-  size_t srcPitch;
-  size_t srcHeight;
-  size_t dstXInBytes;
-  size_t dstY;
-  size_t dstZ;
-  size_t dstLOD;
+  hArray srcArray;
+  unsigned int srcPitch;
+  unsigned int srcHeight;
+  unsigned int dstXInBytes;
+  unsigned int dstY;
+  unsigned int dstZ;
+  unsigned int dstLOD;
   hipMemoryType dstMemoryType;
   void* dstHost;
   hipDeviceptr_t dstDevice;
-  hArray * dstArray;
-  void* reserved1;
-  size_t dstPitch;
-  size_t dstHeight;
-  size_t WidthInBytes;
-  size_t Height;
-  size_t Depth;
+  hArray dstArray;
+  unsigned int dstPitch;
+  unsigned int dstHeight;
+  unsigned int WidthInBytes;
+  unsigned int Height;
+  unsigned int Depth;
 } HIP_MEMCPY3D;
 
 typedef struct HIP_MEMCPY3D_PEER_st {
@@ -879,7 +877,7 @@ typedef struct HIP_RESOURCE_DESC_st {
   hipResourceType resType;
   union {
     struct {
-      hArray * h_Array;
+      hArray h_Array;
     } array;
     struct {
       hipMipmappedArray_t hMipmappedArray;
@@ -1074,9 +1072,10 @@ typedef enum hiprtcResult {
 typedef hipError_t HIPAPI thipGetErrorName(hipError_t error, const char** pStr);
 typedef hipError_t HIPAPI thipInit(unsigned int Flags);
 typedef hipError_t HIPAPI thipDriverGetVersion(int* driverVersion);
-typedef hipError_t HIPAPI thipGetDevice(hipDevice_t* device, int ordinal);
+typedef hipError_t HIPAPI thipGetDevice(int* device);
 typedef hipError_t HIPAPI thipGetDeviceCount(int* count);
 typedef hipError_t HIPAPI thipGetDeviceProperties(hipDeviceProp_t* props, int deviceId);
+typedef hipError_t HIPAPI thipDeviceGet(hipDevice_t* device, int ordinal);
 typedef hipError_t HIPAPI thipDeviceGetName(char* name, int len, hipDevice_t dev);
 typedef hipError_t HIPAPI thipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attrib, hipDevice_t dev);
 typedef hipError_t HIPAPI thipDeviceComputeCapability(int* major, int* minor, hipDevice_t dev);
@@ -1209,6 +1208,7 @@ extern thipDriverGetVersion *hipDriverGetVersion;
 extern thipGetDevice *hipGetDevice;
 extern thipGetDeviceCount *hipGetDeviceCount;
 extern thipGetDeviceProperties *hipGetDeviceProperties;
+extern thipDeviceGet* hipDeviceGet;
 extern thipDeviceGetName *hipDeviceGetName;
 extern thipDeviceGetAttribute *hipDeviceGetAttribute;
 extern thipDeviceComputeCapability *hipDeviceComputeCapability;
@@ -1333,6 +1333,7 @@ enum {
   HIPEW_SUCCESS = 0,
   HIPEW_ERROR_OPEN_FAILED = -1,
   HIPEW_ERROR_ATEXIT_FAILED = -2,
+  HIPEW_ERROR_OLD_DRIVER = -3,
 };
 
 enum {
```
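For orientation, the corrected `HIP_MEMCPY3D` is zero-initialized and filled field by field before being handed to the 3D copy entry point; this is a sketch only, and the actual wrangled copy function name should be checked in `hipew.h`:

```c
#include "hipew.h"
#include <stddef.h>

/* Sketch: parameters for a host-to-array upload using the corrected layout.
 * The helper name and the caller-supplied buffer are hypothetical. */
static HIP_MEMCPY3D make_upload_params(const void *host_pixels, hArray dst,
                                       size_t w, size_t h, size_t d,
                                       size_t texel_size)
{
  HIP_MEMCPY3D p = {0};
  p.srcMemoryType = hipMemoryTypeHost;
  p.srcHost = host_pixels;
  p.srcPitch = (unsigned int)(w * texel_size);
  p.srcHeight = (unsigned int)h;
  p.dstMemoryType = hipMemoryTypeArray;
  p.dstArray = dst;  /* hArray is now held by value, not by pointer */
  p.WidthInBytes = (unsigned int)(w * texel_size);
  p.Height = (unsigned int)h;
  p.Depth = (unsigned int)d;
  return p;
}
```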
extern/hipew/src/hipew.c (vendored, 40 changed lines)

```diff
@@ -71,6 +71,7 @@ thipDriverGetVersion *hipDriverGetVersion;
 thipGetDevice *hipGetDevice;
 thipGetDeviceCount *hipGetDeviceCount;
 thipGetDeviceProperties *hipGetDeviceProperties;
+thipDeviceGet* hipDeviceGet;
 thipDeviceGetName *hipDeviceGetName;
 thipDeviceGetAttribute *hipDeviceGetAttribute;
 thipDeviceComputeCapability *hipDeviceComputeCapability;
@@ -213,6 +214,36 @@ static void hipewHipExit(void) {
   }
 }
 
+#ifdef _WIN32
+static int hipewHasOldDriver(const char *hip_path) {
+  DWORD verHandle = 0;
+  DWORD verSize = GetFileVersionInfoSize(hip_path, &verHandle);
+  int old_driver = 0;
+  if (verSize != 0) {
+    LPSTR verData = (LPSTR)malloc(verSize);
+    if (GetFileVersionInfo(hip_path, verHandle, verSize, verData)) {
+      LPBYTE lpBuffer = NULL;
+      UINT size = 0;
+      if (VerQueryValue(verData, "\\", (VOID FAR * FAR *)&lpBuffer, &size)) {
+        if (size) {
+          VS_FIXEDFILEINFO *verInfo = (VS_FIXEDFILEINFO *)lpBuffer;
+          /* Magic value from
+           * https://docs.microsoft.com/en-us/windows/win32/api/verrsrc/ns-verrsrc-vs_fixedfileinfo */
+          if (verInfo->dwSignature == 0xfeef04bd) {
+            unsigned int fileVersionLS0 = (verInfo->dwFileVersionLS >> 16) & 0xffff;
+            unsigned int fileversionLS1 = (verInfo->dwFileVersionLS >> 0) & 0xffff;
+            /* Corresponds to versions older than AMD Radeon Pro 21.Q4. */
+            old_driver = ((fileVersionLS0 < 3354) || (fileVersionLS0 == 3354 && fileversionLS1 < 13));
+          }
+        }
+      }
+    }
+    free(verData);
+  }
+  return old_driver;
+}
+#endif
+
 static int hipewHipInit(void) {
   /* Library paths. */
 #ifdef _WIN32
@@ -240,6 +271,14 @@ static int hipewHipInit(void) {
     return result;
   }
 
+#ifdef _WIN32
+  /* Test for driver version. */
+  if(hipewHasOldDriver(hip_paths[0])) {
+    result = HIPEW_ERROR_OLD_DRIVER;
+    return result;
+  }
+#endif
+
   /* Load library. */
   hip_lib = dynamic_library_open_find(hip_paths);
 
@@ -255,6 +294,7 @@ static int hipewHipInit(void) {
   HIP_LIBRARY_FIND_CHECKED(hipGetDevice);
   HIP_LIBRARY_FIND_CHECKED(hipGetDeviceCount);
   HIP_LIBRARY_FIND_CHECKED(hipGetDeviceProperties);
+  HIP_LIBRARY_FIND_CHECKED(hipDeviceGet);
   HIP_LIBRARY_FIND_CHECKED(hipDeviceGetName);
   HIP_LIBRARY_FIND_CHECKED(hipDeviceGetAttribute);
   HIP_LIBRARY_FIND_CHECKED(hipDeviceComputeCapability);
```
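The Windows check above keys off the two 16-bit words of `dwFileVersionLS`, which hold the build and revision of a `x.y.build.revision` file version; the cutoff 3354.13 corresponds to the AMD Radeon Pro 21.Q4 driver. The same test, isolated for clarity (hypothetical helper name):

```c
#include <windows.h>

/* Sketch: the version cutoff from hipewHasOldDriver() on its own. A DLL
 * file version x.y.build.revision is "old" when build.revision < 3354.13. */
static int is_old_hip_driver(DWORD dwFileVersionLS)
{
  unsigned int build = (dwFileVersionLS >> 16) & 0xffff;
  unsigned int revision = dwFileVersionLS & 0xffff;
  return (build < 3354) || (build == 3354 && revision < 13);
}
```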
extern/nanosvg/README.blender (vendored, 2 changed lines)

```diff
@@ -1,7 +1,7 @@
 Project: NanoSVG
 URL: https://github.com/memononen/nanosvg
 License: zlib
-Upstream version:
+Upstream version: 3cdd4a9d7886
 Local modifications: Added some functionality to manage grease pencil layers
 
 Added a fix to SVG import arc and float errors (https://developer.blender.org/rB11dc674c78b49fc4e0b7c134c375b6c8b8eacbcc)
```
```diff
@@ -82,7 +82,7 @@ static void session_print_status()
   string status, substatus;
 
   /* get status */
-  float progress = options.session->progress.get_progress();
+  double progress = options.session->progress.get_progress();
   options.session->progress.get_status(status, substatus);
 
   if (substatus != "")
```
```diff
@@ -183,7 +183,7 @@ static void display_info(Progress &progress)
 
   progress.get_time(total_time, sample_time);
   progress.get_status(status, substatus);
-  float progress_val = progress.get_progress();
+  double progress_val = progress.get_progress();
 
   if (substatus != "")
     status += ": " + substatus;
```
```diff
@@ -138,6 +138,11 @@ endif()
 
 blender_add_lib(bf_intern_cycles "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
 
+# avoid link failure with clang 3.4 debug
+if(CMAKE_C_COMPILER_ID MATCHES "Clang" AND NOT ${CMAKE_C_COMPILER_VERSION} VERSION_LESS '3.4')
+  string(APPEND CMAKE_CXX_FLAGS_DEBUG " -gline-tables-only")
+endif()
+
 add_dependencies(bf_intern_cycles bf_rna)
 
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${ADDON_FILES}" ${CYCLES_INSTALL_PATH})
```
```diff
@@ -346,7 +346,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
         name="Scrambling Distance",
         default=1.0,
         min=0.0, max=1.0,
-        description="Lower values give faster rendering with GPU rendering and less noise with all devices at the cost of possible artifacts if set too low. Only works when not using adaptive sampling",
+        description="Reduce randomization between pixels to improve GPU rendering performance, at the cost of possible rendering artifacts if set too low. Only works when not using adaptive sampling",
     )
     preview_scrambling_distance: BoolProperty(
         name="Scrambling Distance viewport",
@@ -354,10 +354,10 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
         description="Uses the Scrambling Distance value for the viewport. Faster but may flicker",
     )
 
-    adaptive_scrambling_distance: BoolProperty(
-        name="Adaptive Scrambling Distance",
+    auto_scrambling_distance: BoolProperty(
+        name="Automatic Scrambling Distance",
         default=False,
-        description="Uses a formula to adapt the scrambling distance strength based on the sample count",
+        description="Automatically reduce the randomization between pixels to improve GPU rendering performance, at the cost of possible rendering artifacts. Only works when not using adaptive sampling",
     )
 
     use_layer_samples: EnumProperty(
@@ -770,8 +770,8 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
     )
 
     use_auto_tile: BoolProperty(
-        name="Auto Tiles",
-        description="Automatically render high resolution images in tiles to reduce memory usage, using the specified tile size. Tiles are cached to disk while rendering to save memory",
+        name="Use Tiling",
+        description="Render high resolution images in tiles to reduce memory usage, using the specified tile size. Tiles are cached to disk while rendering to save memory",
         default=True,
     )
     tile_size: IntProperty(
```
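Scripts address the renamed settings directly on the scene's Cycles properties; a quick sketch using the names introduced above:

```python
# Sketch: configure the renamed scrambling-distance and tiling options.
import bpy

cycles = bpy.context.scene.cycles
cycles.auto_scrambling_distance = True     # was adaptive_scrambling_distance
cycles.scrambling_distance = 0.5           # multiplier in [0.0, 1.0]
cycles.preview_scrambling_distance = True  # also apply in the viewport
cycles.use_auto_tile = True                # "Use Tiling" in the UI
```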
```diff
@@ -292,13 +292,13 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
 
         layout.separator()
 
-        col = layout.column(align=True)
-        col.active = not (cscene.use_adaptive_sampling and cscene.use_preview_adaptive_sampling)
-        col.prop(cscene, "scrambling_distance", text="Scrambling Distance")
-        col.prop(cscene, "adaptive_scrambling_distance", text="Adaptive")
-        sub = col.row(align=True)
+        heading = layout.column(align=True, heading="Scrambling Distance")
+        heading.active = not (cscene.use_adaptive_sampling and cscene.use_preview_adaptive_sampling)
+        heading.prop(cscene, "auto_scrambling_distance", text="Automatic")
+        sub = heading.row()
         sub.active = not cscene.use_preview_adaptive_sampling
         sub.prop(cscene, "preview_scrambling_distance", text="Viewport")
+        heading.prop(cscene, "scrambling_distance", text="Multiplier")
 
         layout.separator()
```
```diff
@@ -1051,7 +1051,7 @@ class CYCLES_OBJECT_PT_motion_blur(CyclesButtonsPanel, Panel):
 
 
 def has_geometry_visibility(ob):
-    return ob and ((ob.type in {'MESH', 'CURVE', 'SURFACE', 'FONT', 'META', 'LIGHT'}) or
+    return ob and ((ob.type in {'MESH', 'CURVE', 'SURFACE', 'FONT', 'META', 'LIGHT', 'VOLUME', 'POINTCLOUD', 'HAIR'}) or
                    (ob.instance_type == 'COLLECTION' and ob.instance_collection))
 
 
```
```diff
@@ -819,11 +819,14 @@ void BlenderSync::sync_hair(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, H
   new_hair.set_used_shaders(used_shaders);
 
   if (view_layer.use_hair) {
+#ifdef WITH_HAIR_NODES
     if (b_ob_info.object_data.is_a(&RNA_Hair)) {
       /* Hair object. */
       sync_hair(&new_hair, b_ob_info, false);
     }
-    else {
+    else
+#endif
+    {
       /* Particle hair. */
       bool need_undeformed = new_hair.need_attribute(scene, ATTR_STD_GENERATED);
       BL::Mesh b_mesh = object_to_mesh(
```
```diff
@@ -870,12 +873,15 @@ void BlenderSync::sync_hair_motion(BL::Depsgraph b_depsgraph,
 
   /* Export deformed coordinates. */
   if (ccl::BKE_object_is_deform_modified(b_ob_info, b_scene, preview)) {
+#ifdef WITH_HAIR_NODES
     if (b_ob_info.object_data.is_a(&RNA_Hair)) {
       /* Hair object. */
       sync_hair(hair, b_ob_info, true, motion_step);
       return;
     }
-    else {
+    else
+#endif
+    {
       /* Particle hair. */
       BL::Mesh b_mesh = object_to_mesh(
           b_data, b_ob_info, b_depsgraph, false, Mesh::SUBDIVISION_NONE);
```
@@ -31,7 +31,11 @@ CCL_NAMESPACE_BEGIN
|
|||||||
|
|
||||||
static Geometry::Type determine_geom_type(BObjectInfo &b_ob_info, bool use_particle_hair)
|
static Geometry::Type determine_geom_type(BObjectInfo &b_ob_info, bool use_particle_hair)
|
||||||
{
|
{
|
||||||
|
#ifdef WITH_HAIR_NODES
|
||||||
if (b_ob_info.object_data.is_a(&RNA_Hair) || use_particle_hair) {
|
if (b_ob_info.object_data.is_a(&RNA_Hair) || use_particle_hair) {
|
||||||
|
#else
|
||||||
|
if (use_particle_hair) {
|
||||||
|
#endif
|
||||||
return Geometry::HAIR;
|
return Geometry::HAIR;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -215,7 +219,11 @@ void BlenderSync::sync_geometry_motion(BL::Depsgraph &b_depsgraph,
|
|||||||
if (progress.get_cancel())
|
if (progress.get_cancel())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
#ifdef WITH_HAIR_NODES
|
||||||
if (b_ob_info.object_data.is_a(&RNA_Hair) || use_particle_hair) {
|
if (b_ob_info.object_data.is_a(&RNA_Hair) || use_particle_hair) {
|
||||||
|
#else
|
||||||
|
if (use_particle_hair) {
|
||||||
|
#endif
|
||||||
Hair *hair = static_cast<Hair *>(geom);
|
Hair *hair = static_cast<Hair *>(geom);
|
||||||
sync_hair_motion(b_depsgraph, b_ob_info, hair, motion_step);
|
sync_hair_motion(b_depsgraph, b_ob_info, hair, motion_step);
|
||||||
}
|
}
|
||||||
|
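Note: the `else` / `#endif` / `{` arrangement in the two sync_hair hunks above is easy to misread as a brace mismatch. It preprocesses to an ordinary if/else when WITH_HAIR_NODES is defined, and to a bare block otherwise, so the particle-hair branch stays reachable in both configurations. A minimal standalone sketch of the same pattern (all names here are hypothetical stand-ins, not the Cycles API):

```cpp
#include <cstdio>

// Uncomment to see the other preprocessed shape of the pattern.
// #define WITH_HAIR_NODES

static bool is_hair_object()
{
  return false; /* stand-in for the RNA_Hair type check */
}

static void sync(bool use_particle_hair)
{
#ifdef WITH_HAIR_NODES
  if (is_hair_object()) {
    std::puts("hair object path");
  }
  else
#endif
  {
    /* Without WITH_HAIR_NODES this is just a bare scope, so the particle-hair
     * code below is always reachable; with it defined, the scope becomes the
     * else-branch of the if above. */
    if (use_particle_hair) {
      std::puts("particle hair path");
    }
  }
}

int main()
{
  sync(true);
}
```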
@@ -24,8 +24,14 @@ CCL_NAMESPACE_BEGIN

 /* Packed Images */

-BlenderImageLoader::BlenderImageLoader(BL::Image b_image, int frame)
-    : b_image(b_image), frame(frame), free_cache(!b_image.has_data())
+BlenderImageLoader::BlenderImageLoader(BL::Image b_image,
+                                       const int frame,
+                                       const bool is_preview_render)
+    : b_image(b_image),
+      frame(frame),
+      /* Don't free cache for preview render to avoid race condition from T93560, to be fixed
+       * properly later as we are close to release. */
+      free_cache(!is_preview_render && !b_image.has_data())
 {
 }

@@ -25,7 +25,7 @@ CCL_NAMESPACE_BEGIN

 class BlenderImageLoader : public ImageLoader {
  public:
-  BlenderImageLoader(BL::Image b_image, int frame);
+  BlenderImageLoader(BL::Image b_image, const int frame, const bool is_preview_render);

   bool load_metadata(const ImageDeviceFeatures &features, ImageMetaData &metadata) override;
   bool load_pixels(const ImageMetaData &metadata,
@@ -294,7 +294,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,

   object->set_visibility(visibility);

-  object->set_is_shadow_catcher(b_ob.is_shadow_catcher());
+  object->set_is_shadow_catcher(b_ob.is_shadow_catcher() || b_parent.is_shadow_catcher());

   float shadow_terminator_shading_offset = get_float(cobject, "shadow_terminator_offset");
   object->set_shadow_terminator_shading_offset(shadow_terminator_shading_offset);
@@ -120,7 +120,7 @@ void BlenderOutputDriver::write_render_tile(const Tile &tile)
     b_pass.rect(&pixels[0]);
   }

-  b_engine_.end_result(b_rr, true, false, true);
+  b_engine_.end_result(b_rr, false, false, true);
 }

 CCL_NAMESPACE_END
@@ -129,7 +129,7 @@ void BlenderSession::create_session()
   /* reset status/progress */
   last_status = "";
   last_error = "";
-  last_progress = -1.0f;
+  last_progress = -1.0;
   start_resize_time = 0.0;

   /* create session */

@@ -615,6 +615,24 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
   sync->sync_camera(b_render, b_camera_override, width, height, "");
   sync->sync_data(
       b_render, b_depsgraph, b_v3d, b_camera_override, width, height, &python_thread_state);

+  /* Filtering settings for combined pass. */
+  if (pass->get_type() == PASS_COMBINED) {
+    Integrator *integrator = scene->integrator;
+    integrator->set_use_direct_light((bake_filter & BL::BakeSettings::pass_filter_DIRECT) != 0);
+    integrator->set_use_indirect_light((bake_filter & BL::BakeSettings::pass_filter_INDIRECT) !=
+                                       0);
+    integrator->set_use_diffuse((bake_filter & BL::BakeSettings::pass_filter_DIFFUSE) != 0);
+    integrator->set_use_glossy((bake_filter & BL::BakeSettings::pass_filter_GLOSSY) != 0);
+    integrator->set_use_transmission(
+        (bake_filter & BL::BakeSettings::pass_filter_TRANSMISSION) != 0);
+    integrator->set_use_emission((bake_filter & BL::BakeSettings::pass_filter_EMIT) != 0);
+  }
+
+  /* Always use transparent background for baking. */
+  scene->background->set_transparent(true);
+
+  /* Load built-in images from Blender. */
   builtin_images_load();
 }
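Note: the bake filter is a plain bitmask, so each integrator toggle above is a single bit test. A reduced sketch of the masking logic; the flag values here are hypothetical stand-ins for the real BL::BakeSettings::pass_filter_* constants:

```cpp
#include <cstdint>
#include <cstdio>

/* Hypothetical stand-ins for BL::BakeSettings::pass_filter_* values. */
enum : std::uint32_t {
  PASS_FILTER_DIRECT = 1u << 0,
  PASS_FILTER_INDIRECT = 1u << 1,
  PASS_FILTER_DIFFUSE = 1u << 2,
  PASS_FILTER_GLOSSY = 1u << 3,
};

int main()
{
  const std::uint32_t bake_filter = PASS_FILTER_DIRECT | PASS_FILTER_DIFFUSE;

  /* Same pattern as the integrator setters above: a feature is enabled only
   * when its bit is present in the filter. */
  std::printf("direct:   %d\n", (bake_filter & PASS_FILTER_DIRECT) != 0);
  std::printf("indirect: %d\n", (bake_filter & PASS_FILTER_INDIRECT) != 0);
  std::printf("diffuse:  %d\n", (bake_filter & PASS_FILTER_DIFFUSE) != 0);
  std::printf("glossy:   %d\n", (bake_filter & PASS_FILTER_GLOSSY) != 0);
}
```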
@@ -841,7 +859,7 @@ void BlenderSession::get_status(string &status, string &substatus)
   session->progress.get_status(status, substatus);
 }

-void BlenderSession::get_progress(float &progress, double &total_time, double &render_time)
+void BlenderSession::get_progress(double &progress, double &total_time, double &render_time)
 {
   session->progress.get_time(total_time, render_time);
   progress = session->progress.get_progress();

@@ -849,10 +867,10 @@ void BlenderSession::get_progress(float &progress, double &total_time, double &render_time)

 void BlenderSession::update_bake_progress()
 {
-  float progress = session->progress.get_progress();
+  double progress = session->progress.get_progress();

   if (progress != last_progress) {
-    b_engine.update_progress(progress);
+    b_engine.update_progress((float)progress);
     last_progress = progress;
   }
 }

@@ -861,7 +879,7 @@ void BlenderSession::update_status_progress()
 {
   string timestatus, status, substatus;
   string scene_status = "";
-  float progress;
+  double progress;
   double total_time, remaining_time = 0, render_time;
   float mem_used = (float)session->stats.mem_used / 1024.0f / 1024.0f;
   float mem_peak = (float)session->stats.mem_peak / 1024.0f / 1024.0f;

@@ -905,7 +923,7 @@ void BlenderSession::update_status_progress()
     last_status_time = current_time;
   }
   if (progress != last_progress) {
-    b_engine.update_progress(progress);
+    b_engine.update_progress((float)progress);
     last_progress = progress;
   }

@@ -82,7 +82,7 @@ class BlenderSession {
   void tag_redraw();
   void tag_update();
   void get_status(string &status, string &substatus);
-  void get_progress(float &progress, double &total_time, double &render_time);
+  void get_progress(double &progress, double &total_time, double &render_time);
   void test_cancel();
   void update_status_progress();
   void update_bake_progress();

@@ -108,7 +108,7 @@ class BlenderSession {

   string last_status;
   string last_error;
-  float last_progress;
+  double last_progress;
   double last_status_time;

   int width, height;
@@ -762,7 +762,8 @@ static ShaderNode *add_node(Scene *scene,
       int scene_frame = b_scene.frame_current();
       int image_frame = image_user_frame_number(b_image_user, b_image, scene_frame);
       image->handle = scene->image_manager->add_image(
-          new BlenderImageLoader(b_image, image_frame), image->image_params());
+          new BlenderImageLoader(b_image, image_frame, b_engine.is_preview()),
+          image->image_params());
     }
     else {
       ustring filename = ustring(

@@ -797,8 +798,9 @@ static ShaderNode *add_node(Scene *scene,
     if (is_builtin) {
       int scene_frame = b_scene.frame_current();
       int image_frame = image_user_frame_number(b_image_user, b_image, scene_frame);
-      env->handle = scene->image_manager->add_image(new BlenderImageLoader(b_image, image_frame),
-                                                    env->image_params());
+      env->handle = scene->image_manager->add_image(
+          new BlenderImageLoader(b_image, image_frame, b_engine.is_preview()),
+          env->image_params());
     }
     else {
       env->set_filename(
@@ -365,8 +365,8 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background)

   int samples = get_int(cscene, "samples");
   float scrambling_distance = get_float(cscene, "scrambling_distance");
-  bool adaptive_scrambling_distance = get_boolean(cscene, "adaptive_scrambling_distance");
-  if (adaptive_scrambling_distance) {
+  bool auto_scrambling_distance = get_boolean(cscene, "auto_scrambling_distance");
+  if (auto_scrambling_distance) {
     scrambling_distance *= 4.0f / sqrtf(samples);
   }
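Note: the rename from "adaptive" to "auto" also pins down the behavior: automatic mode scales the user-visible multiplier by 4 / sqrt(samples), so the effective scrambling distance shrinks as the sample count grows. A quick worked example assuming only that formula:

```cpp
#include <cmath>
#include <cstdio>

int main()
{
  const float user_multiplier = 1.0f;
  for (int samples : {16, 64, 256, 1024}) {
    /* Same formula as in sync_integrator() above. */
    const float scrambling = user_multiplier * 4.0f / std::sqrt((float)samples);
    std::printf("%5d samples -> scrambling distance %.3f\n", samples, scrambling);
  }
}
```

For 16, 64, 256 and 1024 samples this prints 1.000, 0.500, 0.250 and 0.125 respectively.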
@@ -30,15 +30,17 @@ BVHOptiX::BVHOptiX(const BVHParams &params_,
     : BVH(params_, geometry_, objects_),
       device(device),
       traversable_handle(0),
-      as_data(device, params_.top_level ? "optix tlas" : "optix blas", false),
-      motion_transform_data(device, "optix motion transform", false)
+      as_data(make_unique<device_only_memory<char>>(
+          device, params.top_level ? "optix tlas" : "optix blas", false)),
+      motion_transform_data(
+          make_unique<device_only_memory<char>>(device, "optix motion transform", false))
 {
 }

 BVHOptiX::~BVHOptiX()
 {
-  // Acceleration structure memory is delayed freed on device, since deleting the
-  // BVH may happen while still being used for rendering.
+  /* Acceleration structure memory is delayed freed on device, since deleting the
+   * BVH may happen while still being used for rendering. */
   device->release_optix_bvh(this);
 }

@@ -25,14 +25,16 @@

 #  include "device/memory.h"

+#  include "util/unique_ptr.h"
+
 CCL_NAMESPACE_BEGIN

 class BVHOptiX : public BVH {
  public:
   Device *device;
   uint64_t traversable_handle;
-  device_only_memory<char> as_data;
-  device_only_memory<char> motion_transform_data;
+  unique_ptr<device_only_memory<char>> as_data;
+  unique_ptr<device_only_memory<char>> motion_transform_data;

  protected:
   friend class BVH;
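Note: holding as_data and motion_transform_data behind unique_ptr lets ownership of the device allocations be handed to the device's delayed-free list when a BVH is deleted mid-render (see the delayed_free_bvh_memory change further down), without ever moving the device_only_memory object itself. A simplified ownership sketch; the types are stand-ins, not the real Cycles classes:

```cpp
#include <memory>
#include <utility>
#include <vector>

/* Stand-in for device_only_memory<char>: non-copyable and non-movable,
 * like Cycles' device_memory. */
struct DeviceBuffer {
  DeviceBuffer() = default;
  DeviceBuffer(const DeviceBuffer &) = delete;
  DeviceBuffer &operator=(const DeviceBuffer &) = delete;
};

struct BVHHandle {
  std::unique_ptr<DeviceBuffer> as_data = std::make_unique<DeviceBuffer>();
};

struct DelayedFreeList {
  std::vector<std::unique_ptr<DeviceBuffer>> pending;

  void release(BVHHandle &bvh)
  {
    /* The buffer object never moves in memory; only the owning pointer is
     * handed over, so any pointer-keyed allocation map stays valid. */
    pending.push_back(std::move(bvh.as_data));
  }
};

int main()
{
  BVHHandle bvh;
  DelayedFreeList list;
  list.release(bvh); /* the memory is now freed later, after rendering */
}
```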
@@ -38,6 +38,7 @@ void device_cpu_info(vector<DeviceInfo> &devices)
   info.id = "CPU";
   info.num = 0;
   info.has_osl = true;
+  info.has_half_images = true;
   info.has_nanovdb = true;
   info.has_profiling = true;
   if (openimagedenoise_supported()) {

@@ -134,8 +134,7 @@ void CPUDevice::mem_alloc(device_memory &mem)
            << string_human_readable_size(mem.memory_size()) << ")";
   }

-  if (mem.type == MEM_DEVICE_ONLY) {
-    assert(!mem.host_pointer);
+  if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) {
     size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES;
     void *data = util_aligned_malloc(mem.memory_size(), alignment);
     mem.device_pointer = (device_ptr)data;

@@ -194,7 +193,7 @@ void CPUDevice::mem_free(device_memory &mem)
     tex_free((device_texture &)mem);
   }
   else if (mem.device_pointer) {
-    if (mem.type == MEM_DEVICE_ONLY) {
+    if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) {
       util_aligned_free((void *)mem.device_pointer);
     }
     mem.device_pointer = 0;
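Note: the relaxed condition only stays correct because mem_alloc() and mem_free() use the exact same predicate to decide who owns the pointer. A reduced sketch of that invariant, with hypothetical helpers in place of Cycles' aligned allocator:

```cpp
#include <cassert>
#include <cstdlib>

enum MemoryType { MEM_DEVICE_ONLY, MEM_READ_WRITE };

struct Mem {
  MemoryType type = MEM_READ_WRITE;
  void *host_pointer = nullptr;
  void *device_pointer = nullptr;
};

/* Mirrors CPUDevice::mem_alloc(): allocate our own buffer whenever there is
 * no host-side storage to alias. */
static void mem_alloc(Mem &mem, std::size_t size)
{
  if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) {
    mem.device_pointer = std::malloc(size);
  }
  else {
    mem.device_pointer = mem.host_pointer; /* zero-copy alias */
  }
}

/* Mirrors CPUDevice::mem_free(): the predicate must match mem_alloc(),
 * otherwise we would free an aliased host pointer or leak our own buffer. */
static void mem_free(Mem &mem)
{
  if (mem.device_pointer) {
    if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) {
      std::free(mem.device_pointer);
    }
    mem.device_pointer = nullptr;
  }
}

int main()
{
  Mem m;
  mem_alloc(m, 1024);
  assert(m.device_pointer);
  mem_free(m);
}
```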
@@ -42,7 +42,7 @@ class CPUKernels {

   IntegratorInitFunction integrator_init_from_camera;
   IntegratorInitFunction integrator_init_from_bake;
-  IntegratorFunction integrator_intersect_closest;
+  IntegratorShadeFunction integrator_intersect_closest;
   IntegratorFunction integrator_intersect_shadow;
   IntegratorFunction integrator_intersect_subsurface;
   IntegratorFunction integrator_intersect_volume_stack;
@@ -144,6 +144,7 @@ void device_cuda_info(vector<DeviceInfo> &devices)
     info.description = string(name);
     info.num = num;

+    info.has_half_images = (major >= 3);
     info.has_nanovdb = true;
     info.denoisers = 0;
@@ -680,7 +680,7 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_padding)

   void *shared_pointer = 0;

-  if (mem_alloc_result != CUDA_SUCCESS && can_map_host) {
+  if (mem_alloc_result != CUDA_SUCCESS && can_map_host && mem.type != MEM_DEVICE_ONLY) {
     if (mem.shared_pointer) {
       /* Another device already allocated host memory. */
       mem_alloc_result = CUDA_SUCCESS;

@@ -703,8 +703,14 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_padding)
   }

   if (mem_alloc_result != CUDA_SUCCESS) {
-    status = " failed, out of device and host memory";
-    set_error("System is out of GPU and shared host memory");
+    if (mem.type == MEM_DEVICE_ONLY) {
+      status = " failed, out of device memory";
+      set_error("System is out of GPU memory");
+    }
+    else {
+      status = " failed, out of device and host memory";
+      set_error("System is out of GPU and shared host memory");
+    }
   }

   if (mem.name) {

@@ -777,6 +783,7 @@ void CUDADevice::generic_free(device_memory &mem)
   if (mem.device_pointer) {
     CUDAContextScope scope(this);
     thread_scoped_lock lock(cuda_mem_map_mutex);
+    DCHECK(cuda_mem_map.find(&mem) != cuda_mem_map.end());
     const CUDAMem &cmem = cuda_mem_map[&mem];

     /* If cmem.use_mapped_host is true, reference counting is used

@@ -1145,6 +1152,7 @@ void CUDADevice::tex_free(device_texture &mem)
   if (mem.device_pointer) {
     CUDAContextScope scope(this);
     thread_scoped_lock lock(cuda_mem_map_mutex);
+    DCHECK(cuda_mem_map.find(&mem) != cuda_mem_map.end());
     const CUDAMem &cmem = cuda_mem_map[&mem];

     if (cmem.texobject) {
@@ -286,6 +286,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
   info.description = "Multi Device";
   info.num = 0;

+  info.has_half_images = true;
   info.has_nanovdb = true;
   info.has_osl = true;
   info.has_profiling = true;

@@ -332,6 +333,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
     }

     /* Accumulate device info. */
+    info.has_half_images &= device.has_half_images;
     info.has_nanovdb &= device.has_nanovdb;
     info.has_osl &= device.has_osl;
     info.has_profiling &= device.has_profiling;
@@ -73,6 +73,7 @@ class DeviceInfo {
   int num;
   bool display_device;    /* GPU is used as a display device. */
   bool has_nanovdb;       /* Support NanoVDB volumes. */
+  bool has_half_images;   /* Support half-float textures. */
   bool has_osl;           /* Support Open Shading Language. */
   bool has_profiling;     /* Supports runtime collection of profiling info. */
   bool has_peer_memory;   /* GPU has P2P access to memory of another GPU. */

@@ -89,6 +90,7 @@ class DeviceInfo {
     num = 0;
     cpu_threads = 0;
     display_device = false;
+    has_half_images = false;
     has_nanovdb = false;
     has_osl = false;
     has_profiling = false;
@@ -57,9 +57,16 @@ bool device_hip_init()
     }
   }
   else {
-    VLOG(1) << "HIPEW initialization failed: "
-            << ((hipew_result == HIPEW_ERROR_ATEXIT_FAILED) ? "Error setting up atexit() handler" :
-                                                              "Error opening the library");
+    if (hipew_result == HIPEW_ERROR_ATEXIT_FAILED) {
+      VLOG(1) << "HIPEW initialization failed: Error setting up atexit() handler";
+    }
+    else if (hipew_result == HIPEW_ERROR_OLD_DRIVER) {
+      VLOG(1) << "HIPEW initialization failed: Driver version too old, requires AMD Radeon Pro "
+                 "21.Q4 driver or newer";
+    }
+    else {
+      VLOG(1) << "HIPEW initialization failed: Error opening HIP dynamic library";
+    }
   }

   return result;

@@ -141,6 +148,7 @@ void device_hip_info(vector<DeviceInfo> &devices)
     info.description = string(name);
     info.num = num;

+    info.has_half_images = true;
     info.has_nanovdb = true;
     info.denoisers = 0;
@@ -99,7 +99,7 @@ HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
   }

   /* Setup device and context. */
-  result = hipGetDevice(&hipDevice, hipDevId);
+  result = hipDeviceGet(&hipDevice, hipDevId);
   if (result != hipSuccess) {
     set_error(string_printf("Failed to get HIP device handle from ordinal (%s)",
                             hipewErrorString(result)));

@@ -222,7 +222,6 @@ string HIPDevice::compile_kernel_get_common_cflags(const uint kernel_features)
   const string include_path = source_path;
   string cflags = string_printf(
       "-m%d "
-      "--ptxas-options=\"-v\" "
       "--use_fast_math "
       "-DHIPCC "
       "-I\"%s\"",

@@ -234,10 +233,7 @@ string HIPDevice::compile_kernel_get_common_cflags(const uint kernel_features)
   return cflags;
 }

-string HIPDevice::compile_kernel(const uint kernel_features,
-                                 const char *name,
-                                 const char *base,
-                                 bool force_ptx)
+string HIPDevice::compile_kernel(const uint kernel_features, const char *name, const char *base)
 {
   /* Compute kernel name. */
   int major, minor;

@@ -255,13 +251,11 @@ string HIPDevice::compile_kernel(const uint kernel_features,

   /* Attempt to use kernel provided with Blender. */
   if (!use_adaptive_compilation()) {
-    if (!force_ptx) {
-      const string fatbin = path_get(string_printf("lib/%s_%s.fatbin", name, arch));
-      VLOG(1) << "Testing for pre-compiled kernel " << fatbin << ".";
-      if (path_exists(fatbin)) {
-        VLOG(1) << "Using precompiled kernel.";
-        return fatbin;
-      }
+    const string fatbin = path_get(string_printf("lib/%s_%s.fatbin", name, arch));
+    VLOG(1) << "Testing for pre-compiled kernel " << fatbin << ".";
+    if (path_exists(fatbin)) {
+      VLOG(1) << "Using precompiled kernel.";
+      return fatbin;
     }
   }

@@ -298,9 +292,9 @@ string HIPDevice::compile_kernel(const uint kernel_features,

 #  ifdef _WIN32
   if (!use_adaptive_compilation() && have_precompiled_kernels()) {
-    if (major < 3) {
+    if (!hipSupportsDevice(hipDevId)) {
       set_error(
-          string_printf("HIP backend requires compute capability 3.0 or up, but found %d.%d. "
+          string_printf("HIP backend requires compute capability 10.1 or up, but found %d.%d. "
                         "Your GPU is not supported.",
                         major,
                         minor));

@@ -751,6 +745,7 @@ void HIPDevice::generic_free(device_memory &mem)
   if (mem.device_pointer) {
     HIPContextScope scope(this);
     thread_scoped_lock lock(hip_mem_map_mutex);
+    DCHECK(hip_mem_map.find(&mem) != hip_mem_map.end());
     const HIPMem &cmem = hip_mem_map[&mem];

     /* If cmem.use_mapped_host is true, reference counting is used

@@ -994,16 +989,16 @@ void HIPDevice::tex_alloc(device_texture &mem)
             << string_human_readable_number(mem.memory_size()) << " bytes. ("
             << string_human_readable_size(mem.memory_size()) << ")";

-    hip_assert(hipArray3DCreate(&array_3d, &desc));
+    hip_assert(hipArray3DCreate((hArray *)&array_3d, &desc));

     if (!array_3d) {
       return;
     }

     HIP_MEMCPY3D param;
-    memset(&param, 0, sizeof(param));
+    memset(&param, 0, sizeof(HIP_MEMCPY3D));
     param.dstMemoryType = hipMemoryTypeArray;
-    param.dstArray = &array_3d;
+    param.dstArray = array_3d;
     param.srcMemoryType = hipMemoryTypeHost;
     param.srcHost = mem.host_pointer;
     param.srcPitch = src_pitch;

@@ -1069,13 +1064,13 @@ void HIPDevice::tex_alloc(device_texture &mem)

   if (mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT &&
       mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
-    /* Kepler+, bindless textures. */
+    /* Bindless textures. */
     hipResourceDesc resDesc;
     memset(&resDesc, 0, sizeof(resDesc));

     if (array_3d) {
       resDesc.resType = hipResourceTypeArray;
-      resDesc.res.array.h_Array = &array_3d;
+      resDesc.res.array.h_Array = array_3d;
       resDesc.flags = 0;
     }
     else if (mem.data_height > 0) {

@@ -1120,6 +1115,7 @@ void HIPDevice::tex_free(device_texture &mem)
   if (mem.device_pointer) {
     HIPContextScope scope(this);
     thread_scoped_lock lock(hip_mem_map_mutex);
+    DCHECK(hip_mem_map.find(&mem) != hip_mem_map.end());
     const HIPMem &cmem = hip_mem_map[&mem];

     if (cmem.texobject) {

@@ -1160,6 +1156,8 @@ bool HIPDevice::should_use_graphics_interop()
    * possible, but from the empiric measurements it can be considerably slower than using naive
    * pixels copy. */

+  /* Disable graphics interop for now, because of driver bug in 21.40. See T92972 */
+#  if 0
   HIPContextScope scope(this);

   int num_all_devices = 0;

@@ -1178,6 +1176,7 @@ bool HIPDevice::should_use_graphics_interop()
       return true;
     }
   }
+#  endif

   return false;
 }
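Note: several of the HIP fixes above are handle-type corrections: the array handle is itself a pointer-like value, so the copy parameters and the resource descriptor want the handle, not its address, and the driver-style hipDeviceGet replaces the mistakenly used hipGetDevice. A reduced illustration of the out-parameter versus field distinction, with stand-in types rather than the real HIP ones:

```cpp
#include <cassert>

/* Stand-in for HIP's opaque array handle (a pointer to an opaque struct). */
struct OpaqueArray;
using ArrayHandle = OpaqueArray *;

struct CopyParams {
  ArrayHandle dstArray = nullptr;
};

/* Stand-in for hipArray3DCreate(): fills the handle through an out-parameter,
 * so taking the handle's address is correct at the creation site. */
static void create_array(ArrayHandle *out)
{
  static int storage = 0;
  *out = reinterpret_cast<ArrayHandle>(&storage);
}

int main()
{
  ArrayHandle array_3d = nullptr;
  create_array(&array_3d); /* out-parameter: pass &array_3d */

  CopyParams param;
  /* Descriptor fields take the handle value itself; storing &array_3d here
   * is the bug the hunks above fix, and it previously compiled only through
   * loose casts. */
  param.dstArray = array_3d;
  assert(param.dstArray != nullptr);
}
```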
@@ -95,8 +95,7 @@ class HIPDevice : public Device {

   string compile_kernel(const uint kernel_features,
                         const char *name,
-                        const char *base = "hip",
-                        bool force_ptx = false);
+                        const char *base = "hip");

   virtual bool load_kernels(const uint kernel_features) override;
   void reserve_local_memory(const uint kernel_features);
@@ -44,45 +44,6 @@ device_memory::device_memory(Device *device, const char *name, MemoryType type)
 {
 }

-device_memory::device_memory(device_memory &&other) noexcept
-    : data_type(other.data_type),
-      data_elements(other.data_elements),
-      data_size(other.data_size),
-      device_size(other.device_size),
-      data_width(other.data_width),
-      data_height(other.data_height),
-      data_depth(other.data_depth),
-      type(other.type),
-      name(other.name),
-      device(other.device),
-      device_pointer(other.device_pointer),
-      host_pointer(other.host_pointer),
-      shared_pointer(other.shared_pointer),
-      shared_counter(other.shared_counter),
-      original_device_ptr(other.original_device_ptr),
-      original_device_size(other.original_device_size),
-      original_device(other.original_device),
-      need_realloc_(other.need_realloc_),
-      modified(other.modified)
-{
-  other.data_elements = 0;
-  other.data_size = 0;
-  other.device_size = 0;
-  other.data_width = 0;
-  other.data_height = 0;
-  other.data_depth = 0;
-  other.device = 0;
-  other.device_pointer = 0;
-  other.host_pointer = 0;
-  other.shared_pointer = 0;
-  other.shared_counter = 0;
-  other.original_device_ptr = 0;
-  other.original_device_size = 0;
-  other.original_device = 0;
-  other.need_realloc_ = false;
-  other.modified = false;
-}
-
 device_memory::~device_memory()
 {
   assert(shared_pointer == 0);
@@ -281,11 +281,16 @@ class device_memory {

   /* Only create through subclasses. */
   device_memory(Device *device, const char *name, MemoryType type);
-  device_memory(device_memory &&other) noexcept;

-  /* No copying allowed. */
+  /* No copying and no move semantics.
+   *
+   * This is because device implementation might need to register device memory in an allocation
+   * map of some sort and use pointer as a key to identify blocks. Moving data from one place to
+   * another bypassing device allocation routines will make those maps hard to maintain. */
   device_memory(const device_memory &) = delete;
+  device_memory(device_memory &&other) noexcept = delete;
   device_memory &operator=(const device_memory &) = delete;
+  device_memory &operator=(device_memory &&) = delete;

   /* Host allocation on the device. All host_pointer memory should be
    * allocated with these functions, for devices that support using
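Note: the restored comment's rationale can be made concrete: devices key their allocation maps (cuda_mem_map, hip_mem_map) on the device_memory object's address, and a move that transplants state into another object silently invalidates that key. A minimal sketch of the failure mode the deleted operations rule out, using simplified stand-in types:

```cpp
#include <cassert>
#include <map>

struct device_memory_like {
  int device_pointer = 0;
};

/* Device-side bookkeeping keyed by object address, as cuda_mem_map and
 * hip_mem_map are in Cycles. */
static std::map<const device_memory_like *, int> mem_map;

static void device_alloc(device_memory_like &mem)
{
  mem.device_pointer = 42;
  mem_map[&mem] = 1; /* the address is the key */
}

int main()
{
  device_memory_like a;
  device_alloc(a);

  /* If a move constructor transplanted a's state into some new object b,
   * the map would still point at a, and freeing through b would miss the
   * entry. With the move operations deleted, only references and pointers
   * to the original object remain possible. */
  device_memory_like &b = a;
  assert(mem_map.count(&b) == 1);
}
```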
@@ -44,14 +44,14 @@
 CCL_NAMESPACE_BEGIN

 OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
-    : device(device), queue(device), state(device, "__denoiser_state")
+    : device(device), queue(device), state(device, "__denoiser_state", true)
 {
 }

 OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
     : CUDADevice(info, stats, profiler),
       sbt_data(this, "__sbt", MEM_READ_ONLY),
-      launch_params(this, "__params"),
+      launch_params(this, "__params", false),
       denoiser_(this)
 {
   /* Make the CUDA context current. */

@@ -507,7 +507,7 @@ class OptiXDevice::DenoiseContext {
       : denoise_params(task.params),
         render_buffers(task.render_buffers),
         buffer_params(task.buffer_params),
-        guiding_buffer(device, "denoiser guiding passes buffer"),
+        guiding_buffer(device, "denoiser guiding passes buffer", true),
         num_samples(task.num_samples)
   {
     num_input_passes = 1;

@@ -522,9 +522,9 @@ class OptiXDevice::DenoiseContext {
       }
     }

-    const int num_guiding_passes = num_input_passes - 1;
+    use_guiding_passes = (num_input_passes - 1) > 0;

-    if (num_guiding_passes) {
+    if (use_guiding_passes) {
       if (task.allow_inplace_modification) {
         guiding_params.device_pointer = render_buffers->buffer.device_pointer;

@@ -577,6 +577,7 @@ class OptiXDevice::DenoiseContext {

   /* Number of input passes. Including the color and extra auxiliary passes. */
   int num_input_passes = 0;
+  bool use_guiding_passes = false;
   bool use_pass_albedo = false;
   bool use_pass_normal = false;

@@ -708,7 +709,7 @@ void OptiXDevice::denoise_pass(DenoiseContext &context, PassType pass_type)
       return;
     }
   }
-  else if (!context.albedo_replaced_with_fake) {
+  else if (context.use_guiding_passes && !context.albedo_replaced_with_fake) {
     context.albedo_replaced_with_fake = true;
     if (!denoise_filter_guiding_set_fake_albedo(context)) {
       LOG(ERROR) << "Error replacing real albedo with the fake one.";

@@ -886,8 +887,7 @@ bool OptiXDevice::denoise_configure_if_needed(DenoiseContext &context)
   denoiser_.scratch_offset = sizes.stateSizeInBytes;

   /* Allocate denoiser state if tile size has changed since last setup. */
-  denoiser_.state.alloc_to_device(denoiser_.scratch_offset + denoiser_.scratch_size +
-                                  sizeof(float));
+  denoiser_.state.alloc_to_device(denoiser_.scratch_offset + denoiser_.scratch_size);

   /* Initialize denoiser state for the current tile size. */
   const OptixResult result = optixDenoiserSetup(

@@ -971,16 +971,6 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)

   /* Finally run denoising. */
   OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */
-  params.hdrIntensity = denoiser_.state.device_pointer + denoiser_.scratch_offset +
-                        denoiser_.scratch_size;
-
-  optix_assert(
-      optixDenoiserComputeIntensity(denoiser_.optix_denoiser,
-                                    denoiser_.queue.stream(),
-                                    &color_layer,
-                                    params.hdrIntensity,
-                                    denoiser_.state.device_pointer + denoiser_.scratch_offset,
-                                    denoiser_.scratch_size));

   OptixDenoiserLayer image_layers = {};
   image_layers.input = color_layer;
@@ -1011,6 +1001,13 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
                                   const OptixBuildInput &build_input,
                                   uint16_t num_motion_steps)
 {
+  /* Allocate and build acceleration structures only one at a time, to prevent parallel builds
+   * from running out of memory (since both original and compacted acceleration structure memory
+   * may be allocated at the same time for the duration of this function). The builds would
+   * otherwise happen on the same CUDA stream anyway. */
+  static thread_mutex mutex;
+  thread_scoped_lock lock(mutex);
+
   const CUDAContextScope scope(this);

   const bool use_fast_trace_bvh = (bvh->params.bvh_type == BVH_TYPE_STATIC);

@@ -1036,14 +1033,15 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
   optix_assert(optixAccelComputeMemoryUsage(context, &options, &build_input, 1, &sizes));

   /* Allocate required output buffers. */
-  device_only_memory<char> temp_mem(this, "optix temp as build mem");
+  device_only_memory<char> temp_mem(this, "optix temp as build mem", true);
   temp_mem.alloc_to_device(align_up(sizes.tempSizeInBytes, 8) + 8);
   if (!temp_mem.device_pointer) {
     /* Make sure temporary memory allocation succeeded. */
     return false;
   }

-  device_only_memory<char> &out_data = bvh->as_data;
+  /* Acceleration structure memory has to be allocated on the device (not allowed on the host). */
+  device_only_memory<char> &out_data = *bvh->as_data;
   if (operation == OPTIX_BUILD_OPERATION_BUILD) {
     assert(out_data.device == this);
     out_data.alloc_to_device(sizes.outputSizeInBytes);

@@ -1091,12 +1089,13 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,

     /* There is no point compacting if the size does not change. */
     if (compacted_size < sizes.outputSizeInBytes) {
-      device_only_memory<char> compacted_data(this, "optix compacted as");
+      device_only_memory<char> compacted_data(this, "optix compacted as", false);
       compacted_data.alloc_to_device(compacted_size);
-      if (!compacted_data.device_pointer)
+      if (!compacted_data.device_pointer) {
         /* Do not compact if memory allocation for compacted acceleration structure fails.
          * Can just use the uncompacted one then, so succeed here regardless. */
         return !have_error();
+      }

       optix_assert(optixAccelCompact(
           context, NULL, out_handle, compacted_data.device_pointer, compacted_size, &out_handle));

@@ -1107,6 +1106,8 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,

       std::swap(out_data.device_size, compacted_data.device_size);
       std::swap(out_data.device_pointer, compacted_data.device_pointer);
+      /* Original acceleration structure memory is freed when 'compacted_data' goes out of
+       * scope. */
     }
   }
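Note: the function-local static mutex is the entire serialization mechanism here: every build, across all threads, funnels through one lock, so at most one original-plus-compacted acceleration structure pair is alive at any time. A skeletal sketch of the pattern using the standard library instead of Cycles' thread wrappers:

```cpp
#include <mutex>

/* Same shape as build_optix_bvh() above: a function-local static mutex
 * serializes every invocation process-wide, bounding peak memory use. */
static bool build_acceleration_structure()
{
  static std::mutex mutex;                 /* shared by all callers */
  std::lock_guard<std::mutex> lock(mutex); /* held for the whole build */

  /* ... allocate temporary buffers, build, optionally compact ... */
  return true;
}

int main()
{
  return build_acceleration_structure() ? 0 : 1;
}
```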
@@ -1134,7 +1135,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
     operation = OPTIX_BUILD_OPERATION_UPDATE;
   }
   else {
-    bvh_optix->as_data.free();
+    bvh_optix->as_data->free();
     bvh_optix->traversable_handle = 0;
   }

@@ -1195,7 +1196,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
         const float4 pw = make_float4(
             curve_radius[ka], curve_radius[k0], curve_radius[k1], curve_radius[kb]);

-        /* Convert Catmull-Rom data to Bezier spline. */
+        /* Convert Catmull-Rom data to B-spline. */
         static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f;
         static const float4 cr2bsp1 = make_float4(-2, 11, -4, +1) / 6.f;
         static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f;

@@ -1355,9 +1356,9 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
     unsigned int num_instances = 0;
     unsigned int max_num_instances = 0xFFFFFFFF;

-    bvh_optix->as_data.free();
+    bvh_optix->as_data->free();
     bvh_optix->traversable_handle = 0;
-    bvh_optix->motion_transform_data.free();
+    bvh_optix->motion_transform_data->free();

     optixDeviceContextGetProperty(context,
                                   OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID,

@@ -1390,8 +1391,8 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
         }
       }

-      assert(bvh_optix->motion_transform_data.device == this);
-      bvh_optix->motion_transform_data.alloc_to_device(total_motion_transform_size);
+      assert(bvh_optix->motion_transform_data->device == this);
+      bvh_optix->motion_transform_data->alloc_to_device(total_motion_transform_size);
     }

     for (Object *ob : bvh->objects) {

@@ -1452,7 +1453,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)

         motion_transform_offset = align_up(motion_transform_offset,
                                            OPTIX_TRANSFORM_BYTE_ALIGNMENT);
-        CUdeviceptr motion_transform_gpu = bvh_optix->motion_transform_data.device_pointer +
+        CUdeviceptr motion_transform_gpu = bvh_optix->motion_transform_data->device_pointer +
                                            motion_transform_offset;
         motion_transform_offset += motion_transform_size;
@@ -23,6 +23,7 @@
 #  include "device/optix/queue.h"
 #  include "device/optix/util.h"
 #  include "kernel/types.h"
+#  include "util/unique_ptr.h"

 CCL_NAMESPACE_BEGIN

@@ -76,7 +77,7 @@ class OptiXDevice : public CUDADevice {
   device_only_memory<KernelParamsOptiX> launch_params;
   OptixTraversableHandle tlas_handle = 0;

-  vector<device_only_memory<char>> delayed_free_bvh_memory;
+  vector<unique_ptr<device_only_memory<char>>> delayed_free_bvh_memory;
   thread_mutex delayed_free_bvh_mutex;

   class Denoiser {
@@ -73,7 +73,8 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[])
                      sizeof(device_ptr),
                      cuda_stream_));

-  if (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
+  if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
+      kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
     cuda_device_assert(
         cuda_device_,
         cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
@@ -33,7 +33,10 @@ unique_ptr<Denoiser> Denoiser::create(Device *path_trace_device, const DenoiseParams &params)
     return make_unique<OptiXDenoiser>(path_trace_device, params);
   }

-  return make_unique<OIDNDenoiser>(path_trace_device, params);
+  /* Always fallback to OIDN. */
+  DenoiseParams oidn_params = params;
+  oidn_params.type = DENOISER_OPENIMAGEDENOISE;
+  return make_unique<OIDNDenoiser>(path_trace_device, oidn_params);
 }

 Denoiser::Denoiser(Device *path_trace_device, const DenoiseParams &params)
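Note: rewriting the requested type before falling back means the constructed denoiser and the parameters it reports agree even when, say, OptiX denoising was requested on hardware without it. A condensed sketch of the selection logic with simplified stand-in types:

```cpp
#include <cassert>
#include <memory>

enum DenoiserType { DENOISER_OPTIX, DENOISER_OPENIMAGEDENOISE };

struct DenoiseParams {
  DenoiserType type = DENOISER_OPTIX;
};

struct Denoiser {
  DenoiseParams params;
  explicit Denoiser(const DenoiseParams &p) : params(p) {}
};

static std::unique_ptr<Denoiser> create(bool optix_available, const DenoiseParams &params)
{
  if (params.type == DENOISER_OPTIX && optix_available) {
    return std::make_unique<Denoiser>(params);
  }
  /* Always fall back to OIDN, and make the stored params say so. */
  DenoiseParams oidn_params = params;
  oidn_params.type = DENOISER_OPENIMAGEDENOISE;
  return std::make_unique<Denoiser>(oidn_params);
}

int main()
{
  DenoiseParams requested;
  requested.type = DENOISER_OPTIX;
  auto denoiser = create(/*optix_available=*/false, requested);
  assert(denoiser->params.type == DENOISER_OPENIMAGEDENOISE);
}
```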
@@ -47,9 +47,6 @@ static bool oidn_progress_monitor_function(void *user_ptr, double /*n*/)
   OIDNDenoiser *oidn_denoiser = reinterpret_cast<OIDNDenoiser *>(user_ptr);
   return !oidn_denoiser->is_cancelled();
 }
-#endif
-
-#ifdef WITH_OPENIMAGEDENOISE

 class OIDNPass {
  public:

@@ -547,7 +544,6 @@ class OIDNDenoiseContext {
    * the fake values and denoising of passes which do need albedo can no longer happen. */
   bool albedo_replaced_with_fake_ = false;
 };
-#endif

 static unique_ptr<DeviceQueue> create_device_queue(const RenderBuffers *render_buffers)
 {

@@ -582,18 +578,20 @@ static void copy_render_buffers_to_device(unique_ptr<DeviceQueue> &queue,
   }
 }

+#endif
+
 bool OIDNDenoiser::denoise_buffer(const BufferParams &buffer_params,
                                   RenderBuffers *render_buffers,
                                   const int num_samples,
                                   bool allow_inplace_modification)
 {
+#ifdef WITH_OPENIMAGEDENOISE
   thread_scoped_lock lock(mutex_);

   /* Make sure the host-side data is available for denoising. */
   unique_ptr<DeviceQueue> queue = create_device_queue(render_buffers);
   copy_render_buffers_from_device(queue, render_buffers);

-#ifdef WITH_OPENIMAGEDENOISE
   OIDNDenoiseContext context(
       this, params_, buffer_params, render_buffers, num_samples, allow_inplace_modification);

@@ -620,6 +618,11 @@ bool OIDNDenoiser::denoise_buffer(const BufferParams &buffer_params,
      * copies data from the device it doesn't overwrite the denoiser buffers. */
     copy_render_buffers_to_device(queue, render_buffers);
   }
+#else
+  (void)buffer_params;
+  (void)render_buffers;
+  (void)num_samples;
+  (void)allow_inplace_modification;
 #endif

   /* This code is not supposed to run when compiled without OIDN support, so can assume if we made
@@ -296,13 +296,13 @@ static BufferParams scale_buffer_params(const BufferParams &params, int resolution_divider)

   scaled_params.window_x = params.window_x / resolution_divider;
   scaled_params.window_y = params.window_y / resolution_divider;
-  scaled_params.window_width = params.window_width / resolution_divider;
-  scaled_params.window_height = params.window_height / resolution_divider;
+  scaled_params.window_width = max(1, params.window_width / resolution_divider);
+  scaled_params.window_height = max(1, params.window_height / resolution_divider);

   scaled_params.full_x = params.full_x / resolution_divider;
   scaled_params.full_y = params.full_y / resolution_divider;
-  scaled_params.full_width = params.full_width / resolution_divider;
-  scaled_params.full_height = params.full_height / resolution_divider;
+  scaled_params.full_width = max(1, params.full_width / resolution_divider);
+  scaled_params.full_height = max(1, params.full_height / resolution_divider);

   scaled_params.update_offset_stride();
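Note: without the clamp, integer division truncates a small window against a large resolution divider to a zero-sized buffer. The arithmetic in a few lines:

```cpp
#include <algorithm>
#include <cstdio>

int main()
{
  const int window_width = 3, resolution_divider = 4;
  std::printf("unclamped: %d\n", window_width / resolution_divider);              /* 0 */
  std::printf("clamped:   %d\n", std::max(1, window_width / resolution_divider)); /* 1 */
}
```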
@@ -479,7 +479,11 @@ void PathTrace::set_denoiser_params(const DenoiseParams &params)
   }

   denoiser_ = Denoiser::create(device_, params);
-  denoiser_->is_cancelled_cb = [this]() { return is_cancel_requested(); };
+
+  /* Only take into account the "immediate" cancel to have interactive rendering responding to
+   * navigation as quickly as possible, but allow to run denoiser after user hit Esc button while
+   * doing offline rendering. */
+  denoiser_->is_cancelled_cb = [this]() { return render_cancel_.is_requested; };
 }

 void PathTrace::set_adaptive_sampling(const AdaptiveSampling &adaptive_sampling)
@@ -847,7 +851,8 @@ void PathTrace::progress_update_if_needed(const RenderWork &render_work)
 {
   if (progress_ != nullptr) {
     const int2 tile_size = get_render_tile_size();
-    const int num_samples_added = tile_size.x * tile_size.y * render_work.path_trace.num_samples;
+    const uint64_t num_samples_added = uint64_t(tile_size.x) * tile_size.y *
+                                       render_work.path_trace.num_samples;
     const int current_sample = render_work.path_trace.start_sample +
                                render_work.path_trace.num_samples;
     progress_->add_samples(num_samples_added, current_sample);
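Note: the widening matters because tile_size.x * tile_size.y * num_samples can exceed 32 bits for plausible renders; a 4096x4096 tile at 256 samples is exactly 2^32 samples added. A quick demonstration of the wraparound and the fix:

```cpp
#include <cstdint>
#include <cstdio>

int main()
{
  const int tile_x = 4096, tile_y = 4096, num_samples = 256;

  /* A 32-bit product wraps: 4096 * 4096 * 256 == 2^32 == 0 modulo 2^32. */
  const std::uint32_t narrow = std::uint32_t(tile_x) * std::uint32_t(tile_y) *
                               std::uint32_t(num_samples);

  /* Widening the first operand makes the whole product 64-bit, as in the fix. */
  const std::uint64_t wide = std::uint64_t(tile_x) * tile_y * num_samples;

  std::printf("uint32: %u\n", (unsigned)narrow);           /* 0 */
  std::printf("uint64: %llu\n", (unsigned long long)wide); /* 4294967296 */
}
```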
@@ -77,8 +77,10 @@ void PathTraceWorkCPU::render_samples(RenderStatistics &statistics,
   const int64_t image_height = effective_buffer_params_.height;
   const int64_t total_pixels_num = image_width * image_height;

-  for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
-    kernel_globals.start_profiling();
+  if (device_->profiler.active()) {
+    for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
+      kernel_globals.start_profiling();
+    }
   }

   tbb::task_arena local_arena = local_tbb_arena_create(device_);

@@ -106,9 +108,10 @@ void PathTraceWorkCPU::render_samples(RenderStatistics &statistics,
       render_samples_full_pipeline(kernel_globals, work_tile, samples_num);
     });
   });

-  for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
-    kernel_globals.stop_profiling();
+  if (device_->profiler.active()) {
+    for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
+      kernel_globals.stop_profiling();
+    }
   }

   statistics.occupancy = 1.0f;
@@ -257,7 +257,8 @@ void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
    * become busy after adding new tiles). This is especially important for the shadow catcher which
    * schedules work in halves of available number of paths. */
   work_tile_scheduler_.set_max_num_path_states(max_num_paths_ / 8);
+  work_tile_scheduler_.set_accelerated_rt((device_->get_bvh_layout_mask() & BVH_LAYOUT_OPTIX) !=
+                                          0);
   work_tile_scheduler_.reset(effective_buffer_params_,
                              start_sample,
                              samples_num,
@@ -437,7 +438,15 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
   DCHECK_LE(work_size, max_num_paths_);
 
   switch (kernel) {
-    case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
+    case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: {
+      /* Closest ray intersection kernels with integrator state and render buffer. */
+      void *d_render_buffer = (void *)buffers_->buffer.device_pointer;
+      void *args[] = {&d_path_index, &d_render_buffer, const_cast<int *>(&work_size)};
+
+      queue_->enqueue(kernel, work_size, args);
+      break;
+    }
+
     case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW:
     case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
     case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: {
@@ -827,6 +827,26 @@ int RenderScheduler::get_num_samples_to_path_trace() const
       num_samples_to_occupy = lround(state_.occupancy_num_samples * 0.7f / state_.occupancy);
     }
 
+    /* When time limit is used clamp the calculated number of samples to keep occupancy.
+     * This is because time limit causes the last render iteration to happen with less number of
+     * samples, which conflicts with the occupancy (lower number of samples causes lower
+     * occupancy, also the calculation is based on number of previously rendered samples).
+     *
+     * When time limit is not used the number of samples per render iteration is either increasing
+     * or stays the same, so there is no need to clamp number of samples calculated for occupancy.
+     */
+    if (time_limit_ && state_.start_render_time) {
+      const double remaining_render_time = max(
+          0.0, time_limit_ - (time_dt() - state_.start_render_time));
+      const double time_per_sample_average = path_trace_time_.get_average();
+      const double predicted_render_time = num_samples_to_occupy * time_per_sample_average;
+
+      if (predicted_render_time > remaining_render_time) {
+        num_samples_to_occupy = lround(num_samples_to_occupy *
+                                       (remaining_render_time / predicted_render_time));
+      }
+    }
+
     num_samples_to_render = max(num_samples_to_render,
                                 min(num_samples_to_occupy, max_num_samples_to_render));
   }
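The clamp above is simple proportional scaling. A worked example with illustrative numbers, not taken from the patch: with 2 s of render time remaining and an average of 0.1 s per sample, a request for 40 samples is predicted to take 4 s, so it is scaled down to 40 * (2 / 4) = 20 samples.

```cpp
#include <cmath>
#include <cstdio>

int main()
{
  const double remaining_render_time = 2.0;   /* time_limit_ minus elapsed time */
  const double time_per_sample_average = 0.1; /* running average per sample */
  long num_samples_to_occupy = 40;

  const double predicted_render_time = num_samples_to_occupy * time_per_sample_average;
  if (predicted_render_time > remaining_render_time) {
    /* Scale the request so the predicted time fits the remaining budget. */
    num_samples_to_occupy = lround(num_samples_to_occupy *
                                   (remaining_render_time / predicted_render_time));
  }
  printf("clamped to %ld samples\n", num_samples_to_occupy); /* prints 20 */
  return 0;
}
```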
@@ -46,7 +46,8 @@ ccl_device_inline uint round_up_to_power_of_two(uint x)
   return next_power_of_two(x);
 }
 
-TileSize tile_calculate_best_size(const int2 &image_size,
+TileSize tile_calculate_best_size(const bool accel_rt,
+                                  const int2 &image_size,
                                   const int num_samples,
                                   const int max_num_path_states,
                                   const float scrambling_distance)
@@ -73,7 +74,7 @@ TileSize tile_calculate_best_size(const int2 &image_size,
 
   TileSize tile_size;
   const int num_path_states_per_sample = max_num_path_states / num_samples;
-  if (scrambling_distance < 0.9f) {
+  if (scrambling_distance < 0.9f && accel_rt) {
     /* Prefer large tiles for scrambling distance, bounded by max num path states. */
     tile_size.width = min(image_size.x, max_num_path_states);
     tile_size.height = min(image_size.y, max(max_num_path_states / tile_size.width, 1));
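The large-tile branch above bounds the tile by the number of path states. A standalone sketch of the arithmetic under hypothetical inputs (a 1920x1080 image, 1,000,000 path states, scrambling distance below 0.9, hardware RT available):

```cpp
#include <algorithm>
#include <cstdio>

int main()
{
  const int image_x = 1920, image_y = 1080;
  const int max_num_path_states = 1000000;

  /* Prefer large tiles, bounded by the available path states. */
  const int width = std::min(image_x, max_num_path_states); /* 1920 */
  const int height = std::min(image_y,
                              std::max(max_num_path_states / width, 1)); /* min(1080, 520) = 520 */

  printf("tile: %d x %d\n", width, height);
  return 0;
}
```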
@@ -49,7 +49,8 @@ std::ostream &operator<<(std::ostream &os, const TileSize &tile_size);
  * of active path states.
  * Will attempt to provide best guess to keep path tracing threads of a device as localized as
  * possible, and have as many threads active for every tile as possible. */
-TileSize tile_calculate_best_size(const int2 &image_size,
+TileSize tile_calculate_best_size(const bool accel_rt,
+                                  const int2 &image_size,
                                   const int num_samples,
                                   const int max_num_path_states,
                                   const float scrambling_distance);
@@ -28,6 +28,11 @@ WorkTileScheduler::WorkTileScheduler()
 {
 }
 
+void WorkTileScheduler::set_accelerated_rt(bool accelerated_rt)
+{
+  accelerated_rt_ = accelerated_rt;
+}
+
 void WorkTileScheduler::set_max_num_path_states(int max_num_path_states)
 {
   max_num_path_states_ = max_num_path_states;
@@ -59,7 +64,7 @@ void WorkTileScheduler::reset(const BufferParams &buffer_params,
 void WorkTileScheduler::reset_scheduler_state()
 {
   tile_size_ = tile_calculate_best_size(
-      image_size_px_, samples_num_, max_num_path_states_, scrambling_distance_);
+      accelerated_rt_, image_size_px_, samples_num_, max_num_path_states_, scrambling_distance_);
 
   VLOG(3) << "Will schedule tiles of size " << tile_size_;
 
@@ -31,6 +31,9 @@ class WorkTileScheduler {
  public:
   WorkTileScheduler();
 
+  /* To indicate if there is accelerated RT support. */
+  void set_accelerated_rt(bool state);
+
   /* MAximum path states which are allowed to be used by a single scheduled work tile.
    *
    * Affects the scheduled work size: the work size will be as big as possible, but will not exceed
@@ -54,6 +57,9 @@ class WorkTileScheduler {
  protected:
   void reset_scheduler_state();
 
+  /* Used to indicate if there is accelerated ray tracing. */
+  bool accelerated_rt_ = false;
+
   /* Maximum allowed path states to be used.
    *
    * TODO(sergey): Naming can be improved. The fact that this is a limiting factor based on the
@@ -39,10 +39,6 @@ set(SRC_KERNEL_DEVICE_HIP
   device/hip/kernel.cpp
 )
 
-set(SRC_KERNEL_DEVICE_METAL
-  device/metal/kernel.metal
-)
-
 set(SRC_KERNEL_DEVICE_OPTIX
   device/optix/kernel.cu
   device/optix/kernel_shader_raytrace.cu
@@ -83,13 +79,6 @@ set(SRC_KERNEL_DEVICE_OPTIX_HEADERS
   device/optix/globals.h
 )
 
-set(SRC_KERNEL_DEVICE_METAL_HEADERS
-  device/metal/compat.h
-  device/metal/context_begin.h
-  device/metal/context_end.h
-  device/metal/globals.h
-)
-
 set(SRC_KERNEL_CLOSURE_HEADERS
   closure/alloc.h
   closure/bsdf.h
@@ -576,6 +565,12 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
     set(name ${name}_experimental)
   endif()
 
+  if(WITH_NANOVDB)
+    set(hip_flags ${hip_flags}
+      -D WITH_NANOVDB
+      -I "${NANOVDB_INCLUDE_DIR}")
+  endif()
+
   if(WITH_CYCLES_DEBUG)
     set(hip_flags ${hip_flags} -D __KERNEL_DEBUG__)
   endif()
@@ -734,14 +729,12 @@ cycles_add_library(cycles_kernel "${LIB}"
   ${SRC_KERNEL_DEVICE_CUDA}
   ${SRC_KERNEL_DEVICE_HIP}
   ${SRC_KERNEL_DEVICE_OPTIX}
-  ${SRC_KERNEL_DEVICE_METAL}
   ${SRC_KERNEL_HEADERS}
   ${SRC_KERNEL_DEVICE_CPU_HEADERS}
   ${SRC_KERNEL_DEVICE_GPU_HEADERS}
   ${SRC_KERNEL_DEVICE_CUDA_HEADERS}
   ${SRC_KERNEL_DEVICE_HIP_HEADERS}
   ${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
-  ${SRC_KERNEL_DEVICE_METAL_HEADERS}
 )
 
 source_group("bake" FILES ${SRC_KERNEL_BAKE_HEADERS})
@@ -753,7 +746,6 @@ source_group("device\\cuda" FILES ${SRC_KERNEL_DEVICE_CUDA} ${SRC_KERNEL_DEVICE_
 source_group("device\\gpu" FILES ${SRC_KERNEL_DEVICE_GPU_HEADERS})
 source_group("device\\hip" FILES ${SRC_KERNEL_DEVICE_HIP} ${SRC_KERNEL_DEVICE_HIP_HEADERS})
 source_group("device\\optix" FILES ${SRC_KERNEL_DEVICE_OPTIX} ${SRC_KERNEL_DEVICE_OPTIX_HEADERS})
-source_group("device\\metal" FILES ${SRC_KERNEL_DEVICE_METAL} ${SRC_KERNEL_DEVICE_METAL_HEADERS})
 source_group("film" FILES ${SRC_KERNEL_FILM_HEADERS})
 source_group("geom" FILES ${SRC_KERNEL_GEOM_HEADERS})
 source_group("integrator" FILES ${SRC_KERNEL_INTEGRATOR_HEADERS})
@@ -786,8 +778,6 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP}" ${CYCLES_
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hip)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_METAL}" ${CYCLES_INSTALL_PATH}/source/kernel/device/metal)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_METAL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/metal)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_FILM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/film)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_GEOM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/geom)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_INTEGRATOR_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/integrator)
@@ -438,7 +438,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals kg,
   if (label & LABEL_TRANSMIT) {
     float threshold_squared = kernel_data.background.transparent_roughness_squared_threshold;
 
-    if (threshold_squared >= 0.0f) {
+    if (threshold_squared >= 0.0f && !(label & LABEL_DIFFUSE)) {
       if (bsdf_get_specular_roughness_squared(sc) <= threshold_squared) {
         label |= LABEL_TRANSMIT_TRANSPARENT;
       }
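Since `label` is a bitmask, a transmission event can also carry the diffuse bit, and the change above excludes that combination from the transparent promotion. A standalone sketch of the flag logic; the enum values here are illustrative, the real ones live in Cycles' kernel headers:

```cpp
/* Hypothetical flag values for illustration only. */
enum {
  LABEL_TRANSMIT = 1 << 0,
  LABEL_DIFFUSE = 1 << 1,
  LABEL_TRANSMIT_TRANSPARENT = 1 << 2,
};

int classify(int label, float roughness_squared, float threshold_squared)
{
  /* Only near-specular, non-diffuse transmission is treated as transparent. */
  if ((label & LABEL_TRANSMIT) && threshold_squared >= 0.0f && !(label & LABEL_DIFFUSE)) {
    if (roughness_squared <= threshold_squared) {
      label |= LABEL_TRANSMIT_TRANSPARENT;
    }
  }
  return label;
}
```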
@@ -37,7 +37,7 @@
 
 KERNEL_INTEGRATOR_INIT_FUNCTION(init_from_camera);
 KERNEL_INTEGRATOR_INIT_FUNCTION(init_from_bake);
-KERNEL_INTEGRATOR_FUNCTION(intersect_closest);
+KERNEL_INTEGRATOR_SHADE_FUNCTION(intersect_closest);
 KERNEL_INTEGRATOR_FUNCTION(intersect_shadow);
 KERNEL_INTEGRATOR_FUNCTION(intersect_subsurface);
 KERNEL_INTEGRATOR_FUNCTION(intersect_volume_stack);
@@ -112,7 +112,7 @@ CCL_NAMESPACE_BEGIN
 
 DEFINE_INTEGRATOR_INIT_KERNEL(init_from_camera)
 DEFINE_INTEGRATOR_INIT_KERNEL(init_from_bake)
-DEFINE_INTEGRATOR_KERNEL(intersect_closest)
+DEFINE_INTEGRATOR_SHADE_KERNEL(intersect_closest)
 DEFINE_INTEGRATOR_KERNEL(intersect_subsurface)
 DEFINE_INTEGRATOR_KERNEL(intersect_volume_stack)
 DEFINE_INTEGRATOR_SHADE_KERNEL(shade_background)
@@ -75,7 +75,6 @@ typedef unsigned long long uint64_t;
 #define ccl_gpu_block_idx_x (blockIdx.x)
 #define ccl_gpu_grid_dim_x (gridDim.x)
 #define ccl_gpu_warp_size (warpSize)
-#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
 
 #define ccl_gpu_global_id_x() (ccl_gpu_block_idx_x * ccl_gpu_block_dim_x + ccl_gpu_thread_idx_x)
 #define ccl_gpu_global_size_x() (ccl_gpu_grid_dim_x * ccl_gpu_block_dim_x)
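The mask macro removed above appears to be computed inline at its call site instead (see the `parallel_active_index.h` hunk later in this diff). A standalone sketch of the two common formulations of a "lanes below me" mask, with illustrative values; the 32-bit shift form only works for a 32-wide warp, while the 64-bit form also covers 64-wide SIMD groups:

```cpp
#include <cstdint>

/* Valid only when warp_size == 32 and lane < 32. */
uint32_t mask32(unsigned warp_size, unsigned lane)
{
  return uint32_t(0xFFFFFFFFu >> (warp_size - lane));
}

/* Valid for lane < 64, independent of warp width. */
uint64_t mask64(unsigned lane)
{
  return (1ull << lane) - 1;
}
```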
@@ -92,29 +92,12 @@
 
 /* Compute number of threads per block and minimum blocks per multiprocessor
  * given the maximum number of registers per thread. */
 
 #define ccl_gpu_kernel(block_num_threads, thread_num_registers) \
   extern "C" __global__ void __launch_bounds__(block_num_threads, \
                                                GPU_MULTIPRESSOR_MAX_REGISTERS / \
                                                (block_num_threads * thread_num_registers))
 
-#define ccl_gpu_kernel_threads(block_num_threads) \
-  extern "C" __global__ void __launch_bounds__(block_num_threads)
-
-#define ccl_gpu_kernel_signature(name, ...) kernel_gpu_##name(__VA_ARGS__)
-
-#define ccl_gpu_kernel_call(x) x
-
-/* Define a function object where "func" is the lambda body, and additional parameters are used to
- * specify captured state */
-#define ccl_gpu_kernel_lambda(func, ...) \
-  struct KernelLambda { \
-    __VA_ARGS__; \
-    __device__ int operator()(const int state) \
-    { \
-      return (func); \
-    } \
-  } ccl_gpu_kernel_lambda_pass
-
 /* sanity checks */
 
 #if GPU_KERNEL_BLOCK_NUM_THREADS > GPU_BLOCK_MAX_THREADS
@@ -65,9 +65,7 @@ ccl_device float cubic_h1(float a)
 
 /* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. */
 template<typename T>
-ccl_device_noinline T kernel_tex_image_interp_bicubic(ccl_global const TextureInfo &info,
-                                                      float x,
-                                                      float y)
+ccl_device_noinline T kernel_tex_image_interp_bicubic(const TextureInfo &info, float x, float y)
 {
   ccl_gpu_tex_object tex = (ccl_gpu_tex_object)info.data;
 
@@ -96,7 +94,7 @@ ccl_device_noinline T kernel_tex_image_interp_bicubic(ccl_global const TextureIn
 /* Fast tricubic texture lookup using 8 trilinear lookups. */
 template<typename T>
 ccl_device_noinline T
-kernel_tex_image_interp_tricubic(ccl_global const TextureInfo &info, float x, float y, float z)
+kernel_tex_image_interp_tricubic(const TextureInfo &info, float x, float y, float z)
 {
   ccl_gpu_tex_object tex = (ccl_gpu_tex_object)info.data;
 
@@ -171,7 +169,7 @@ ccl_device T kernel_tex_image_interp_tricubic_nanovdb(S &s, float x, float y, fl
 
 template<typename T>
 ccl_device_noinline T kernel_tex_image_interp_nanovdb(
-    ccl_global const TextureInfo &info, float x, float y, float z, uint interpolation)
+    const TextureInfo &info, float x, float y, float z, uint interpolation)
 {
   using namespace nanovdb;
 
@@ -193,7 +191,7 @@ ccl_device_noinline T kernel_tex_image_interp_nanovdb(
 
 ccl_device float4 kernel_tex_image_interp(KernelGlobals kg, int id, float x, float y)
 {
-  ccl_global const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+  const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
 
   /* float4, byte4, ushort4 and half4 */
   const int texture_type = info.data_type;
@@ -228,7 +226,7 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg,
                                              float3 P,
                                              InterpolationType interp)
 {
-  ccl_global const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+  const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
 
   if (info.use_transform_3d) {
     P = transform_point(&info.transform_3d, P);
(File diff suppressed because it is too large.)
@@ -31,43 +31,10 @@ CCL_NAMESPACE_BEGIN
 #  define GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE 512
 #endif
 
-#ifdef __KERNEL_METAL__
-struct ActiveIndexContext {
-  ActiveIndexContext(int _thread_index,
-                     int _global_index,
-                     int _threadgroup_size,
-                     int _simdgroup_size,
-                     int _simd_lane_index,
-                     int _simd_group_index,
-                     int _num_simd_groups,
-                     threadgroup int *_simdgroup_offset)
-      : thread_index(_thread_index),
-        global_index(_global_index),
-        blocksize(_threadgroup_size),
-        ccl_gpu_warp_size(_simdgroup_size),
-        thread_warp(_simd_lane_index),
-        warp_index(_simd_group_index),
-        num_warps(_num_simd_groups),
-        warp_offset(_simdgroup_offset)
-  {
-  }
-
-  const int thread_index, global_index, blocksize, ccl_gpu_warp_size, thread_warp, warp_index,
-      num_warps;
-  threadgroup int *warp_offset;
-
-  template<uint blocksizeDummy, typename IsActiveOp>
-  void active_index_array(const uint num_states,
-                          ccl_global int *indices,
-                          ccl_global int *num_indices,
-                          IsActiveOp is_active_op)
-  {
-    const uint state_index = global_index;
-#else
 template<uint blocksize, typename IsActiveOp>
 __device__ void gpu_parallel_active_index_array(const uint num_states,
-                                                ccl_global int *indices,
-                                                ccl_global int *num_indices,
+                                                int *indices,
+                                                int *num_indices,
                                                 IsActiveOp is_active_op)
 {
   extern ccl_gpu_shared int warp_offset[];
@@ -78,62 +45,43 @@ __device__ void gpu_parallel_active_index_array(const uint num_states,
   const uint warp_index = thread_index / ccl_gpu_warp_size;
   const uint num_warps = blocksize / ccl_gpu_warp_size;
 
+  /* Test if state corresponding to this thread is active. */
   const uint state_index = ccl_gpu_block_idx_x * blocksize + thread_index;
-#endif
-
-  /* Test if state corresponding to this thread is active. */
   const uint is_active = (state_index < num_states) ? is_active_op(state_index) : 0;
 
   /* For each thread within a warp compute how many other active states precede it. */
-  const uint thread_offset = ccl_gpu_popc(ccl_gpu_ballot(is_active) &
-                                          ccl_gpu_thread_mask(thread_warp));
+  const uint thread_mask = 0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp);
+  const uint thread_offset = ccl_gpu_popc(ccl_gpu_ballot(is_active) & thread_mask);
 
   /* Last thread in warp stores number of active states for each warp. */
   if (thread_warp == ccl_gpu_warp_size - 1) {
     warp_offset[warp_index] = thread_offset + is_active;
   }
 
   ccl_gpu_syncthreads();
 
   /* Last thread in block converts per-warp sizes to offsets, increments global size of
    * index array and gets offset to write to. */
   if (thread_index == blocksize - 1) {
     /* TODO: parallelize this. */
     int offset = 0;
     for (int i = 0; i < num_warps; i++) {
       int num_active = warp_offset[i];
       warp_offset[i] = offset;
       offset += num_active;
     }
 
     const uint block_num_active = warp_offset[warp_index] + thread_offset + is_active;
     warp_offset[num_warps] = atomic_fetch_and_add_uint32(num_indices, block_num_active);
   }
 
   ccl_gpu_syncthreads();
 
   /* Write to index array. */
   if (is_active) {
     const uint block_offset = warp_offset[num_warps];
     indices[block_offset + warp_offset[warp_index] + thread_offset] = state_index;
   }
 }
 
-#ifdef __KERNEL_METAL__
-}; /* end class ActiveIndexContext */
-
-/* inject the required thread params into a struct, and redirect to its templated member function
- */
-#  define gpu_parallel_active_index_array \
-    ActiveIndexContext(metal_local_id, \
-                       metal_global_id, \
-                       metal_local_size, \
-                       simdgroup_size, \
-                       simd_lane_index, \
-                       simd_group_index, \
-                       num_simd_groups, \
-                       simdgroup_offset) \
-        .active_index_array
-#endif
-
 CCL_NAMESPACE_END
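The kernel above is warp-level stream compaction: each thread learns its write slot by counting the active lanes below it. A worked example on the CPU for clarity, with illustrative values; `__builtin_popcount` is a GCC/Clang builtin standing in for `ccl_gpu_popc`, and the 4-bit ballot is hypothetical:

```cpp
#include <cstdint>
#include <cstdio>

int main()
{
  const uint32_t ballot = 0b1011;  /* which lanes reported an active state */
  const uint32_t thread_warp = 3;  /* this thread's lane index within the warp */
  const uint32_t warp_size = 32;

  /* Mask selecting all lanes strictly below this one: 0b0111. */
  const uint32_t thread_mask = 0xFFFFFFFFu >> (warp_size - thread_warp);

  /* popcount(0b1011 & 0b0111) = 2: two active states precede this thread. */
  const int thread_offset = __builtin_popcount(ballot & thread_mask);

  printf("offset within warp = %d\n", thread_offset); /* prints 2 */
  return 0;
}
```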
@@ -33,12 +33,10 @@ CCL_NAMESPACE_BEGIN
 #  define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 512
 #endif
 
-__device__ void gpu_parallel_prefix_sum(const int global_id,
-                                        ccl_global int *counter,
-                                        ccl_global int *prefix_sum,
-                                        const int num_values)
+template<uint blocksize>
+__device__ void gpu_parallel_prefix_sum(int *counter, int *prefix_sum, const int num_values)
 {
-  if (global_id != 0) {
+  if (!(ccl_gpu_block_idx_x == 0 && ccl_gpu_thread_idx_x == 0)) {
     return;
   }
 
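Only the first thread of the first block survives the early return, so the scan itself is serial. The loop body is outside this hunk; one plausible shape of what that single thread computes, as a standalone sketch: an exclusive prefix sum over the per-key counters.

```cpp
/* Hypothetical serial body, for illustration only; the real loop is not
 * shown in this hunk. */
void exclusive_prefix_sum(int *counter, int *prefix_sum, const int num_values)
{
  int offset = 0;
  for (int i = 0; i < num_values; i++) {
    prefix_sum[i] = offset; /* where states with key i start writing */
    offset += counter[i];
  }
}
```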
@@ -33,16 +33,16 @@ CCL_NAMESPACE_BEGIN
 #endif
 #define GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY (~0)
 
-template<typename GetKeyOp>
-__device__ void gpu_parallel_sorted_index_array(const uint state_index,
-                                                const uint num_states,
+template<uint blocksize, typename GetKeyOp>
+__device__ void gpu_parallel_sorted_index_array(const uint num_states,
                                                 const int num_states_limit,
-                                                ccl_global int *indices,
-                                                ccl_global int *num_indices,
-                                                ccl_global int *key_counter,
-                                                ccl_global int *key_prefix_sum,
+                                                int *indices,
+                                                int *num_indices,
+                                                int *key_counter,
+                                                int *key_prefix_sum,
                                                 GetKeyOp get_key_op)
 {
+  const uint state_index = ccl_gpu_block_idx_x * blocksize + ccl_gpu_thread_idx_x;
   const int key = (state_index < num_states) ? get_key_op(state_index) :
                                                GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY;
 
@@ -29,17 +29,20 @@ ccl_device_inline void get_work_pixel(ccl_global const KernelWorkTile *tile,
                                       ccl_private uint *y,
                                       ccl_private uint *sample)
 {
-#if 0
-  /* Keep threads for the same sample together. */
-  uint tile_pixels = tile->w * tile->h;
-  uint sample_offset = global_work_index / tile_pixels;
-  uint pixel_offset = global_work_index - sample_offset * tile_pixels;
-#else
-  /* Keeping threads for the same pixel together.
-   * Appears to improve performance by a few % on CUDA and OptiX. */
-  uint sample_offset = global_work_index % tile->num_samples;
-  uint pixel_offset = global_work_index / tile->num_samples;
-#endif
+  uint sample_offset, pixel_offset;
+
+  if (kernel_data.integrator.scrambling_distance < 0.9f) {
+    /* Keep threads for the same sample together. */
+    uint tile_pixels = tile->w * tile->h;
+    sample_offset = global_work_index / tile_pixels;
+    pixel_offset = global_work_index - sample_offset * tile_pixels;
+  }
+  else {
+    /* Keeping threads for the same pixel together.
+     * Appears to improve performance by a few % on CUDA and OptiX. */
+    sample_offset = global_work_index % tile->num_samples;
+    pixel_offset = global_work_index / tile->num_samples;
+  }
 
   uint y_offset = pixel_offset / tile->w;
   uint x_offset = pixel_offset - y_offset * tile->w;
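The two branches above decompose the same flat work index in different orders. A worked example with illustrative values: a 4x2 tile (8 pixels), 16 samples, and `global_work_index = 21`.

```cpp
#include <cstdio>

int main()
{
  const unsigned w = 4, h = 2, num_samples = 16, global_work_index = 21;

  /* Sample-major (scrambling distance < 0.9): threads of one sample stay together. */
  const unsigned tile_pixels = w * h;                                          /* 8 */
  const unsigned sample_offset_a = global_work_index / tile_pixels;            /* 2 */
  const unsigned pixel_offset_a = global_work_index - sample_offset_a * tile_pixels; /* 5 */

  /* Pixel-major: threads of one pixel stay together. */
  const unsigned sample_offset_b = global_work_index % num_samples; /* 5 */
  const unsigned pixel_offset_b = global_work_index / num_samples;  /* 1 */

  printf("sample-major: sample %u, pixel %u\n", sample_offset_a, pixel_offset_a);
  printf("pixel-major:  sample %u, pixel %u\n", sample_offset_b, pixel_offset_b);
  return 0;
}
```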
@@ -74,7 +74,6 @@ typedef unsigned long long uint64_t;
 #define ccl_gpu_block_idx_x (blockIdx.x)
 #define ccl_gpu_grid_dim_x (gridDim.x)
 #define ccl_gpu_warp_size (warpSize)
-#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
 
 #define ccl_gpu_global_id_x() (ccl_gpu_block_idx_x * ccl_gpu_block_dim_x + ccl_gpu_thread_idx_x)
 #define ccl_gpu_global_size_x() (ccl_gpu_grid_dim_x * ccl_gpu_block_dim_x)
@@ -35,29 +35,12 @@
 
 /* Compute number of threads per block and minimum blocks per multiprocessor
  * given the maximum number of registers per thread. */
 
 #define ccl_gpu_kernel(block_num_threads, thread_num_registers) \
   extern "C" __global__ void __launch_bounds__(block_num_threads, \
                                                GPU_MULTIPRESSOR_MAX_REGISTERS / \
                                                (block_num_threads * thread_num_registers))
 
-#define ccl_gpu_kernel_threads(block_num_threads) \
-  extern "C" __global__ void __launch_bounds__(block_num_threads)
-
-#define ccl_gpu_kernel_signature(name, ...) kernel_gpu_##name(__VA_ARGS__)
-
-#define ccl_gpu_kernel_call(x) x
-
-/* Define a function object where "func" is the lambda body, and additional parameters are used to
- * specify captured state */
-#define ccl_gpu_kernel_lambda(func, ...) \
-  struct KernelLambda { \
-    __VA_ARGS__; \
-    __device__ int operator()(const int state) \
-    { \
-      return (func); \
-    } \
-  } ccl_gpu_kernel_lambda_pass
-
 /* sanity checks */
 
 #if GPU_KERNEL_BLOCK_NUM_THREADS > GPU_BLOCK_MAX_THREADS
@@ -58,96 +58,6 @@ using namespace metal;
 
 #define kernel_assert(cond)
 
-#define ccl_gpu_global_id_x() metal_global_id
-#define ccl_gpu_warp_size simdgroup_size
-#define ccl_gpu_thread_idx_x simd_group_index
-#define ccl_gpu_thread_mask(thread_warp) uint64_t((1ull << thread_warp) - 1)
-
-#define ccl_gpu_ballot(predicate) ((uint64_t)((simd_vote::vote_t)simd_ballot(predicate)))
-#define ccl_gpu_popc(x) popcount(x)
-
-// clang-format off
-
-/* kernel.h adapters */
-
-#define ccl_gpu_kernel(block_num_threads, thread_num_registers)
-#define ccl_gpu_kernel_threads(block_num_threads)
-
-/* convert a comma-separated list into a semicolon-separated list (so that we can generate a struct based on kernel entrypoint parameters) */
-#define FN0()
-#define FN1(p1) p1;
-#define FN2(p1, p2) p1; p2;
-#define FN3(p1, p2, p3) p1; p2; p3;
-#define FN4(p1, p2, p3, p4) p1; p2; p3; p4;
-#define FN5(p1, p2, p3, p4, p5) p1; p2; p3; p4; p5;
-#define FN6(p1, p2, p3, p4, p5, p6) p1; p2; p3; p4; p5; p6;
-#define FN7(p1, p2, p3, p4, p5, p6, p7) p1; p2; p3; p4; p5; p6; p7;
-#define FN8(p1, p2, p3, p4, p5, p6, p7, p8) p1; p2; p3; p4; p5; p6; p7; p8;
-#define FN9(p1, p2, p3, p4, p5, p6, p7, p8, p9) p1; p2; p3; p4; p5; p6; p7; p8; p9;
-#define FN10(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10;
-#define FN11(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11;
-#define FN12(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12;
-#define FN13(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12; p13;
-#define FN14(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12; p13; p14;
-#define FN15(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12; p13; p14; p15;
-#define FN16(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16) p1; p2; p3; p4; p5; p6; p7; p8; p9; p10; p11; p12; p13; p14; p15; p16;
-#define GET_LAST_ARG(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, ...) p16
-#define PARAMS_MAKER(...) GET_LAST_ARG(__VA_ARGS__, FN16, FN15, FN14, FN13, FN12, FN11, FN10, FN9, FN8, FN7, FN6, FN5, FN4, FN3, FN2, FN1, FN0)
-
-/* generate a struct containing the entrypoint parameters and a "run" method which can access them implicitly via this-> */
-#define ccl_gpu_kernel_signature(name, ...) \
-struct kernel_gpu_##name \
-{ \
-  PARAMS_MAKER(__VA_ARGS__)(__VA_ARGS__) \
-  void run(thread MetalKernelContext& context, \
-           threadgroup int *simdgroup_offset, \
-           const uint metal_global_id, \
-           const ushort metal_local_id, \
-           const ushort metal_local_size, \
-           uint simdgroup_size, \
-           uint simd_lane_index, \
-           uint simd_group_index, \
-           uint num_simd_groups) ccl_global const; \
-}; \
-kernel void kernel_metal_##name(device const kernel_gpu_##name *params_struct, \
-                                constant KernelParamsMetal &ccl_restrict _launch_params_metal, \
-                                constant MetalAncillaries *_metal_ancillaries, \
-                                threadgroup int *simdgroup_offset[[ threadgroup(0) ]], \
-                                const uint metal_global_id [[thread_position_in_grid]], \
-                                const ushort metal_local_id [[thread_position_in_threadgroup]], \
-                                const ushort metal_local_size [[threads_per_threadgroup]], \
-                                uint simdgroup_size [[threads_per_simdgroup]], \
-                                uint simd_lane_index [[thread_index_in_simdgroup]], \
-                                uint simd_group_index [[simdgroup_index_in_threadgroup]], \
-                                uint num_simd_groups [[simdgroups_per_threadgroup]]) { \
-  MetalKernelContext context(_launch_params_metal, _metal_ancillaries); \
-  INIT_DEBUG_BUFFER \
-  params_struct->run(context, simdgroup_offset, metal_global_id, metal_local_id, metal_local_size, simdgroup_size, simd_lane_index, simd_group_index, num_simd_groups); \
-} \
-void kernel_gpu_##name::run(thread MetalKernelContext& context, \
-                            threadgroup int *simdgroup_offset, \
-                            const uint metal_global_id, \
-                            const ushort metal_local_id, \
-                            const ushort metal_local_size, \
-                            uint simdgroup_size, \
-                            uint simd_lane_index, \
-                            uint simd_group_index, \
-                            uint num_simd_groups) ccl_global const
-
-#define ccl_gpu_kernel_call(x) context.x
-
-/* define a function object where "func" is the lambda body, and additional parameters are used to specify captured state */
-#define ccl_gpu_kernel_lambda(func, ...) \
-  struct KernelLambda \
-  { \
-    KernelLambda(ccl_private MetalKernelContext &_context) : context(_context) {} \
-    ccl_private MetalKernelContext &context; \
-    __VA_ARGS__; \
-    int operator()(const int state) const { return (func); } \
-  } ccl_gpu_kernel_lambda_pass(context)
-
-// clang-format on
-
 /* make_type definitions with Metal style element initializers */
 #ifdef make_float2
 #  undef make_float2
@@ -214,38 +124,3 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \
 #define logf(x) trigmode::log(float(x))
 
 #define NULL 0
-
-/* texture bindings and sampler setup */
-
-struct Texture2DParamsMetal {
-  texture2d<float, access::sample> tex;
-};
-struct Texture3DParamsMetal {
-  texture3d<float, access::sample> tex;
-};
-
-struct MetalAncillaries {
-  device Texture2DParamsMetal *textures_2d;
-  device Texture3DParamsMetal *textures_3d;
-};
-
-enum SamplerType {
-  SamplerFilterNearest_AddressRepeat,
-  SamplerFilterNearest_AddressClampEdge,
-  SamplerFilterNearest_AddressClampZero,
-
-  SamplerFilterLinear_AddressRepeat,
-  SamplerFilterLinear_AddressClampEdge,
-  SamplerFilterLinear_AddressClampZero,
-
-  SamplerCount
-};
-
-constant constexpr array<sampler, SamplerCount> metal_samplers = {
-  sampler(address::repeat, filter::nearest),
-  sampler(address::clamp_to_edge, filter::nearest),
-  sampler(address::clamp_to_zero, filter::nearest),
-  sampler(address::repeat, filter::linear),
-  sampler(address::clamp_to_edge, filter::linear),
-  sampler(address::clamp_to_zero, filter::linear),
-};
@@ -1,79 +0,0 @@
-/*
- * Copyright 2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// clang-format off
-
-/* Open the Metal kernel context class
- * Necessary to access resource bindings */
-class MetalKernelContext {
-  public:
-    constant KernelParamsMetal &launch_params_metal;
-    constant MetalAncillaries *metal_ancillaries;
-
-    MetalKernelContext(constant KernelParamsMetal &_launch_params_metal, constant MetalAncillaries * _metal_ancillaries)
-    : launch_params_metal(_launch_params_metal), metal_ancillaries(_metal_ancillaries)
-    {}
-
-    /* texture fetch adapter functions */
-    typedef uint64_t ccl_gpu_tex_object;
-
-    template<typename T>
-    inline __attribute__((__always_inline__))
-    T ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object tex, float x, float y) const {
-      kernel_assert(0);
-      return 0;
-    }
-    template<typename T>
-    inline __attribute__((__always_inline__))
-    T ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object tex, float x, float y, float z) const {
-      kernel_assert(0);
-      return 0;
-    }
-
-    // texture2d
-    template<>
-    inline __attribute__((__always_inline__))
-    float4 ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object tex, float x, float y) const {
-      const uint tid(tex);
-      const uint sid(tex >> 32);
-      return metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], float2(x, y));
-    }
-    template<>
-    inline __attribute__((__always_inline__))
-    float ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object tex, float x, float y) const {
-      const uint tid(tex);
-      const uint sid(tex >> 32);
-      return metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], float2(x, y)).x;
-    }
-
-    // texture3d
-    template<>
-    inline __attribute__((__always_inline__))
-    float4 ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object tex, float x, float y, float z) const {
-      const uint tid(tex);
-      const uint sid(tex >> 32);
-      return metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], float3(x, y, z));
-    }
-    template<>
-    inline __attribute__((__always_inline__))
-    float ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object tex, float x, float y, float z) const {
-      const uint tid(tex);
-      const uint sid(tex >> 32);
-      return metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], float3(x, y, z)).x;
-    }
-#  include "kernel/device/gpu/image.h"
-
-// clang-format on
@@ -1,23 +0,0 @@
-/*
- * Copyright 2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-  }
-; /* end of MetalKernelContext class definition */
-
-/* Silently redirect into the MetalKernelContext instance */
-/* NOTE: These macros will need maintaining as entrypoints change */
-
-#undef kernel_integrator_state
-#define kernel_integrator_state context.launch_params_metal.__integrator_state
@@ -1,51 +0,0 @@
-/*
- * Copyright 2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Constant Globals */
-
-#include "kernel/types.h"
-#include "kernel/util/profiling.h"
-
-#include "kernel/integrator/state.h"
-
-CCL_NAMESPACE_BEGIN
-
-typedef struct KernelParamsMetal {
-
-#define KERNEL_TEX(type, name) ccl_constant type *name;
-#include "kernel/textures.h"
-#undef KERNEL_TEX
-
-  const IntegratorStateGPU __integrator_state;
-  const KernelData data;
-
-} KernelParamsMetal;
-
-typedef struct KernelGlobalsGPU {
-  int unused[1];
-} KernelGlobalsGPU;
-
-typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals;
-
-#define kernel_data launch_params_metal.data
-#define kernel_integrator_state launch_params_metal.__integrator_state
-
-/* data lookup defines */
-
-#define kernel_tex_fetch(tex, index) launch_params_metal.tex[index]
-#define kernel_tex_array(tex) launch_params_metal.tex
-
-CCL_NAMESPACE_END
@@ -1,25 +0,0 @@
-/*
- * Copyright 2021 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Metal kernel entry points */
-
-// clang-format off
-
-#include "kernel/device/metal/compat.h"
-#include "kernel/device/metal/globals.h"
-#include "kernel/device/gpu/kernel.h"
-
-// clang-format on
@@ -76,7 +76,6 @@ typedef unsigned long long uint64_t;
 #define ccl_gpu_block_idx_x (blockIdx.x)
 #define ccl_gpu_grid_dim_x (gridDim.x)
 #define ccl_gpu_warp_size (warpSize)
-#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
 
 #define ccl_gpu_global_id_x() (ccl_gpu_block_idx_x * ccl_gpu_block_dim_x + ccl_gpu_thread_idx_x)
 #define ccl_gpu_global_size_x() (ccl_gpu_grid_dim_x * ccl_gpu_block_dim_x)
@@ -57,7 +57,7 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_closest()
   const int global_index = optixGetLaunchIndex().x;
   const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] :
                                                        global_index;
-  integrator_intersect_closest(nullptr, path_index);
+  integrator_intersect_closest(nullptr, path_index, __params.render_buffer);
 }
 
 extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_shadow()
@@ -33,62 +33,72 @@ CCL_NAMESPACE_BEGIN
  * them separately. */
 
 ccl_device_inline void bsdf_eval_init(ccl_private BsdfEval *eval,
-                                      const bool is_diffuse,
+                                      const ClosureType closure_type,
                                       float3 value)
 {
   eval->diffuse = zero_float3();
   eval->glossy = zero_float3();
 
-  if (is_diffuse) {
+  if (CLOSURE_IS_BSDF_DIFFUSE(closure_type)) {
     eval->diffuse = value;
   }
-  else {
+  else if (CLOSURE_IS_BSDF_GLOSSY(closure_type)) {
     eval->glossy = value;
   }
+
+  eval->sum = value;
 }
 
 ccl_device_inline void bsdf_eval_accum(ccl_private BsdfEval *eval,
-                                       const bool is_diffuse,
-                                       float3 value,
-                                       float mis_weight)
+                                       const ClosureType closure_type,
+                                       float3 value)
 {
-  value *= mis_weight;
-
-  if (is_diffuse) {
+  if (CLOSURE_IS_BSDF_DIFFUSE(closure_type)) {
     eval->diffuse += value;
   }
-  else {
+  else if (CLOSURE_IS_BSDF_GLOSSY(closure_type)) {
     eval->glossy += value;
   }
+
+  eval->sum += value;
 }
 
 ccl_device_inline bool bsdf_eval_is_zero(ccl_private BsdfEval *eval)
 {
-  return is_zero(eval->diffuse) && is_zero(eval->glossy);
+  return is_zero(eval->sum);
 }
 
 ccl_device_inline void bsdf_eval_mul(ccl_private BsdfEval *eval, float value)
 {
   eval->diffuse *= value;
   eval->glossy *= value;
+  eval->sum *= value;
 }
 
 ccl_device_inline void bsdf_eval_mul3(ccl_private BsdfEval *eval, float3 value)
 {
   eval->diffuse *= value;
   eval->glossy *= value;
+  eval->sum *= value;
 }
 
 ccl_device_inline float3 bsdf_eval_sum(ccl_private const BsdfEval *eval)
 {
-  return eval->diffuse + eval->glossy;
+  return eval->sum;
 }
 
-ccl_device_inline float3 bsdf_eval_diffuse_glossy_ratio(ccl_private const BsdfEval *eval)
+ccl_device_inline float3 bsdf_eval_pass_diffuse_weight(ccl_private const BsdfEval *eval)
 {
-  /* Ratio of diffuse and glossy to recover proportions for writing to render pass.
+  /* Ratio of diffuse weight to recover proportions for writing to render pass.
    * We assume reflection, transmission and volume scatter to be exclusive. */
-  return safe_divide_float3_float3(eval->diffuse, eval->diffuse + eval->glossy);
+  return safe_divide_float3_float3(eval->diffuse, eval->sum);
+}
+
+ccl_device_inline float3 bsdf_eval_pass_glossy_weight(ccl_private const BsdfEval *eval)
+{
+  /* Ratio of glossy weight to recover proportions for writing to render pass.
+   * We assume reflection, transmission and volume scatter to be exclusive. */
+  return safe_divide_float3_float3(eval->glossy, eval->sum);
 }
 
 /* --------------------------------------------------------------------
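The new `sum` field makes the per-pass weights cheap to derive and leaves transmission implicit. A minimal scalar sketch of the bookkeeping (the real code uses `float3` and the `CLOSURE_IS_*` macros, which are Cycles internals); with a diffuse closure of 0.2, a glossy closure of 0.3 and a transmission closure of 0.5 accumulated into the same eval, `sum` is 1.0, so the diffuse weight is 0.2, the glossy weight is 0.3, and transmission never needs its own field:

```cpp
struct BsdfEvalScalar {
  float diffuse = 0.0f, glossy = 0.0f, sum = 0.0f;
};

void accum(BsdfEvalScalar &eval, bool is_diffuse, bool is_glossy, float value)
{
  if (is_diffuse) {
    eval.diffuse += value;
  }
  else if (is_glossy) {
    eval.glossy += value;
  }
  eval.sum += value; /* transmission and other closures land only here */
}

float pass_diffuse_weight(const BsdfEvalScalar &eval)
{
  return eval.sum != 0.0f ? eval.diffuse / eval.sum : 0.0f; /* safe divide */
}
```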
@@ -351,37 +361,47 @@ ccl_device_inline void kernel_accum_emission_or_background_pass(KernelGlobals kg
     /* Directly visible, write to emission or background pass. */
     pass_offset = pass;
   }
-  else if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) {
-    /* Indirectly visible through reflection. */
-    const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ?
-                                       ((INTEGRATOR_STATE(state, path, bounce) == 1) ?
-                                            kernel_data.film.pass_glossy_direct :
-                                            kernel_data.film.pass_glossy_indirect) :
-                                       ((INTEGRATOR_STATE(state, path, bounce) == 1) ?
-                                            kernel_data.film.pass_transmission_direct :
-                                            kernel_data.film.pass_transmission_indirect);
-
-    if (glossy_pass_offset != PASS_UNUSED) {
-      /* Glossy is a subset of the throughput, reconstruct it here using the
-       * diffuse-glossy ratio. */
-      const float3 ratio = INTEGRATOR_STATE(state, path, diffuse_glossy_ratio);
-      const float3 glossy_contribution = (one_float3() - ratio) * contribution;
-      kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution);
-    }
-
-    /* Reconstruct diffuse subset of throughput. */
-    pass_offset = (INTEGRATOR_STATE(state, path, bounce) == 1) ?
-                      kernel_data.film.pass_diffuse_direct :
-                      kernel_data.film.pass_diffuse_indirect;
-    if (pass_offset != PASS_UNUSED) {
-      contribution *= INTEGRATOR_STATE(state, path, diffuse_glossy_ratio);
+  else if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
+    if (path_flag & PATH_RAY_SURFACE_PASS) {
+      /* Indirectly visible through reflection. */
+      const float3 diffuse_weight = INTEGRATOR_STATE(state, path, pass_diffuse_weight);
+      const float3 glossy_weight = INTEGRATOR_STATE(state, path, pass_glossy_weight);
+
+      /* Glossy */
+      const int glossy_pass_offset = ((INTEGRATOR_STATE(state, path, bounce) == 1) ?
+                                          kernel_data.film.pass_glossy_direct :
+                                          kernel_data.film.pass_glossy_indirect);
+      if (glossy_pass_offset != PASS_UNUSED) {
+        kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_weight * contribution);
+      }
+
+      /* Transmission */
+      const int transmission_pass_offset = ((INTEGRATOR_STATE(state, path, bounce) == 1) ?
+                                                kernel_data.film.pass_transmission_direct :
+                                                kernel_data.film.pass_transmission_indirect);
+
+      if (transmission_pass_offset != PASS_UNUSED) {
+        /* Transmission is what remains if not diffuse and glossy, not stored explicitly to save
+         * GPU memory. */
+        const float3 transmission_weight = one_float3() - diffuse_weight - glossy_weight;
+        kernel_write_pass_float3(buffer + transmission_pass_offset,
+                                 transmission_weight * contribution);
+      }
+
+      /* Reconstruct diffuse subset of throughput. */
+      pass_offset = (INTEGRATOR_STATE(state, path, bounce) == 1) ?
+                        kernel_data.film.pass_diffuse_direct :
+                        kernel_data.film.pass_diffuse_indirect;
+      if (pass_offset != PASS_UNUSED) {
+        contribution *= diffuse_weight;
+      }
+    }
+    else if (path_flag & PATH_RAY_VOLUME_PASS) {
+      /* Indirectly visible through volume. */
+      pass_offset = (INTEGRATOR_STATE(state, path, bounce) == 1) ?
+                        kernel_data.film.pass_volume_direct :
+                        kernel_data.film.pass_volume_indirect;
     }
   }
-  else if (path_flag & PATH_RAY_VOLUME_PASS) {
-    /* Indirectly visible through volume. */
-    pass_offset = (INTEGRATOR_STATE(state, path, bounce) == 1) ?
-                      kernel_data.film.pass_volume_direct :
-                      kernel_data.film.pass_volume_indirect;
-  }
 
   /* Single write call for GPU coherence. */
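The transmission reconstruction above is plain complement arithmetic on the stored weights. A tiny worked example with scalar stand-ins (the kernel uses `float3` throughout):

```cpp
#include <cassert>
#include <cmath>

int main()
{
  // Assume first-bounce weights diffuse = 0.5 and glossy = 0.3 were stored;
  // transmission is implied as 1 - 0.5 - 0.3 = 0.2.
  const float diffuse_weight = 0.5f;
  const float glossy_weight = 0.3f;
  const float contribution = 1.2f;

  const float glossy_part = glossy_weight * contribution;      // 0.36
  const float transmission_part =
      (1.0f - diffuse_weight - glossy_weight) * contribution;  // 0.24
  const float diffuse_part = diffuse_weight * contribution;    // 0.60

  // The three pass contributions sum back to the combined contribution.
  assert(std::fabs(glossy_part + transmission_part + diffuse_part - contribution) < 1e-6f);
  return 0;
}
```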
@@ -426,49 +446,60 @@ ccl_device_inline void kernel_accum_light(KernelGlobals kg,
 #ifdef __PASSES__
   if (kernel_data.film.light_pass_flag & PASS_ANY) {
     const uint32_t path_flag = INTEGRATOR_STATE(state, shadow_path, flag);
-    int pass_offset = PASS_UNUSED;
 
-    if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) {
-      /* Indirectly visible through reflection. */
-      const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ?
-                                         ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
-                                              kernel_data.film.pass_glossy_direct :
-                                              kernel_data.film.pass_glossy_indirect) :
-                                         ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
-                                              kernel_data.film.pass_transmission_direct :
-                                              kernel_data.film.pass_transmission_indirect);
-
-      if (glossy_pass_offset != PASS_UNUSED) {
-        /* Glossy is a subset of the throughput, reconstruct it here using the
-         * diffuse-glossy ratio. */
-        const float3 ratio = INTEGRATOR_STATE(state, shadow_path, diffuse_glossy_ratio);
-        const float3 glossy_contribution = (one_float3() - ratio) * contribution;
-        kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution);
-      }
-
-      /* Reconstruct diffuse subset of throughput. */
-      pass_offset = (INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
-                        kernel_data.film.pass_diffuse_direct :
-                        kernel_data.film.pass_diffuse_indirect;
-      if (pass_offset != PASS_UNUSED) {
-        contribution *= INTEGRATOR_STATE(state, shadow_path, diffuse_glossy_ratio);
-      }
-    }
-    else if (path_flag & PATH_RAY_VOLUME_PASS) {
-      /* Indirectly visible through volume. */
-      pass_offset = (INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
-                        kernel_data.film.pass_volume_direct :
-                        kernel_data.film.pass_volume_indirect;
-    }
-
-    /* Single write call for GPU coherence. */
-    if (pass_offset != PASS_UNUSED) {
-      kernel_write_pass_float3(buffer + pass_offset, contribution);
+    if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
+      int pass_offset = PASS_UNUSED;
+
+      if (path_flag & PATH_RAY_SURFACE_PASS) {
+        /* Indirectly visible through reflection. */
+        const float3 diffuse_weight = INTEGRATOR_STATE(state, shadow_path, pass_diffuse_weight);
+        const float3 glossy_weight = INTEGRATOR_STATE(state, shadow_path, pass_glossy_weight);
+
+        /* Glossy */
+        const int glossy_pass_offset = ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
+                                            kernel_data.film.pass_glossy_direct :
+                                            kernel_data.film.pass_glossy_indirect);
+        if (glossy_pass_offset != PASS_UNUSED) {
+          kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_weight * contribution);
+        }
+
+        /* Transmission */
+        const int transmission_pass_offset = ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
+                                                  kernel_data.film.pass_transmission_direct :
+                                                  kernel_data.film.pass_transmission_indirect);
+
+        if (transmission_pass_offset != PASS_UNUSED) {
+          /* Transmission is what remains if not diffuse and glossy, not stored explicitly to save
+           * GPU memory. */
+          const float3 transmission_weight = one_float3() - diffuse_weight - glossy_weight;
+          kernel_write_pass_float3(buffer + transmission_pass_offset,
+                                   transmission_weight * contribution);
+        }
+
+        /* Reconstruct diffuse subset of throughput. */
+        pass_offset = (INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
+                          kernel_data.film.pass_diffuse_direct :
+                          kernel_data.film.pass_diffuse_indirect;
+        if (pass_offset != PASS_UNUSED) {
+          contribution *= diffuse_weight;
+        }
+      }
+      else if (path_flag & PATH_RAY_VOLUME_PASS) {
+        /* Indirectly visible through volume. */
+        pass_offset = (INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
+                          kernel_data.film.pass_volume_direct :
+                          kernel_data.film.pass_volume_indirect;
+      }
+
+      /* Single write call for GPU coherence. */
+      if (pass_offset != PASS_UNUSED) {
+        kernel_write_pass_float3(buffer + pass_offset, contribution);
+      }
     }
   }
 
   /* Write shadow pass. */
   if (kernel_data.film.pass_shadow != PASS_UNUSED && (path_flag & PATH_RAY_SHADOW_FOR_LIGHT) &&
-      (path_flag & PATH_RAY_CAMERA)) {
+      (path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
     const float3 unshadowed_throughput = INTEGRATOR_STATE(
         state, shadow_path, unshadowed_throughput);
     const float3 shadowed_throughput = INTEGRATOR_STATE(state, shadow_path, throughput);
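The `unshadowed_throughput` / `throughput` pair read at the end of this hunk feeds the shadow pass. As a hedged sketch of the usual ratio estimator (the exact normalization in the kernel is not shown in this hunk):

```cpp
// Sketch of a shadowed/unshadowed ratio estimator for a shadow pass, with a
// zero-denominator guard. Scalar stand-in for float3; an illustration of the
// idea, not the kernel's exact code.
inline float shadow_ratio_sketch(float shadowed_throughput, float unshadowed_throughput)
{
  return (unshadowed_throughput != 0.0f) ? shadowed_throughput / unshadowed_throughput : 1.0f;
}
```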
@@ -160,40 +160,6 @@ ccl_device_forceinline void kernel_write_denoising_features_volume(KernelGlobals
 }
 #endif /* __DENOISING_FEATURES__ */
 
-#ifdef __SHADOW_CATCHER__
-
-/* Write shadow catcher passes on a bounce from the shadow catcher object. */
-ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data(
-    KernelGlobals kg,
-    IntegratorState state,
-    ccl_private const ShaderData *sd,
-    ccl_global float *ccl_restrict render_buffer)
-{
-  if (!kernel_data.integrator.has_shadow_catcher) {
-    return;
-  }
-
-  kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED);
-  kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
-
-  if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, sd->object_flag)) {
-    return;
-  }
-
-  ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer);
-
-  /* Count sample for the shadow catcher object. */
-  kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f);
-
-  /* Since the split is done, the sample does not contribute to the matte, so accumulate it as
-   * transparency to the matte. */
-  const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
-  kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3,
-                          average(throughput));
-}
-
-#endif /* __SHADOW_CATCHER__ */
-
 ccl_device_inline size_t kernel_write_id_pass(ccl_global float *ccl_restrict buffer,
                                               size_t depth,
                                               float id,
@@ -211,7 +177,7 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals kg,
 #ifdef __PASSES__
   const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
 
-  if (!(path_flag & PATH_RAY_CAMERA)) {
+  if (!(path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
     return;
   }
 
@@ -460,7 +460,7 @@ ccl_device_inline float4 film_calculate_shadow_catcher_matte_with_shadow(
   const float transparency = in_matte[3] * scale;
   const float alpha = saturatef(1.0f - transparency);
 
-  const float alpha_matte = (1.0f - alpha) * (1.0f - average(shadow_catcher)) + alpha;
+  const float alpha_matte = (1.0f - alpha) * (1.0f - saturatef(average(shadow_catcher))) + alpha;
 
   if (kfilm_convert->use_approximate_shadow_catcher_background) {
     kernel_assert(kfilm_convert->pass_background != PASS_UNUSED);
@@ -70,14 +70,16 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
   /* Setup render buffers. */
   const int index = INTEGRATOR_STATE(state, path, render_pixel_index);
   const int pass_stride = kernel_data.film.pass_stride;
-  render_buffer += index * pass_stride;
+  ccl_global float *buffer = render_buffer + index * pass_stride;
 
-  ccl_global float *primitive = render_buffer + kernel_data.film.pass_bake_primitive;
-  ccl_global float *differential = render_buffer + kernel_data.film.pass_bake_differential;
+  ccl_global float *primitive = buffer + kernel_data.film.pass_bake_primitive;
+  ccl_global float *differential = buffer + kernel_data.film.pass_bake_differential;
 
   const int seed = __float_as_uint(primitive[0]);
   int prim = __float_as_uint(primitive[1]);
   if (prim == -1) {
+    /* Accumulate transparency for empty pixels. */
+    kernel_accum_transparent(kg, state, 0, 1.0f, buffer);
     return false;
   }
 
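The bake hunk above switches from mutating `render_buffer` to a local `buffer` pointer, computed with the usual pass-interleaved addressing. A standalone sketch of that layout arithmetic:

```cpp
#include <cstdint>

// Each pixel owns pass_stride consecutive floats; a pass lives at a fixed
// offset inside that slice. Standalone illustration of the addressing used
// above, not kernel code.
inline float *pixel_pass_pointer(float *render_buffer,
                                 uint64_t pixel_index,
                                 uint64_t pass_stride,
                                 uint64_t pass_offset)
{
  return render_buffer + pixel_index * pass_stride + pass_offset;
}
```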
@@ -88,7 +88,10 @@ ccl_device_forceinline bool integrator_intersect_terminate(KernelGlobals kg,
 #ifdef __SHADOW_CATCHER__
 /* Split path if a shadow catcher was hit. */
 ccl_device_forceinline void integrator_split_shadow_catcher(
-    KernelGlobals kg, IntegratorState state, ccl_private const Intersection *ccl_restrict isect)
+    KernelGlobals kg,
+    IntegratorState state,
+    ccl_private const Intersection *ccl_restrict isect,
+    ccl_global float *ccl_restrict render_buffer)
 {
   /* Test if we hit a shadow catcher object, and potentially split the path to continue tracing two
    * paths from here. */
@@ -97,6 +100,8 @@ ccl_device_forceinline void integrator_split_shadow_catcher(
     return;
   }
 
+  kernel_write_shadow_catcher_bounce_data(kg, state, render_buffer);
+
   /* Mark state as having done a shadow catcher split so that it stops contributing to
    * the shadow catcher matte pass, but keeps contributing to the combined pass. */
   INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_HIT;
@@ -191,6 +196,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
     KernelGlobals kg,
     IntegratorState state,
     ccl_private const Intersection *ccl_restrict isect,
+    ccl_global float *ccl_restrict render_buffer,
     const bool hit)
 {
   /* Continue with volume kernel if we are inside a volume, regardless if we hit anything. */
@@ -233,7 +239,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
 
 #ifdef __SHADOW_CATCHER__
     /* Handle shadow catcher. */
-    integrator_split_shadow_catcher(kg, state, isect);
+    integrator_split_shadow_catcher(kg, state, isect, render_buffer);
 #endif
   }
   else {
@@ -253,7 +259,10 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
  * volume shading and termination testing have already been done. */
 template<uint32_t current_kernel>
 ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
-    KernelGlobals kg, IntegratorState state, ccl_private const Intersection *ccl_restrict isect)
+    KernelGlobals kg,
+    IntegratorState state,
+    ccl_private const Intersection *ccl_restrict isect,
+    ccl_global float *ccl_restrict render_buffer)
 {
   if (isect->prim != PRIM_NONE) {
     /* Hit a surface, continue with light or surface kernel. */
@@ -278,7 +287,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
 
 #ifdef __SHADOW_CATCHER__
     /* Handle shadow catcher. */
-    integrator_split_shadow_catcher(kg, state, isect);
+    integrator_split_shadow_catcher(kg, state, isect, render_buffer);
 #endif
     return;
   }
@@ -290,7 +299,9 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
   }
 }
 
-ccl_device void integrator_intersect_closest(KernelGlobals kg, IntegratorState state)
+ccl_device void integrator_intersect_closest(KernelGlobals kg,
+                                             IntegratorState state,
+                                             ccl_global float *ccl_restrict render_buffer)
 {
   PROFILING_INIT(kg, PROFILING_INTERSECT_CLOSEST);
 
@@ -341,7 +352,7 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg, IntegratorState s
 
   /* Setup up next kernel to be executed. */
   integrator_intersect_next_kernel<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>(
-      kg, state, &isect, hit);
+      kg, state, &isect, render_buffer, hit);
 }
 
 CCL_NAMESPACE_END
@@ -76,7 +76,7 @@ ccl_device void integrator_megakernel(KernelGlobals kg,
     if (queued_kernel) {
       switch (queued_kernel) {
         case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
-          integrator_intersect_closest(kg, state);
+          integrator_intersect_closest(kg, state, render_buffer);
           break;
         case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND:
           integrator_shade_background(kg, state, render_buffer);
@@ -70,6 +70,9 @@ ccl_device_inline void path_state_init_integrator(KernelGlobals kg,
   INTEGRATOR_STATE_WRITE(state, path, continuation_probability) = 1.0f;
   INTEGRATOR_STATE_WRITE(state, path, throughput) = make_float3(1.0f, 1.0f, 1.0f);
 
+  INTEGRATOR_STATE_WRITE(state, isect, object) = OBJECT_NONE;
+  INTEGRATOR_STATE_WRITE(state, isect, prim) = PRIM_NONE;
+
   if (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) {
     INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 0, object) = OBJECT_NONE;
     INTEGRATOR_STATE_ARRAY_WRITE(
@@ -122,7 +125,7 @@ ccl_device_inline void path_state_next(KernelGlobals kg, IntegratorState state,
     /* volume scatter */
     flag |= PATH_RAY_VOLUME_SCATTER;
     flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND;
-    if (bounce == 1) {
+    if (!(flag & PATH_RAY_ANY_PASS)) {
       flag |= PATH_RAY_VOLUME_PASS;
     }
 
@@ -184,8 +187,8 @@ ccl_device_inline void path_state_next(KernelGlobals kg, IntegratorState state,
     }
 
     /* Render pass categories. */
-    if (bounce == 1) {
-      flag |= (label & LABEL_TRANSMIT) ? PATH_RAY_TRANSMISSION_PASS : PATH_RAY_REFLECT_PASS;
+    if (!(flag & PATH_RAY_ANY_PASS) && !(flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
+      flag |= PATH_RAY_SURFACE_PASS;
     }
   }
 
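Both `path_state_next` hunks replace the old `bounce == 1` test with a latch: the first qualifying bounce sets a pass-category bit, and any later bounce that already carries a category (or a transparent background) leaves the classification alone. A condensed sketch, with bit values assumed for illustration:

```cpp
#include <cstdint>

// Bit values are assumptions for the sketch, not the kernel's definitions.
enum : uint32_t {
  SURFACE_PASS_BIT = 1u << 0,
  VOLUME_PASS_BIT = 1u << 1,
  TRANSPARENT_BACKGROUND_BIT = 1u << 2,
  ANY_PASS_BITS = SURFACE_PASS_BIT | VOLUME_PASS_BIT,  // stands in for PATH_RAY_ANY_PASS
};

// First qualifying surface bounce wins; later bounces keep the original
// classification, which is what makes the pass split depth-independent.
inline uint32_t classify_surface_bounce(uint32_t flag)
{
  if (!(flag & ANY_PASS_BITS) && !(flag & TRANSPARENT_BACKGROUND_BIT)) {
    flag |= SURFACE_PASS_BIT;
  }
  return flag;
}
```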
@@ -208,9 +211,7 @@ ccl_device_inline bool path_state_volume_next(IntegratorState state)
   }
 
   /* Random number generator next bounce. */
-  if (volume_bounds_bounce > 1) {
-    INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM;
-  }
+  INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM;
 
   return true;
 }
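Dropping the `volume_bounds_bounce > 1` condition means the RNG dimension offset now advances on every volume-bounds bounce. The point of the offset is to give each bounce its own block of sampler dimensions; a schematic sketch of that indexing (the constant is illustrative, not the kernel's value):

```cpp
#include <cstdint>

// Each bounce consumes a fixed budget of sampler dimensions; advancing
// rng_offset by that budget keeps every random decision on its own dimension
// and avoids reusing (and thus correlating) dimensions across bounces.
constexpr uint32_t kDimsPerBounce = 8;  // illustrative stand-in for PRNG_BOUNCE_NUM

inline uint32_t sample_dimension(uint32_t rng_offset, uint32_t decision_index)
{
  // decision_index < kDimsPerBounce identifies the decision within the bounce.
  return rng_offset + decision_index;
}
```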
@@ -191,14 +191,18 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
   const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce);
   uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag);
   shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0;
-  shadow_flag |= (is_transmission) ? PATH_RAY_TRANSMISSION_PASS : PATH_RAY_REFLECT_PASS;
+  shadow_flag |= PATH_RAY_SURFACE_PASS;
   const float3 throughput = INTEGRATOR_STATE(state, path, throughput) * bsdf_eval_sum(&bsdf_eval);
 
   if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
-    const float3 diffuse_glossy_ratio = (bounce == 0) ?
-                                            bsdf_eval_diffuse_glossy_ratio(&bsdf_eval) :
-                                            INTEGRATOR_STATE(state, path, diffuse_glossy_ratio);
-    INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio;
+    const float3 pass_diffuse_weight = (bounce == 0) ?
+                                           bsdf_eval_pass_diffuse_weight(&bsdf_eval) :
+                                           INTEGRATOR_STATE(state, path, pass_diffuse_weight);
+    const float3 pass_glossy_weight = (bounce == 0) ?
+                                          bsdf_eval_pass_glossy_weight(&bsdf_eval) :
+                                          INTEGRATOR_STATE(state, path, pass_glossy_weight);
+    INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_diffuse_weight) = pass_diffuse_weight;
+    INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_glossy_weight) = pass_glossy_weight;
   }
 
   INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, render_pixel_index) = INTEGRATOR_STATE(
@@ -283,7 +287,9 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
 
   if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
     if (INTEGRATOR_STATE(state, path, bounce) == 0) {
-      INTEGRATOR_STATE_WRITE(state, path, diffuse_glossy_ratio) = bsdf_eval_diffuse_glossy_ratio(
+      INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = bsdf_eval_pass_diffuse_weight(
+          &bsdf_eval);
+      INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = bsdf_eval_pass_glossy_weight(
           &bsdf_eval);
     }
   }
@@ -445,7 +451,7 @@ ccl_device bool integrate_surface(KernelGlobals kg,
   }
 #endif
 
-  shader_prepare_surface_closures(kg, state, &sd);
+  shader_prepare_surface_closures(kg, state, &sd, path_flag);
 
 #ifdef __HOLDOUT__
   /* Evaluate holdout. */
@@ -492,10 +498,6 @@ ccl_device bool integrate_surface(KernelGlobals kg,
   kernel_write_denoising_features_surface(kg, state, &sd, render_buffer);
 #endif
 
-#ifdef __SHADOW_CATCHER__
-  kernel_write_shadow_catcher_bounce_data(kg, state, &sd, render_buffer);
-#endif
-
   /* Direct light. */
   PROFILING_EVENT(PROFILING_SHADE_SURFACE_DIRECT_LIGHT);
   integrate_surface_direct_light(kg, state, &sd, &rng_state);
@@ -263,6 +263,12 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
 /* Equi-angular sampling as in:
  * "Importance Sampling Techniques for Path Tracing in Participating Media" */
 
+/* Below this pdf we ignore samples, as they tend to lead to very long distances.
+ * This can cause performance issues with BVH traversal in OptiX, leading it to
+ * traverse many nodes. Since these contribute very little to the image, just ignore
+ * those samples. */
+#  define VOLUME_SAMPLE_PDF_CUTOFF 1e-8f
+
 ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict ray,
                                            const float3 light_P,
                                            const float xi,
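To see where a pdf below `VOLUME_SAMPLE_PDF_CUTOFF` comes from, here is a standalone sketch of equiangular sampling from the cited paper (Kulla and Fajardo): the pdf falls off quadratically with distance from the light's closest point on the ray, so near-zero pdfs correspond exactly to the very long distances the cutoff guards against.

```cpp
#include <cmath>

// Standalone sketch of equiangular distance sampling along a ray, not the
// kernel's code. delta: ray parameter of the point closest to the light;
// D: distance from the light to that point; xi: uniform random number.
inline float equiangular_sample_sketch(
    float tmin, float tmax, float delta, float D, float xi, float *pdf)
{
  const float theta_a = atan2f(tmin - delta, D);
  const float theta_b = atan2f(tmax - delta, D);
  const float t = D * tanf(theta_a + xi * (theta_b - theta_a));
  // pdf = D / ((theta_b - theta_a) * (D^2 + t^2)): quadratic falloff in t,
  // so samples with pdf < cutoff map to extreme distances.
  *pdf = D / ((theta_b - theta_a) * (D * D + t * t));
  return delta + t;
}
```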
@@ -437,7 +443,8 @@ ccl_device_forceinline void volume_integrate_step_scattering(
 
   /* Equiangular sampling for direct lighting. */
   if (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR && !result.direct_scatter) {
-    if (result.direct_t >= vstate.start_t && result.direct_t <= vstate.end_t) {
+    if (result.direct_t >= vstate.start_t && result.direct_t <= vstate.end_t &&
+        vstate.equiangular_pdf > VOLUME_SAMPLE_PDF_CUTOFF) {
       const float new_dt = result.direct_t - vstate.start_t;
       const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
 
@@ -474,26 +481,28 @@ ccl_device_forceinline void volume_integrate_step_scattering(
       const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
       const float distance_pdf = dot(channel_pdf, coeff.sigma_t * new_transmittance);
 
-      /* throughput */
-      result.indirect_scatter = true;
-      result.indirect_t = new_t;
-      result.indirect_throughput *= coeff.sigma_s * new_transmittance / distance_pdf;
-      shader_copy_volume_phases(&result.indirect_phases, sd);
+      if (vstate.distance_pdf * distance_pdf > VOLUME_SAMPLE_PDF_CUTOFF) {
+        /* throughput */
+        result.indirect_scatter = true;
+        result.indirect_t = new_t;
+        result.indirect_throughput *= coeff.sigma_s * new_transmittance / distance_pdf;
+        shader_copy_volume_phases(&result.indirect_phases, sd);
 
-      if (vstate.direct_sample_method != VOLUME_SAMPLE_EQUIANGULAR) {
-        /* If using distance sampling for direct light, just copy parameters
-         * of indirect light since we scatter at the same point then. */
-        result.direct_scatter = true;
-        result.direct_t = result.indirect_t;
-        result.direct_throughput = result.indirect_throughput;
-        shader_copy_volume_phases(&result.direct_phases, sd);
+        if (vstate.direct_sample_method != VOLUME_SAMPLE_EQUIANGULAR) {
+          /* If using distance sampling for direct light, just copy parameters
+           * of indirect light since we scatter at the same point then. */
+          result.direct_scatter = true;
+          result.direct_t = result.indirect_t;
+          result.direct_throughput = result.indirect_throughput;
+          shader_copy_volume_phases(&result.direct_phases, sd);
 
-        /* Multiple importance sampling. */
-        if (vstate.use_mis) {
-          const float equiangular_pdf = volume_equiangular_pdf(ray, equiangular_light_P, new_t);
-          const float mis_weight = power_heuristic(vstate.distance_pdf * distance_pdf,
-                                                   equiangular_pdf);
-          result.direct_throughput *= 2.0f * mis_weight;
+          /* Multiple importance sampling. */
+          if (vstate.use_mis) {
+            const float equiangular_pdf = volume_equiangular_pdf(ray, equiangular_light_P, new_t);
+            const float mis_weight = power_heuristic(vstate.distance_pdf * distance_pdf,
+                                                     equiangular_pdf);
+            result.direct_throughput *= 2.0f * mis_weight;
+          }
         }
       }
     }
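The MIS weight above is the standard two-sample power heuristic with exponent 2; a generic sketch:

```cpp
// Power heuristic (beta = 2) for combining two sampling strategies: a sample
// is weighted by its pdf's squared share. Generic code, not the kernel's
// power_heuristic.
inline float power_heuristic_sketch(float pdf_a, float pdf_b)
{
  const float a2 = pdf_a * pdf_a;
  return a2 / (a2 + pdf_b * pdf_b);
}
```

The `2.0f * mis_weight` factor in the hunk appears to compensate for splitting the direct-light estimate across the two strategies (distance and equiangular), each effectively contributing half the samples.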
@@ -694,8 +703,10 @@ ccl_device_forceinline bool integrate_volume_sample_light(
   float light_u, light_v;
   path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v);
 
-  light_distribution_sample_from_volume_segment(
-      kg, light_u, light_v, sd->time, sd->P, bounce, path_flag, ls);
+  if (!light_distribution_sample_from_volume_segment(
+          kg, light_u, light_v, sd->time, sd->P, bounce, path_flag, ls)) {
+    return false;
+  }
 
   if (ls->shader & SHADER_EXCLUDE_SCATTER) {
     return false;
@@ -794,10 +805,11 @@ ccl_device_forceinline void integrate_volume_direct_light(
   const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval);
 
   if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
-    const float3 diffuse_glossy_ratio = (bounce == 0) ?
-                                            one_float3() :
-                                            INTEGRATOR_STATE(state, path, diffuse_glossy_ratio);
-    INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio;
+    const float3 pass_diffuse_weight = (bounce == 0) ?
+                                           one_float3() :
+                                           INTEGRATOR_STATE(state, path, pass_diffuse_weight);
+    INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_diffuse_weight) = pass_diffuse_weight;
+    INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_glossy_weight) = zero_float3();
   }
 
   INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, render_pixel_index) = INTEGRATOR_STATE(
@@ -876,7 +888,8 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
   INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput_phase;
 
   if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
-    INTEGRATOR_STATE_WRITE(state, path, diffuse_glossy_ratio) = one_float3();
+    INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = one_float3();
+    INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = zero_float3();
   }
 
   /* Update path state */
@@ -1024,7 +1037,7 @@ ccl_device void integrator_shade_volume(KernelGlobals kg,
   else {
     /* Continue to background, light or surface. */
     integrator_intersect_next_kernel_after_volume<DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME>(
-        kg, state, &isect);
+        kg, state, &isect, render_buffer);
     return;
   }
 #endif /* __VOLUME__ */
@@ -105,8 +105,42 @@ ccl_device_inline void shader_copy_volume_phases(ccl_private ShaderVolumePhases
 
 ccl_device_inline void shader_prepare_surface_closures(KernelGlobals kg,
                                                        ConstIntegratorState state,
-                                                       ccl_private ShaderData *sd)
+                                                       ccl_private ShaderData *sd,
+                                                       const uint32_t path_flag)
 {
+  /* Filter out closures. */
+  if (kernel_data.integrator.filter_closures) {
+    if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_EMISSION) {
+      sd->closure_emission_background = zero_float3();
+    }
+
+    if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_DIRECT_LIGHT) {
+      sd->flag &= ~SD_BSDF_HAS_EVAL;
+    }
+
+    if (path_flag & PATH_RAY_CAMERA) {
+      for (int i = 0; i < sd->num_closure; i++) {
+        ccl_private ShaderClosure *sc = &sd->closure[i];
+
+        if ((CLOSURE_IS_BSDF_DIFFUSE(sc->type) &&
+             (kernel_data.integrator.filter_closures & FILTER_CLOSURE_DIFFUSE)) ||
+            (CLOSURE_IS_BSDF_GLOSSY(sc->type) &&
+             (kernel_data.integrator.filter_closures & FILTER_CLOSURE_GLOSSY)) ||
+            (CLOSURE_IS_BSDF_TRANSMISSION(sc->type) &&
+             (kernel_data.integrator.filter_closures & FILTER_CLOSURE_TRANSMISSION))) {
+          sc->type = CLOSURE_NONE_ID;
+          sc->sample_weight = 0.0f;
+        }
+        else if ((CLOSURE_IS_BSDF_TRANSPARENT(sc->type) &&
+                  (kernel_data.integrator.filter_closures & FILTER_CLOSURE_TRANSPARENT))) {
+          sc->type = CLOSURE_HOLDOUT_ID;
+          sc->sample_weight = 0.0f;
+          sd->flag |= SD_HOLDOUT;
+        }
+      }
+    }
+  }
+
   /* Defensive sampling.
    *
    * We can likely also do defensive sampling at deeper bounces, particularly
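The new `filter_closures` path nulls out whole closure classes on camera rays (and can turn transparent closures into holdouts). A hypothetical host-side illustration of driving it with a bitmask; the enum values here are assumptions for the sketch, not the kernel's definitions:

```cpp
#include <cstdint>

// Hypothetical bit assignments for illustration only.
enum FilterClosuresSketch : uint32_t {
  kFilterNone = 0,
  kFilterEmission = 1u << 0,
  kFilterDiffuse = 1u << 1,
  kFilterGlossy = 1u << 2,
  kFilterTransmission = 1u << 3,
  kFilterTransparent = 1u << 4,
  kFilterDirectLight = 1u << 5,
};

// Example: isolate diffuse shading by filtering out glossy and transmission
// closures on camera rays.
constexpr uint32_t kDiffuseOnlyMask = kFilterGlossy | kFilterTransmission;
```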
@@ -209,8 +243,7 @@ ccl_device_inline float _shader_bsdf_multi_eval(KernelGlobals kg,
       float3 eval = bsdf_eval(kg, sd, sc, omega_in, is_transmission, &bsdf_pdf);
 
       if (bsdf_pdf != 0.0f) {
-        const bool is_diffuse = CLOSURE_IS_BSDF_DIFFUSE(sc->type);
-        bsdf_eval_accum(result_eval, is_diffuse, eval * sc->weight, 1.0f);
+        bsdf_eval_accum(result_eval, sc->type, eval * sc->weight);
         sum_pdf += bsdf_pdf * sc->sample_weight;
       }
     }
@@ -235,7 +268,7 @@ ccl_device_inline
                                       ccl_private BsdfEval *bsdf_eval,
                                       const uint light_shader_flags)
 {
-  bsdf_eval_init(bsdf_eval, false, zero_float3());
+  bsdf_eval_init(bsdf_eval, CLOSURE_NONE_ID, zero_float3());
 
   return _shader_bsdf_multi_eval(
       kg, sd, omega_in, is_transmission, NULL, bsdf_eval, 0.0f, 0.0f, light_shader_flags);
@@ -328,8 +361,7 @@ ccl_device int shader_bsdf_sample_closure(KernelGlobals kg,
   label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
 
   if (*pdf != 0.0f) {
-    const bool is_diffuse = CLOSURE_IS_BSDF_DIFFUSE(sc->type);
-    bsdf_eval_init(bsdf_eval, is_diffuse, eval * sc->weight);
+    bsdf_eval_init(bsdf_eval, sc->type, eval * sc->weight);
 
     if (sd->num_closure > 1) {
       const bool is_transmission = shader_bsdf_is_transmission(sd, *omega_in);
@@ -655,7 +687,7 @@ ccl_device_inline float _shader_volume_phase_multi_eval(
     float3 eval = volume_phase_eval(sd, svc, omega_in, &phase_pdf);
 
     if (phase_pdf != 0.0f) {
-      bsdf_eval_accum(result_eval, false, eval, 1.0f);
+      bsdf_eval_accum(result_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
       sum_pdf += phase_pdf * svc->sample_weight;
     }
 
@@ -671,7 +703,7 @@ ccl_device float shader_volume_phase_eval(KernelGlobals kg,
                                           const float3 omega_in,
                                           ccl_private BsdfEval *phase_eval)
 {
-  bsdf_eval_init(phase_eval, false, zero_float3());
+  bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, zero_float3());
 
   return _shader_volume_phase_multi_eval(sd, phases, omega_in, -1, phase_eval, 0.0f, 0.0f);
 }
@@ -729,7 +761,7 @@ ccl_device int shader_volume_phase_sample(KernelGlobals kg,
   label = volume_phase_sample(sd, svc, randu, randv, &eval, omega_in, domega_in, pdf);
 
   if (*pdf != 0.0f) {
-    bsdf_eval_init(phase_eval, false, eval);
+    bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
   }
 
   return label;
@@ -752,7 +784,7 @@ ccl_device int shader_phase_sample_closure(KernelGlobals kg,
   label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
 
   if (*pdf != 0.0f)
-    bsdf_eval_init(phase_eval, false, eval);
+    bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
 
   return label;
 }
@@ -16,6 +16,7 @@
 
 #pragma once
 
+#include "kernel/film/write_passes.h"
 #include "kernel/integrator/path_state.h"
 #include "kernel/integrator/state_util.h"
 
@@ -47,7 +48,7 @@ ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(KernelGlobals
     return false;
   }
 
-  if (path_flag & PATH_RAY_SHADOW_CATCHER_PASS) {
+  if (path_flag & PATH_RAY_SHADOW_CATCHER_HIT) {
     return false;
   }
 
@@ -88,6 +89,28 @@ ccl_device_forceinline bool kernel_shadow_catcher_is_object_pass(const uint32_t
   return path_flag & PATH_RAY_SHADOW_CATCHER_PASS;
 }
 
+/* Write shadow catcher passes on a bounce from the shadow catcher object. */
+ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data(
+    KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer)
+{
+  kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED);
+  kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
+
+  const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index);
+  const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
+                                        kernel_data.film.pass_stride;
+  ccl_global float *buffer = render_buffer + render_buffer_offset;
+
+  /* Count sample for the shadow catcher object. */
+  kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f);
+
+  /* Since the split is done, the sample does not contribute to the matte, so accumulate it as
+   * transparency to the matte. */
+  const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
+  kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3,
+                          average(throughput));
+}
+
 #endif /* __SHADOW_CATCHER__ */
 
 CCL_NAMESPACE_END
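The `pass_shadow_catcher_matte + 3` write in the added function targets the alpha component of the RGBA matte pass; the split sample is recorded purely as transparency. A scalar sketch of that write:

```cpp
// The matte pass is stored as four floats (RGBA), so offset 3 is alpha.
// Accumulating the averaged throughput there records the post-split sample
// as matte transparency. Standalone sketch, not kernel code.
inline void accumulate_matte_transparency(float *matte_rgba, float average_throughput)
{
  matte_rgba[3] += average_throughput;
}
```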
@@ -46,8 +46,9 @@ KERNEL_STRUCT_MEMBER(shadow_path,
                      float3,
                      unshadowed_throughput,
                      KERNEL_FEATURE_SHADOW_PASS | KERNEL_FEATURE_AO_ADDITIVE)
-/* Ratio of throughput to distinguish diffuse and glossy render passes. */
-KERNEL_STRUCT_MEMBER(shadow_path, float3, diffuse_glossy_ratio, KERNEL_FEATURE_LIGHT_PASSES)
+/* Ratio of throughput to distinguish diffuse / glossy / transmission render passes. */
+KERNEL_STRUCT_MEMBER(shadow_path, float3, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES)
+KERNEL_STRUCT_MEMBER(shadow_path, float3, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES)
 /* Number of intersections found by ray-tracing. */
 KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, num_hits, KERNEL_FEATURE_PATH_TRACING)
 KERNEL_STRUCT_END(shadow_path)
@@ -60,8 +60,9 @@ KERNEL_STRUCT_MEMBER(path, float, min_ray_pdf, KERNEL_FEATURE_PATH_TRACING)
 KERNEL_STRUCT_MEMBER(path, float, continuation_probability, KERNEL_FEATURE_PATH_TRACING)
 /* Throughput. */
 KERNEL_STRUCT_MEMBER(path, float3, throughput, KERNEL_FEATURE_PATH_TRACING)
-/* Ratio of throughput to distinguish diffuse and glossy render passes. */
-KERNEL_STRUCT_MEMBER(path, float3, diffuse_glossy_ratio, KERNEL_FEATURE_LIGHT_PASSES)
+/* Ratio of throughput to distinguish diffuse / glossy / transmission render passes. */
+KERNEL_STRUCT_MEMBER(path, float3, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES)
+KERNEL_STRUCT_MEMBER(path, float3, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES)
 /* Denoising. */
 KERNEL_STRUCT_MEMBER(path, float3, denoising_feature_throughput, KERNEL_FEATURE_DENOISING)
 /* Shader sorting. */
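Both state structs store only two of the three pass weights, per the comment in the accumulation code about saving GPU memory: since reflection, transmission and volume scatter are treated as exclusive, the weights sum to one and the third is derived at accumulation time. A minimal sketch of the trade-off:

```cpp
// Two stored weights, one derived: saves a float3 per path and shadow path.
struct PassWeightsSketch {
  float diffuse;
  float glossy;
  float transmission() const { return 1.0f - diffuse - glossy; }
};
```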
@@ -71,6 +71,10 @@ ccl_device int subsurface_bounce(KernelGlobals kg,
   }
 # endif
 
+  if (sd->flag & SD_BACKFACING) {
+    path_flag |= PATH_RAY_SUBSURFACE_BACKFACING;
+  }
+
   INTEGRATOR_STATE_WRITE(state, path, throughput) *= weight;
   INTEGRATOR_STATE_WRITE(state, path, flag) = path_flag;
 
@@ -79,7 +83,8 @@ ccl_device int subsurface_bounce(KernelGlobals kg,
 
   if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
     if (INTEGRATOR_STATE(state, path, bounce) == 0) {
-      INTEGRATOR_STATE_WRITE(state, path, diffuse_glossy_ratio) = one_float3();
+      INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = one_float3();
+      INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = zero_float3();
     }
   }
 
@@ -47,6 +47,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
   const float time = INTEGRATOR_STATE(state, ray, time);
   const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng);
   const int object = INTEGRATOR_STATE(state, isect, object);
+  const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
 
   /* Read subsurface scattering parameters. */
   const float3 radius = INTEGRATOR_STATE(state, subsurface, radius);
@@ -123,6 +124,9 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
     const int object = ss_isect.hits[hit].object;
     const int object_flag = kernel_tex_fetch(__object_flag, object);
     float3 hit_Ng = ss_isect.Ng[hit];
+    if (path_flag & PATH_RAY_SUBSURFACE_BACKFACING) {
+      hit_Ng = -hit_Ng;
+    }
     if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
       hit_Ng = -hit_Ng;
     }
@@ -73,7 +73,7 @@ ccl_device_inline bool light_sample(KernelGlobals kg,
     ls->P = zero_float3();
     ls->Ng = zero_float3();
     ls->D = zero_float3();
-    ls->pdf = true;
+    ls->pdf = 1.0f;
     ls->t = FLT_MAX;
     return true;
   }
@@ -131,7 +131,7 @@ ccl_device_inline bool light_sample(KernelGlobals kg,
       float3 dir = make_float3(klight->spot.dir[0], klight->spot.dir[1], klight->spot.dir[2]);
       ls->eval_fac *= spot_light_attenuation(
           dir, klight->spot.spot_angle, klight->spot.spot_smooth, ls->Ng);
-      if (ls->eval_fac == 0.0f) {
+      if (!in_volume_segment && ls->eval_fac == 0.0f) {
         return false;
       }
     }
@@ -170,7 +170,7 @@ ccl_device_inline bool light_sample(KernelGlobals kg,
       float3 sample_axisu = axisu;
       float3 sample_axisv = axisv;
 
-      if (klight->area.tan_spread > 0.0f) {
+      if (!in_volume_segment && klight->area.tan_spread > 0.0f) {
         if (!light_spread_clamp_area_light(
                 P, Ng, &ls->P, &sample_axisu, &sample_axisv, klight->area.tan_spread)) {
           return false;
@@ -203,7 +203,7 @@ ccl_device_inline bool light_sample(KernelGlobals kg,
 
   ls->pdf *= kernel_data.integrator.pdf_lights;
 
-  return (ls->pdf > 0.0f);
+  return in_volume_segment || (ls->pdf > 0.0f);
 }
 
 ccl_device bool lights_intersect(KernelGlobals kg,
@@ -199,6 +199,9 @@ ccl_device_inline float3 shadow_ray_offset(KernelGlobals kg,
   if (offset_cutoff > 0.0f) {
     float NgL = dot(Ng, L);
     float offset_amount = 0.0f;
+    if (NL < 0) {
+      NL = -NL;
+    }
     if (NL < offset_cutoff) {
       offset_amount = clamp(2.0f - (NgL + NL) / offset_cutoff, 0.0f, 1.0f);
     }
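The added clamp makes the shading-normal dot product behave symmetrically when the shading normal faces away from the light. A worked example of the blend factor, as a standalone sketch of the same arithmetic:

```cpp
#include <algorithm>

// With offset_cutoff = 0.1, NgL = 0.02 and NL = -0.05, the new fabs-style
// clamp gives NL = 0.05, and offset_amount = clamp(2 - (0.02 + 0.05) / 0.1,
// 0, 1) = clamp(1.3, 0, 1) = 1, i.e. a full shadow-ray offset at grazing
// angles regardless of the shading normal's sign.
inline float offset_amount_sketch(float NL, float NgL, float offset_cutoff)
{
  if (NL < 0.0f) {
    NL = -NL;
  }
  return (NL < offset_cutoff) ? std::clamp(2.0f - (NgL + NL) / offset_cutoff, 0.0f, 1.0f) : 0.0f;
}
```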
@@ -23,7 +23,8 @@ CCL_NAMESPACE_BEGIN
 ccl_device_inline bool svm_node_aov_check(const uint32_t path_flag,
                                           ccl_global float *render_buffer)
 {
-  bool is_primary = (path_flag & PATH_RAY_CAMERA) && (!(path_flag & PATH_RAY_SINGLE_PASS_DONE));
+  bool is_primary = (path_flag & PATH_RAY_TRANSPARENT_BACKGROUND) &&
+                    (!(path_flag & PATH_RAY_SINGLE_PASS_DONE));
 
   return ((render_buffer != NULL) && is_primary);
 }