Compare commits
632 Commits
temp-gpenc
...
temp-ui-cp
Author | SHA1 | Date | |
---|---|---|---|
f77be09171 | |||
1a3681b83c | |||
3c2073d844 | |||
a1ea6d6496 | |||
1836acbeac | |||
8d269a2488 | |||
ed6e1381dc | |||
248ee6270b | |||
afd16c487d | |||
4029cdee7b | |||
680a0fb23b | |||
ed8fee16ac | |||
4a0e19e608 | |||
5ca6965273 | |||
0147e09e0b | |||
f07b09da27 | |||
f83aa1ae08 | |||
d1c21d5673 | |||
b5ca43b2a0 | |||
994e3c6ac5 | |||
dad0f37c1e | |||
118afe1de3 | |||
ae6e35279f | |||
043673ca70 | |||
3cf803cf3c | |||
6422f75088 | |||
b918c079da | |||
6bc3311410 | |||
60ad5f49fa | |||
826535979a | |||
32690cafd1 | |||
64c26d2862 | |||
848dff1e4c | |||
b9c358392d | |||
0ce18561bc | |||
851906744e | |||
c4cfa1b23e | |||
0710ec485e | |||
49129180b2 | |||
cdc1c60f5b | |||
58c8c4fde3 | |||
81754a0fc5 | |||
412642865d | |||
14a0fb0cc6 | |||
959f3cd645 | |||
![]() |
008cc625aa | ||
de27925aea | |||
38c4f40159 | |||
d6d5089c65 | |||
20c1ce3d9b | |||
f47daa7ec9 | |||
6e37f14489 | |||
![]() |
bb665ef8d2 | ||
![]() |
705fe08b61 | ||
1b7b996e16 | |||
17e562311f | |||
2a53e0f437 | |||
5f626ac331 | |||
369914e7b1 | |||
bbf09eb59c | |||
b3daf61ddf | |||
2dcdfab94c | |||
38573d515e | |||
f4e1f62c62 | |||
dc1ed9c1aa | |||
584089879c | |||
38cf48f62b | |||
50aad904b3 | |||
962e221cd3 | |||
f13160d188 | |||
583f19d692 | |||
a87d78860b | |||
c1eeb38f7c | |||
c3d6f5ecf3 | |||
ea57c011de | |||
737d363e02 | |||
fe1b8b671a | |||
687272c409 | |||
460f7ec7aa | |||
![]() |
a819523dff | ||
68a450cbe4 | |||
aa0c2c0f47 | |||
4f02817367 | |||
247d75d2b1 | |||
6396d29779 | |||
ff9606ddc4 | |||
1c00b2ef70 | |||
106277be43 | |||
3250ab31cd | |||
7d44676b5f | |||
11275b7363 | |||
80249ce6e4 | |||
e0c5ff87b7 | |||
571f373155 | |||
c464fd724b | |||
356373ff7a | |||
5938e97a24 | |||
0d73d5c1a2 | |||
01e479b790 | |||
63ae0939ed | |||
db28a8b3d1 | |||
cdcbf05ea8 | |||
2cb6b0b4eb | |||
41ae2c6438 | |||
0e6d893d07 | |||
e3567aad0a | |||
af8a449ca5 | |||
0f3b3ee679 | |||
f5e99abb60 | |||
40ac3776db | |||
acf8f6220d | |||
df788ecfd9 | |||
a777c09d5f | |||
41f58fadae | |||
aa1a51ff9f | |||
3f294a37f5 | |||
461cb550cc | |||
55db44ca2c | |||
fa8a59689b | |||
c3e919b317 | |||
a2a72d89b1 | |||
14d0b57be7 | |||
8e535ee9b4 | |||
772696a1a5 | |||
8990983b07 | |||
a5e7657cee | |||
822dab9a42 | |||
67194fb247 | |||
f8d6bbeb63 | |||
ed82bbfc2c | |||
dd5fdb9370 | |||
51f56e71cb | |||
6d7a067a83 | |||
02c6136958 | |||
b79e5ae4f2 | |||
7dea18b3aa | |||
04dc58df83 | |||
294ff0de43 | |||
f383dfabf7 | |||
d37efe332c | |||
ceaf4779da | |||
88d9ed3c1c | |||
0b251493c8 | |||
ea969ccc02 | |||
b864397201 | |||
4d1a116cdf | |||
1300da6d39 | |||
016f9c2cf5 | |||
953f719e58 | |||
1168665653 | |||
fe38715600 | |||
4d497721ec | |||
3c479b9823 | |||
7411fa4e0d | |||
44d7ec7e80 | |||
![]() |
6dac345a64 | ||
efa87ad5eb | |||
8c82d4cbf6 | |||
89349067b6 | |||
b53c4fa8da | |||
25ddb576ff | |||
02e045ffbe | |||
![]() |
03b5be4e3c | ||
be1745425c | |||
b391037424 | |||
41a3de878f | |||
b0e38f4d04 | |||
![]() |
2f69266c64 | ||
5d08396970 | |||
![]() |
8fe423a7a9 | ||
![]() |
1c0f5e79fa | ||
2910be8f19 | |||
c58e7da43e | |||
9f83ef2149 | |||
85990c877c | |||
b75946bcb6 | |||
71efb7805b | |||
3648fd9917 | |||
7a6cdeb242 | |||
24bd2d0bea | |||
![]() |
e83f46ea76 | ||
97e0cc41ca | |||
984edb2c4e | |||
012895e8a1 | |||
47c92bf8de | |||
afb7da5538 | |||
b041678028 | |||
03b7982301 | |||
42c30f3b9c | |||
6b3cee2538 | |||
5097105b3c | |||
7193e5aa10 | |||
41d29a1603 | |||
567ae90374 | |||
d24c0011cf | |||
0fd94a1f5e | |||
9100cc0f39 | |||
4cac8025f0 | |||
4d17301ba4 | |||
63c4ec89e1 | |||
dfb157f9c4 | |||
331e8007ed | |||
2654c523c1 | |||
7ca9fb9865 | |||
9fd6dae793 | |||
![]() |
bfb3d78902 | ||
511ac66dab | |||
12d7994a48 | |||
c83e33b661 | |||
05f93b58d3 | |||
1ea169d90e | |||
c0f33814c1 | |||
ab819517fc | |||
21adf2ec89 | |||
8fa69dafdd | |||
b3b00be34e | |||
1c0cd50472 | |||
f5128f219f | |||
c6e4953719 | |||
b211266226 | |||
6c0a5461f7 | |||
3d41d0b1b5 | |||
0151d846e8 | |||
4e38771d5c | |||
![]() |
c94583cd64 | ||
b4c3ea2644 | |||
40b63bbf5b | |||
7b82d8f029 | |||
754f674977 | |||
d5c8d3e661 | |||
61d0f77810 | |||
e31f282917 | |||
7c0cecfd00 | |||
3e81d177ea | |||
![]() |
6239e089cf | ||
dec459e424 | |||
bc886bc8d7 | |||
![]() |
2c096f17a6 | ||
8d77973dd7 | |||
9de35e396b | |||
609a681fb5 | |||
52c3214776 | |||
ca253df623 | |||
f74234895a | |||
1c27cc5529 | |||
011c2a37eb | |||
780b29109c | |||
617cf2f291 | |||
6bf13d0734 | |||
c7bd508766 | |||
53f401ea63 | |||
58795c6047 | |||
27f277d948 | |||
576d99e59a | |||
dad8f4ac09 | |||
a67b33acd0 | |||
59f8061a34 | |||
67869432f2 | |||
dd260d2f03 | |||
9a09adb7af | |||
bf0180d206 | |||
c481549870 | |||
845a3573f5 | |||
![]() |
b7a4f79748 | ||
0d3a33e45e | |||
145839aa42 | |||
cacfaaa9a5 | |||
1677ddb7ee | |||
87ace7207d | |||
676137f043 | |||
25b3515324 | |||
6cf4999e50 | |||
89ca298210 | |||
99c970a94d | |||
![]() |
cce4271b31 | ||
801451c459 | |||
a81abbbb8f | |||
841460afce | |||
6077fe7fae | |||
1a1341c387 | |||
f7ca0ecfff | |||
4f2ce8d8d3 | |||
5a05fa8f74 | |||
1a4a96a9d1 | |||
59641042a7 | |||
3b35d9d667 | |||
44ec911633 | |||
e4871b2835 | |||
0ebb7ab41f | |||
c2a68c066b | |||
edcce2c073 | |||
1e88fc251f | |||
71ce178b3e | |||
51914d4a4a | |||
06e9d40c33 | |||
![]() |
9d732445b9 | ||
![]() |
f415051a57 | ||
![]() |
2683b74cb3 | ||
ec9acdeac2 | |||
91d3cc51c3 | |||
33fa053fe8 | |||
02fa3a8128 | |||
1aa851e939 | |||
c8c14d1681 | |||
90fb1cc4e6 | |||
192cd76b7c | |||
e412fe1798 | |||
550c51b08b | |||
71067a58ec | |||
6940c4b602 | |||
0ac19425d4 | |||
e3ddfedbb6 | |||
5e203c4f4b | |||
25630ab2a1 | |||
88c9f1266d | |||
1f5e1b59a3 | |||
c2c41fb14c | |||
64a114d11c | |||
d1a5f24e73 | |||
aee5fcc120 | |||
9f0e9f36be | |||
87ba0dcaca | |||
f7e0317b96 | |||
60523ea523 | |||
9f2f9dbca6 | |||
4401c93e45 | |||
da82d46a5a | |||
65944e7e84 | |||
![]() |
c0df88f3b5 | ||
e8f4010611 | |||
b2d9716b4a | |||
d775995dc3 | |||
a9a5f7ce17 | |||
b8fd474bb1 | |||
8831c6f056 | |||
50b257715f | |||
2f442234e7 | |||
6a97668c9b | |||
2d251478bb | |||
e460f188f0 | |||
277b2fcbfa | |||
4fb02d7f8e | |||
cff78860ac | |||
be40c67b56 | |||
23d0b5dcd2 | |||
![]() |
2459a3c9b1 | ||
bcb20e9a29 | |||
a859837cde | |||
4d1a88e374 | |||
a94c3aafe5 | |||
0ecf6f2abb | |||
ff40b90f99 | |||
5db84d0ef1 | |||
f0ce95b7b9 | |||
d2728868c0 | |||
191a3bf2ad | |||
21eeedfc60 | |||
66939d47b1 | |||
a968f1b9b3 | |||
ffa14008ac | |||
84dce9c1fa | |||
5c42e54f6e | |||
1b5ceb9a75 | |||
![]() |
337dbb1ab0 | ||
d95216b94c | |||
![]() |
9795f2645c | ||
![]() |
83c21e01dd | ||
d46317cf3c | |||
7f80b5e675 | |||
91215ace72 | |||
5be3a68f58 | |||
2a41cd46ba | |||
f0f97e18c1 | |||
f396ab236a | |||
3ff7d0796d | |||
be024ee7b7 | |||
435c824a5f | |||
2205e5f63f | |||
37b256e26f | |||
f1646a4d5e | |||
56f783d883 | |||
![]() |
efcd587bc2 | ||
b64042b482 | |||
![]() |
b28348f9e9 | ||
![]() |
18af9da572 | ||
d158db475b | |||
db94d030bc | |||
e3ee913932 | |||
103fe4d1d1 | |||
970be7e65a | |||
6873aabf93 | |||
edae67c036 | |||
b0e2e45496 | |||
377da3f949 | |||
5f4afecbf3 | |||
85bbcb32eb | |||
99d00a7489 | |||
276d7f7c19 | |||
cc12d2a5a0 | |||
1aaf4ce0a4 | |||
6192695a94 | |||
28dc3b0b84 | |||
37f50ffdbc | |||
![]() |
b557e4317d | ||
![]() |
857bb1b5ec | ||
d7f1430f11 | |||
![]() |
4dd19a1ad6 | ||
4f5f8622f9 | |||
187bce103b | |||
0fe21fe7b9 | |||
![]() |
8447ab606e | ||
ea2dda306c | |||
6a96edce2e | |||
a84c92fc73 | |||
bfb6ea898b | |||
ab3fcd62cc | |||
5fe146e505 | |||
0190b104c8 | |||
909f47e0e1 | |||
e0cb3e0a39 | |||
b8d1022dff | |||
d7971972fa | |||
fe6114aaf5 | |||
dc513a0af8 | |||
3eb2bc2c3f | |||
fba7461e1a | |||
d17f5bcd8f | |||
7419e291e8 | |||
b927cc9ba6 | |||
ce9fcb15a3 | |||
bd622aef3c | |||
c255be2d02 | |||
cd64615425 | |||
930d14cc62 | |||
f1466ce9a8 | |||
0e4bdd428c | |||
67dfb61700 | |||
d0f05ba915 | |||
721fc9c1c9 | |||
533c396898 | |||
1b34da5da6 | |||
115cf5ef98 | |||
a6c822733a | |||
a145b96396 | |||
ed852c8401 | |||
![]() |
99fe17f52d | ||
db25e64f6a | |||
![]() |
f8d968a13c | ||
![]() |
b5e82ff93d | ||
5a37724455 | |||
![]() |
fc544bc974 | ||
![]() |
3534c2b4ad | ||
0fc27536fb | |||
a582abd923 | |||
4737f9cff2 | |||
935d6a965a | |||
b973e27327 | |||
cd659f7bbf | |||
41137eb7a5 | |||
e87b99d7f3 | |||
fcfa9ac219 | |||
436e6dca24 | |||
bc3f5c7e14 | |||
1a8516163f | |||
d63ada602d | |||
787ae01dad | |||
19ba229391 | |||
2a17fd40a5 | |||
7c519aa5d8 | |||
7931ae0df3 | |||
deb8ae6bd1 | |||
78bfb74743 | |||
d9e5a3e6ad | |||
d0522d4ef1 | |||
03ccf37162 | |||
5465aa63d5 | |||
c5b36aa940 | |||
9d827a1834 | |||
1fdaf748bf | |||
5671e7a92c | |||
abbbf9f002 | |||
864af51d6a | |||
![]() |
2c596319a4 | ||
6f6a0185f2 | |||
ba2072524b | |||
![]() |
8799ab201d | ||
097a13f5be | |||
dc8a1d38b7 | |||
ff7645c5ed | |||
e508de0417 | |||
a304dfdb69 | |||
111974234c | |||
80f5a5a8aa | |||
fe2be36510 | |||
38430c384a | |||
5f35e7f12a | |||
a3877d8fe4 | |||
57dd1b7799 | |||
824d5984aa | |||
84c66fe9db | |||
6f1b5e1081 | |||
88c956c13b | |||
c967aab4ef | |||
ca1642cd0c | |||
34f4646786 | |||
![]() |
9800312590 | ||
f613c504c4 | |||
b2000412f2 | |||
3c089c0a88 | |||
![]() |
59618c7646 | ||
366796bbbe | |||
cad11f3098 | |||
969aa7bbfc | |||
![]() |
659de90a32 | ||
cc2b5959bb | |||
6a8ce5ec1c | |||
2688d7200a | |||
a5b2a3041f | |||
df9ab1c922 | |||
d3121fe4ec | |||
acaa736037 | |||
04eab3fd39 | |||
02c23e1613 | |||
e7a3454f5f | |||
b85fe57887 | |||
6295bdfd38 | |||
598bb9065c | |||
55e86f94a0 | |||
bc672e76eb | |||
6e2437d82b | |||
a4668ecf17 | |||
e4f484330a | |||
![]() |
0b4bd3ddc0 | ||
8ef092d2d8 | |||
ec1ab6310a | |||
3153bd0f5d | |||
2a6a492a82 | |||
4b6d58fd6d | |||
baabac5909 | |||
8140f7f574 | |||
7d606ad3b8 | |||
1bf3069912 | |||
9b646cfae5 | |||
2630fdb787 | |||
7e4e8cca7d | |||
79dae1a43f | |||
81ca6308d1 | |||
d49dec896a | |||
32757b2429 | |||
95077549c1 | |||
1cdc6381cf | |||
7c1ab77fa6 | |||
c932fd79ac | |||
b8fc7ed994 | |||
c8cec11353 | |||
4cd9e9991c | |||
cebea62b47 | |||
7a2827ee99 | |||
bfc7653490 | |||
501036faae | |||
7f6521f8dc | |||
5f169fdfdc | |||
ad227e73f3 | |||
8232cf5287 | |||
5291e4c358 | |||
1efc94bb2f | |||
7395062480 | |||
1fbb1d8cf6 | |||
eea3913348 | |||
![]() |
11f6c65e61 | ||
a43053a00a | |||
ce68367969 | |||
d01187c963 | |||
![]() |
77bc95bbd5 | ||
![]() |
edc00429e8 | ||
e6b38deb9d | |||
efe073f57c | |||
c26d49e854 | |||
477faffd78 | |||
683b945917 | |||
024bec85f6 | |||
59e69fc2bd | |||
aba0d01b78 | |||
3836b6ff8c | |||
800b025518 | |||
fb52a09840 | |||
baee7ce4a5 | |||
76c308e45d | |||
801db0d429 | |||
6fa05e2c29 | |||
756538b4a1 | |||
c6612da1e6 | |||
2d9d08677e | |||
3fa6aacb91 | |||
494385a5bc | |||
335082dcd3 | |||
ee5b6f7150 | |||
f0b5f94cb5 | |||
f04f9cc3d0 | |||
75265f27da | |||
62e32e7c2e | |||
![]() |
1eca437197 | ||
c6aacd718a | |||
96d8e5e66b | |||
d80d7b8f70 | |||
da41f11a29 | |||
4b57bc4e5d | |||
b539d425f0 | |||
![]() |
c306ccb67f | ||
35eb37c60d | |||
5925b1821a | |||
ad5814a2a7 | |||
66dda2b902 | |||
8eab23bc66 | |||
fd35216025 | |||
95d36a31b6 | |||
871c4380c4 | |||
4c182aef7c | |||
ae3f443220 | |||
b9d1b07a45 | |||
dd1be8db19 | |||
6450f380ac | |||
1051c17af3 | |||
87628abaa1 | |||
462177fd62 | |||
5d221a2a8a | |||
fb4b7aaa8f | |||
6183f63250 | |||
08228956d9 | |||
956be86c09 | |||
e2f9602be2 | |||
24ae9c52d8 |
@@ -162,6 +162,7 @@ PenaltyBreakString: 1000000
|
|||||||
ForEachMacros:
|
ForEachMacros:
|
||||||
- BEGIN_ANIMFILTER_SUBCHANNELS
|
- BEGIN_ANIMFILTER_SUBCHANNELS
|
||||||
- BKE_pbvh_vertex_iter_begin
|
- BKE_pbvh_vertex_iter_begin
|
||||||
|
- BKE_pbvh_face_iter_begin
|
||||||
- BLI_FOREACH_SPARSE_RANGE
|
- BLI_FOREACH_SPARSE_RANGE
|
||||||
- BLI_SMALLSTACK_ITER_BEGIN
|
- BLI_SMALLSTACK_ITER_BEGIN
|
||||||
- BMO_ITER
|
- BMO_ITER
|
||||||
|
@@ -257,6 +257,12 @@ if(UNIX AND NOT (APPLE OR HAIKU))
|
|||||||
|
|
||||||
option(WITH_GHOST_WAYLAND_DYNLOAD "Enable runtime dynamic WAYLAND libraries loading" ON)
|
option(WITH_GHOST_WAYLAND_DYNLOAD "Enable runtime dynamic WAYLAND libraries loading" ON)
|
||||||
mark_as_advanced(WITH_GHOST_WAYLAND_DYNLOAD)
|
mark_as_advanced(WITH_GHOST_WAYLAND_DYNLOAD)
|
||||||
|
|
||||||
|
set(WITH_GHOST_WAYLAND_APP_ID "" CACHE STRING "\
|
||||||
|
The application ID used for Blender (use default when an empty string), \
|
||||||
|
this can be used to differentiate Blender instances by version or branch for example."
|
||||||
|
)
|
||||||
|
mark_as_advanced(WITH_GHOST_WAYLAND_APP_ID)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
@@ -339,8 +345,12 @@ if(APPLE)
|
|||||||
else()
|
else()
|
||||||
set(WITH_COREAUDIO OFF)
|
set(WITH_COREAUDIO OFF)
|
||||||
endif()
|
endif()
|
||||||
if(UNIX AND NOT APPLE)
|
if(NOT WIN32)
|
||||||
option(WITH_JACK "Enable JACK Support (http://www.jackaudio.org)" ON)
|
if(APPLE)
|
||||||
|
option(WITH_JACK "Enable JACK Support (http://www.jackaudio.org)" OFF)
|
||||||
|
else()
|
||||||
|
option(WITH_JACK "Enable JACK Support (http://www.jackaudio.org)" ON)
|
||||||
|
endif()
|
||||||
option(WITH_JACK_DYNLOAD "Enable runtime dynamic JACK libraries loading" OFF)
|
option(WITH_JACK_DYNLOAD "Enable runtime dynamic JACK libraries loading" OFF)
|
||||||
else()
|
else()
|
||||||
set(WITH_JACK OFF)
|
set(WITH_JACK OFF)
|
||||||
@@ -457,7 +467,6 @@ if(NOT APPLE)
|
|||||||
|
|
||||||
option(WITH_CYCLES_CUDA_BINARIES "Build Cycles NVIDIA CUDA binaries" OFF)
|
option(WITH_CYCLES_CUDA_BINARIES "Build Cycles NVIDIA CUDA binaries" OFF)
|
||||||
set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 compute_75 CACHE STRING "CUDA architectures to build binaries for")
|
set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 compute_75 CACHE STRING "CUDA architectures to build binaries for")
|
||||||
option(WITH_CYCLES_CUBIN_COMPILER "Build cubins with nvrtc based compiler instead of nvcc" OFF)
|
|
||||||
option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful on machines with limited RAM)" OFF)
|
option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful on machines with limited RAM)" OFF)
|
||||||
option(WITH_CUDA_DYNLOAD "Dynamically load CUDA libraries at runtime (for developers, makes cuda-gdb work)" ON)
|
option(WITH_CUDA_DYNLOAD "Dynamically load CUDA libraries at runtime (for developers, makes cuda-gdb work)" ON)
|
||||||
|
|
||||||
@@ -465,7 +474,6 @@ if(NOT APPLE)
|
|||||||
set(CYCLES_RUNTIME_OPTIX_ROOT_DIR "" CACHE PATH "Path to the OptiX SDK root directory. When set, this path will be used at runtime to compile OptiX kernels.")
|
set(CYCLES_RUNTIME_OPTIX_ROOT_DIR "" CACHE PATH "Path to the OptiX SDK root directory. When set, this path will be used at runtime to compile OptiX kernels.")
|
||||||
|
|
||||||
mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
|
mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
|
||||||
mark_as_advanced(WITH_CYCLES_CUBIN_COMPILER)
|
|
||||||
mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
|
mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
|
||||||
mark_as_advanced(WITH_CUDA_DYNLOAD)
|
mark_as_advanced(WITH_CUDA_DYNLOAD)
|
||||||
mark_as_advanced(OPTIX_ROOT_DIR)
|
mark_as_advanced(OPTIX_ROOT_DIR)
|
||||||
@@ -476,7 +484,7 @@ endif()
|
|||||||
if(NOT APPLE)
|
if(NOT APPLE)
|
||||||
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
|
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
|
||||||
option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
|
option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
|
||||||
set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx906 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 CACHE STRING "AMD HIP architectures to build binaries for")
|
set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx906 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
|
||||||
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
|
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
|
||||||
mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
|
mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
|
||||||
endif()
|
endif()
|
||||||
@@ -1231,12 +1239,11 @@ if(WITH_OPENGL)
|
|||||||
add_definitions(-DWITH_OPENGL)
|
add_definitions(-DWITH_OPENGL)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
#-----------------------------------------------------------------------------
|
||||||
# -----------------------------------------------------------------------------
|
|
||||||
# Configure Vulkan.
|
# Configure Vulkan.
|
||||||
|
|
||||||
if(WITH_VULKAN_BACKEND)
|
if(WITH_VULKAN_BACKEND)
|
||||||
add_definitions(-DWITH_VULKAN_BACKEND)
|
list(APPEND BLENDER_GL_LIBRARIES ${VULKAN_LIBRARIES})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
|
@@ -40,15 +40,15 @@ ver-ocio:,ver-oiio:,ver-llvm:,ver-osl:,ver-osd:,ver-openvdb:,ver-xr-openxr:,ver-
|
|||||||
force-all,force-python,force-boost,force-tbb,\
|
force-all,force-python,force-boost,force-tbb,\
|
||||||
force-ocio,force-imath,force-openexr,force-oiio,force-llvm,force-osl,force-osd,force-openvdb,\
|
force-ocio,force-imath,force-openexr,force-oiio,force-llvm,force-osl,force-osd,force-openvdb,\
|
||||||
force-ffmpeg,force-opencollada,force-alembic,force-embree,force-oidn,force-usd,\
|
force-ffmpeg,force-opencollada,force-alembic,force-embree,force-oidn,force-usd,\
|
||||||
force-xr-openxr,force-level-zero, force-openpgl,\
|
force-xr-openxr,force-level-zero,force-openpgl,\
|
||||||
build-all,build-python,build-boost,build-tbb,\
|
build-all,build-python,build-boost,build-tbb,\
|
||||||
build-ocio,build-imath,build-openexr,build-oiio,build-llvm,build-osl,build-osd,build-openvdb,\
|
build-ocio,build-imath,build-openexr,build-oiio,build-llvm,build-osl,build-osd,build-openvdb,\
|
||||||
build-ffmpeg,build-opencollada,build-alembic,build-embree,build-oidn,build-usd,\
|
build-ffmpeg,build-opencollada,build-alembic,build-embree,build-oidn,build-usd,\
|
||||||
build-xr-openxr,build-level-zero, build-openpgl,\
|
build-xr-openxr,build-level-zero,build-openpgl,\
|
||||||
skip-python,skip-boost,skip-tbb,\
|
skip-python,skip-boost,skip-tbb,\
|
||||||
skip-ocio,skip-imath,skip-openexr,skip-oiio,skip-llvm,skip-osl,skip-osd,skip-openvdb,\
|
skip-ocio,skip-imath,skip-openexr,skip-oiio,skip-llvm,skip-osl,skip-osd,skip-openvdb,\
|
||||||
skip-ffmpeg,skip-opencollada,skip-alembic,skip-embree,skip-oidn,skip-usd,\
|
skip-ffmpeg,skip-opencollada,skip-alembic,skip-embree,skip-oidn,skip-usd,\
|
||||||
skip-xr-openxr,skip-level-zero, skip-openpgl \
|
skip-xr-openxr,skip-level-zero,skip-openpgl \
|
||||||
-- "$@" \
|
-- "$@" \
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -602,9 +602,9 @@ LEVEL_ZERO_FORCE_BUILD=false
|
|||||||
LEVEL_ZERO_FORCE_REBUILD=false
|
LEVEL_ZERO_FORCE_REBUILD=false
|
||||||
LEVEL_ZERO_SKIP=false
|
LEVEL_ZERO_SKIP=false
|
||||||
|
|
||||||
OPENPGL_VERSION="0.4.0"
|
OPENPGL_VERSION="0.4.1"
|
||||||
OPENPGL_VERSION_SHORT="0.4"
|
OPENPGL_VERSION_SHORT="0.4"
|
||||||
OPENPGL_VERSION_MIN="0.3.1"
|
OPENPGL_VERSION_MIN="0.4.1"
|
||||||
OPENPGL_VERSION_MEX="0.5"
|
OPENPGL_VERSION_MEX="0.5"
|
||||||
OPENPGL_FORCE_BUILD=false
|
OPENPGL_FORCE_BUILD=false
|
||||||
OPENPGL_FORCE_REBUILD=false
|
OPENPGL_FORCE_REBUILD=false
|
||||||
|
59
build_files/cmake/Modules/FindMoltenVK.cmake
Normal file
59
build_files/cmake/Modules/FindMoltenVK.cmake
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
# SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
# Copyright 2022 Blender Foundation.
|
||||||
|
|
||||||
|
# - Find MoltenVK libraries
|
||||||
|
# Find the MoltenVK includes and libraries
|
||||||
|
# This module defines
|
||||||
|
# MOLTENVK_INCLUDE_DIRS, where to find MoltenVK headers, Set when
|
||||||
|
# MOLTENVK_INCLUDE_DIR is found.
|
||||||
|
# MOLTENVK_LIBRARIES, libraries to link against to use MoltenVK.
|
||||||
|
# MOLTENVK_ROOT_DIR, The base directory to search for MoltenVK.
|
||||||
|
# This can also be an environment variable.
|
||||||
|
# MOLTENVK_FOUND, If false, do not try to use MoltenVK.
|
||||||
|
#
|
||||||
|
|
||||||
|
# If MOLTENVK_ROOT_DIR was defined in the environment, use it.
|
||||||
|
IF(NOT MOLTENVK_ROOT_DIR AND NOT $ENV{MOLTENVK_ROOT_DIR} STREQUAL "")
|
||||||
|
SET(MOLTENVK_ROOT_DIR $ENV{MOLTENVK_ROOT_DIR})
|
||||||
|
ENDIF()
|
||||||
|
|
||||||
|
SET(_moltenvk_SEARCH_DIRS
|
||||||
|
${MOLTENVK_ROOT_DIR}
|
||||||
|
${LIBDIR}/vulkan/MoltenVK
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
FIND_PATH(MOLTENVK_INCLUDE_DIR
|
||||||
|
NAMES
|
||||||
|
MoltenVK/vk_mvk_moltenvk.h
|
||||||
|
HINTS
|
||||||
|
${_moltenvk_SEARCH_DIRS}
|
||||||
|
PATH_SUFFIXES
|
||||||
|
include
|
||||||
|
)
|
||||||
|
|
||||||
|
FIND_LIBRARY(MOLTENVK_LIBRARY
|
||||||
|
NAMES
|
||||||
|
MoltenVK
|
||||||
|
HINTS
|
||||||
|
${_moltenvk_SEARCH_DIRS}
|
||||||
|
PATH_SUFFIXES
|
||||||
|
dylib/macOS
|
||||||
|
)
|
||||||
|
|
||||||
|
# handle the QUIETLY and REQUIRED arguments and set MOLTENVK_FOUND to TRUE if
|
||||||
|
# all listed variables are TRUE
|
||||||
|
INCLUDE(FindPackageHandleStandardArgs)
|
||||||
|
FIND_PACKAGE_HANDLE_STANDARD_ARGS(MoltenVK DEFAULT_MSG MOLTENVK_LIBRARY MOLTENVK_INCLUDE_DIR)
|
||||||
|
|
||||||
|
IF(MOLTENVK_FOUND)
|
||||||
|
SET(MOLTENVK_LIBRARIES ${MOLTENVK_LIBRARY})
|
||||||
|
SET(MOLTENVK_INCLUDE_DIRS ${MOLTENVK_INCLUDE_DIR})
|
||||||
|
ENDIF()
|
||||||
|
|
||||||
|
MARK_AS_ADVANCED(
|
||||||
|
MOLTENVK_INCLUDE_DIR
|
||||||
|
MOLTENVK_LIBRARY
|
||||||
|
)
|
||||||
|
|
||||||
|
UNSET(_moltenvk_SEARCH_DIRS)
|
@@ -103,10 +103,6 @@ if(EXISTS ${SOURCE_DIR}/.git)
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(MY_WC_BRANCH MATCHES "^blender-v")
|
|
||||||
set(MY_WC_BRANCH "master")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
unset(_git_below_check)
|
unset(_git_below_check)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
@@ -1240,7 +1240,7 @@ endmacro()
|
|||||||
|
|
||||||
macro(set_and_warn_library_found
|
macro(set_and_warn_library_found
|
||||||
_library_name _library_found _setting)
|
_library_name _library_found _setting)
|
||||||
if(NOT ${${_library_found}} AND ${${_setting}})
|
if(((NOT ${_library_found}) OR (NOT ${${_library_found}})) AND ${${_setting}})
|
||||||
if(WITH_STRICT_BUILD_OPTIONS)
|
if(WITH_STRICT_BUILD_OPTIONS)
|
||||||
message(SEND_ERROR "${_library_name} required but not found")
|
message(SEND_ERROR "${_library_name} required but not found")
|
||||||
else()
|
else()
|
||||||
|
@@ -106,8 +106,8 @@ if(WIN32)
|
|||||||
set(CPACK_WIX_LIGHT_EXTRA_FLAGS -dcl:medium)
|
set(CPACK_WIX_LIGHT_EXTRA_FLAGS -dcl:medium)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(CPACK_PACKAGE_EXECUTABLES "blender-launcher" "Blender")
|
set(CPACK_PACKAGE_EXECUTABLES "blender-launcher" "Blender ${MAJOR_VERSION}.${MINOR_VERSION}")
|
||||||
set(CPACK_CREATE_DESKTOP_LINKS "blender-launcher" "Blender")
|
set(CPACK_CREATE_DESKTOP_LINKS "blender-launcher" "Blender ${MAJOR_VERSION}.${MINOR_VERSION}")
|
||||||
|
|
||||||
include(CPack)
|
include(CPack)
|
||||||
|
|
||||||
|
@@ -100,6 +100,23 @@ if(WITH_USD)
|
|||||||
find_package(USD REQUIRED)
|
find_package(USD REQUIRED)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(WITH_VULKAN_BACKEND)
|
||||||
|
find_package(MoltenVK REQUIRED)
|
||||||
|
|
||||||
|
if(EXISTS ${LIBDIR}/vulkan)
|
||||||
|
set(VULKAN_FOUND On)
|
||||||
|
set(VULKAN_ROOT_DIR ${LIBDIR}/vulkan/macOS)
|
||||||
|
set(VULKAN_INCLUDE_DIR ${VULKAN_ROOT_DIR}/include)
|
||||||
|
set(VULKAN_LIBRARY ${VULKAN_ROOT_DIR}/lib/libvulkan.1.dylib)
|
||||||
|
|
||||||
|
set(VULKAN_INCLUDE_DIRS ${VULKAN_INCLUDE_DIR} ${MOLTENVK_INCLUDE_DIRS})
|
||||||
|
set(VULKAN_LIBRARIES ${VULKAN_LIBRARY} ${MOLTENVK_LIBRARIES})
|
||||||
|
else()
|
||||||
|
message(WARNING "Vulkan SDK was not found, disabling WITH_VULKAN_BACKEND")
|
||||||
|
set(WITH_VULKAN_BACKEND OFF)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
if(WITH_OPENSUBDIV)
|
if(WITH_OPENSUBDIV)
|
||||||
find_package(OpenSubdiv)
|
find_package(OpenSubdiv)
|
||||||
endif()
|
endif()
|
||||||
|
@@ -108,6 +108,10 @@ find_package_wrapper(ZLIB REQUIRED)
|
|||||||
find_package_wrapper(Zstd REQUIRED)
|
find_package_wrapper(Zstd REQUIRED)
|
||||||
find_package_wrapper(Epoxy REQUIRED)
|
find_package_wrapper(Epoxy REQUIRED)
|
||||||
|
|
||||||
|
if(WITH_VULKAN_BACKEND)
|
||||||
|
find_package_wrapper(Vulkan REQUIRED)
|
||||||
|
endif()
|
||||||
|
|
||||||
function(check_freetype_for_brotli)
|
function(check_freetype_for_brotli)
|
||||||
include(CheckSymbolExists)
|
include(CheckSymbolExists)
|
||||||
set(CMAKE_REQUIRED_INCLUDES ${FREETYPE_INCLUDE_DIRS})
|
set(CMAKE_REQUIRED_INCLUDES ${FREETYPE_INCLUDE_DIRS})
|
||||||
@@ -322,9 +326,10 @@ if(WITH_CYCLES AND WITH_CYCLES_DEVICE_ONEAPI)
|
|||||||
file(GLOB _sycl_runtime_libraries
|
file(GLOB _sycl_runtime_libraries
|
||||||
${SYCL_ROOT_DIR}/lib/libsycl.so
|
${SYCL_ROOT_DIR}/lib/libsycl.so
|
||||||
${SYCL_ROOT_DIR}/lib/libsycl.so.*
|
${SYCL_ROOT_DIR}/lib/libsycl.so.*
|
||||||
${SYCL_ROOT_DIR}/lib/libpi_level_zero.so
|
${SYCL_ROOT_DIR}/lib/libpi_*.so
|
||||||
)
|
)
|
||||||
list(FILTER _sycl_runtime_libraries EXCLUDE REGEX ".*\.py")
|
list(FILTER _sycl_runtime_libraries EXCLUDE REGEX ".*\.py")
|
||||||
|
list(REMOVE_ITEM _sycl_runtime_libraries "${SYCL_ROOT_DIR}/lib/libpi_opencl.so")
|
||||||
list(APPEND PLATFORM_BUNDLED_LIBRARIES ${_sycl_runtime_libraries})
|
list(APPEND PLATFORM_BUNDLED_LIBRARIES ${_sycl_runtime_libraries})
|
||||||
unset(_sycl_runtime_libraries)
|
unset(_sycl_runtime_libraries)
|
||||||
endif()
|
endif()
|
||||||
|
@@ -419,7 +419,7 @@ if(WITH_IMAGE_OPENEXR)
|
|||||||
warn_hardcoded_paths(OpenEXR)
|
warn_hardcoded_paths(OpenEXR)
|
||||||
set(OPENEXR ${LIBDIR}/openexr)
|
set(OPENEXR ${LIBDIR}/openexr)
|
||||||
set(OPENEXR_INCLUDE_DIR ${OPENEXR}/include)
|
set(OPENEXR_INCLUDE_DIR ${OPENEXR}/include)
|
||||||
set(OPENEXR_INCLUDE_DIRS ${OPENEXR_INCLUDE_DIR} ${IMATH_INCLUDE_DIRS} ${OPENEXR}/include/OpenEXR)
|
set(OPENEXR_INCLUDE_DIRS ${OPENEXR_INCLUDE_DIR} ${IMATH_INCLUDE_DIRS} ${OPENEXR_INCLUDE_DIR}/OpenEXR)
|
||||||
set(OPENEXR_LIBPATH ${OPENEXR}/lib)
|
set(OPENEXR_LIBPATH ${OPENEXR}/lib)
|
||||||
# Check if the 3.x library name exists
|
# Check if the 3.x library name exists
|
||||||
# if not assume this is a 2.x library folder
|
# if not assume this is a 2.x library folder
|
||||||
@@ -568,7 +568,8 @@ if(WITH_OPENIMAGEIO)
|
|||||||
if(NOT OpenImageIO_FOUND)
|
if(NOT OpenImageIO_FOUND)
|
||||||
set(OPENIMAGEIO ${LIBDIR}/OpenImageIO)
|
set(OPENIMAGEIO ${LIBDIR}/OpenImageIO)
|
||||||
set(OPENIMAGEIO_LIBPATH ${OPENIMAGEIO}/lib)
|
set(OPENIMAGEIO_LIBPATH ${OPENIMAGEIO}/lib)
|
||||||
set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO}/include)
|
set(OPENIMAGEIO_INCLUDE_DIR ${OPENIMAGEIO}/include)
|
||||||
|
set(OPENIMAGEIO_INCLUDE_DIRS ${OPENIMAGEIO_INCLUDE_DIR})
|
||||||
set(OIIO_OPTIMIZED optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO.lib optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util.lib)
|
set(OIIO_OPTIMIZED optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO.lib optimized ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util.lib)
|
||||||
set(OIIO_DEBUG debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_d.lib debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util_d.lib)
|
set(OIIO_DEBUG debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_d.lib debug ${OPENIMAGEIO_LIBPATH}/OpenImageIO_Util_d.lib)
|
||||||
set(OPENIMAGEIO_LIBRARIES ${OIIO_OPTIMIZED} ${OIIO_DEBUG})
|
set(OPENIMAGEIO_LIBRARIES ${OIIO_OPTIMIZED} ${OIIO_DEBUG})
|
||||||
@@ -785,6 +786,14 @@ if(WITH_CYCLES AND WITH_CYCLES_OSL)
|
|||||||
endif()
|
endif()
|
||||||
find_path(OSL_INCLUDE_DIR OSL/oslclosure.h PATHS ${CYCLES_OSL}/include)
|
find_path(OSL_INCLUDE_DIR OSL/oslclosure.h PATHS ${CYCLES_OSL}/include)
|
||||||
find_program(OSL_COMPILER NAMES oslc PATHS ${CYCLES_OSL}/bin)
|
find_program(OSL_COMPILER NAMES oslc PATHS ${CYCLES_OSL}/bin)
|
||||||
|
file(STRINGS "${OSL_INCLUDE_DIR}/OSL/oslversion.h" OSL_LIBRARY_VERSION_MAJOR
|
||||||
|
REGEX "^[ \t]*#define[ \t]+OSL_LIBRARY_VERSION_MAJOR[ \t]+[0-9]+.*$")
|
||||||
|
file(STRINGS "${OSL_INCLUDE_DIR}/OSL/oslversion.h" OSL_LIBRARY_VERSION_MINOR
|
||||||
|
REGEX "^[ \t]*#define[ \t]+OSL_LIBRARY_VERSION_MINOR[ \t]+[0-9]+.*$")
|
||||||
|
string(REGEX REPLACE ".*#define[ \t]+OSL_LIBRARY_VERSION_MAJOR[ \t]+([.0-9]+).*"
|
||||||
|
"\\1" OSL_LIBRARY_VERSION_MAJOR ${OSL_LIBRARY_VERSION_MAJOR})
|
||||||
|
string(REGEX REPLACE ".*#define[ \t]+OSL_LIBRARY_VERSION_MINOR[ \t]+([.0-9]+).*"
|
||||||
|
"\\1" OSL_LIBRARY_VERSION_MINOR ${OSL_LIBRARY_VERSION_MINOR})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(WITH_CYCLES AND WITH_CYCLES_EMBREE)
|
if(WITH_CYCLES AND WITH_CYCLES_EMBREE)
|
||||||
@@ -917,6 +926,20 @@ if(WITH_HARU)
|
|||||||
set(HARU_LIBRARIES ${HARU_ROOT_DIR}/lib/libhpdfs.lib)
|
set(HARU_LIBRARIES ${HARU_ROOT_DIR}/lib/libhpdfs.lib)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(WITH_VULKAN_BACKEND)
|
||||||
|
if(EXISTS ${LIBDIR}/vulkan)
|
||||||
|
set(VULKAN_FOUND On)
|
||||||
|
set(VULKAN_ROOT_DIR ${LIBDIR}/vulkan)
|
||||||
|
set(VULKAN_INCLUDE_DIR ${VULKAN_ROOT_DIR}/include)
|
||||||
|
set(VULKAN_INCLUDE_DIRS ${VULKAN_INCLUDE_DIR})
|
||||||
|
set(VULKAN_LIBRARY ${VULKAN_ROOT_DIR}/lib/vulkan-1.lib)
|
||||||
|
set(VULKAN_LIBRARIES ${VULKAN_LIBRARY})
|
||||||
|
else()
|
||||||
|
message(WARNING "Vulkan SDK was not found, disabling WITH_VULKAN_BACKEND")
|
||||||
|
set(WITH_VULKAN_BACKEND OFF)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
if(WITH_CYCLES AND WITH_CYCLES_PATH_GUIDING)
|
if(WITH_CYCLES AND WITH_CYCLES_PATH_GUIDING)
|
||||||
find_package(openpgl QUIET)
|
find_package(openpgl QUIET)
|
||||||
if(openpgl_FOUND)
|
if(openpgl_FOUND)
|
||||||
@@ -949,7 +972,13 @@ if(WITH_CYCLES AND WITH_CYCLES_DEVICE_ONEAPI)
|
|||||||
endforeach()
|
endforeach()
|
||||||
unset(_sycl_runtime_libraries_glob)
|
unset(_sycl_runtime_libraries_glob)
|
||||||
|
|
||||||
list(APPEND _sycl_runtime_libraries ${SYCL_ROOT_DIR}/bin/pi_level_zero.dll)
|
file(GLOB _sycl_pi_runtime_libraries_glob
|
||||||
|
${SYCL_ROOT_DIR}/bin/pi_*.dll
|
||||||
|
)
|
||||||
|
list(REMOVE_ITEM _sycl_pi_runtime_libraries_glob "${SYCL_ROOT_DIR}/bin/pi_opencl.dll")
|
||||||
|
list (APPEND _sycl_runtime_libraries ${_sycl_pi_runtime_libraries_glob})
|
||||||
|
unset(_sycl_pi_runtime_libraries_glob)
|
||||||
|
|
||||||
list(APPEND PLATFORM_BUNDLED_LIBRARIES ${_sycl_runtime_libraries})
|
list(APPEND PLATFORM_BUNDLED_LIBRARIES ${_sycl_runtime_libraries})
|
||||||
unset(_sycl_runtime_libraries)
|
unset(_sycl_runtime_libraries)
|
||||||
endif()
|
endif()
|
||||||
|
@@ -55,7 +55,7 @@ buildbot:
|
|||||||
cuda11:
|
cuda11:
|
||||||
version: '11.4.1'
|
version: '11.4.1'
|
||||||
hip:
|
hip:
|
||||||
version: '5.2.21440'
|
version: '5.3.22480'
|
||||||
optix:
|
optix:
|
||||||
version: '7.3.0'
|
version: '7.3.0'
|
||||||
ocloc:
|
ocloc:
|
||||||
|
@@ -35,6 +35,25 @@ from typing import (
|
|||||||
Tuple,
|
Tuple,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
# Long Description
|
||||||
|
|
||||||
|
long_description = """# Blender
|
||||||
|
|
||||||
|
[Blender](https://www.blender.org) is the free and open source 3D creation suite. It supports the entirety of the 3D pipeline—modeling, rigging, animation, simulation, rendering, compositing and motion tracking, even video editing.
|
||||||
|
|
||||||
|
This package provides Blender as a Python module for use in studio pipelines, web services, scientific research, and more.
|
||||||
|
|
||||||
|
## Documentation
|
||||||
|
|
||||||
|
* [Blender Python API](https://docs.blender.org/api/current/)
|
||||||
|
* [Blender as a Python Module](https://docs.blender.org/api/current/info_advanced_blender_as_bpy.html)
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
[System requirements](https://www.blender.org/download/requirements/) are the same as Blender.
|
||||||
|
|
||||||
|
Each Blender release supports one Python version, and the package is only compatible with that version."""
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
# Generic Functions
|
# Generic Functions
|
||||||
@@ -195,6 +214,8 @@ def main() -> None:
|
|||||||
options={"bdist_wheel": {"plat_name": platform_tag}},
|
options={"bdist_wheel": {"plat_name": platform_tag}},
|
||||||
|
|
||||||
description="Blender as a Python module",
|
description="Blender as a Python module",
|
||||||
|
long_description=long_description,
|
||||||
|
long_description_content_type='text/markdown',
|
||||||
license="GPL-3.0",
|
license="GPL-3.0",
|
||||||
author="Blender Foundation",
|
author="Blender Foundation",
|
||||||
author_email="bf-committers@blender.org",
|
author_email="bf-committers@blender.org",
|
||||||
|
@@ -59,10 +59,11 @@ def svn_update(args: argparse.Namespace, release_version: Optional[str]) -> None
|
|||||||
|
|
||||||
# Checkout precompiled libraries
|
# Checkout precompiled libraries
|
||||||
if sys.platform == 'darwin':
|
if sys.platform == 'darwin':
|
||||||
if platform.machine() == 'x86_64':
|
# Check platform.version to detect arm64 with x86_64 python binary.
|
||||||
lib_platform = "darwin"
|
if platform.machine() == 'arm64' or ('ARM64' in platform.version()):
|
||||||
elif platform.machine() == 'arm64':
|
|
||||||
lib_platform = "darwin_arm64"
|
lib_platform = "darwin_arm64"
|
||||||
|
elif platform.machine() == 'x86_64':
|
||||||
|
lib_platform = "darwin"
|
||||||
else:
|
else:
|
||||||
lib_platform = None
|
lib_platform = None
|
||||||
elif sys.platform == 'win32':
|
elif sys.platform == 'win32':
|
||||||
|
4
extern/CMakeLists.txt
vendored
4
extern/CMakeLists.txt
vendored
@@ -91,3 +91,7 @@ endif()
|
|||||||
if(WITH_COMPOSITOR_CPU)
|
if(WITH_COMPOSITOR_CPU)
|
||||||
add_subdirectory(smaa_areatex)
|
add_subdirectory(smaa_areatex)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(WITH_VULKAN_BACKEND)
|
||||||
|
add_subdirectory(vulkan_memory_allocator)
|
||||||
|
endif()
|
||||||
|
@@ -27,6 +27,7 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
AUD_NAMESPACE_BEGIN
|
AUD_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
24
extern/vulkan_memory_allocator/CMakeLists.txt
vendored
Normal file
24
extern/vulkan_memory_allocator/CMakeLists.txt
vendored
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
# Copyright 2022 Blender Foundation. All rights reserved.
|
||||||
|
|
||||||
|
set(INC
|
||||||
|
.
|
||||||
|
)
|
||||||
|
|
||||||
|
set(INC_SYS
|
||||||
|
${VULKAN_INCLUDE_DIRS}
|
||||||
|
)
|
||||||
|
|
||||||
|
set(SRC
|
||||||
|
vk_mem_alloc_impl.cc
|
||||||
|
|
||||||
|
vk_mem_alloc.h
|
||||||
|
)
|
||||||
|
|
||||||
|
blender_add_lib(extern_vulkan_memory_allocator "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
|
||||||
|
|
||||||
|
if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||||
|
target_compile_options(extern_vulkan_memory_allocator
|
||||||
|
PRIVATE "-Wno-nullability-completeness"
|
||||||
|
)
|
||||||
|
endif()
|
19
extern/vulkan_memory_allocator/LICENSE.txt
vendored
Normal file
19
extern/vulkan_memory_allocator/LICENSE.txt
vendored
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
THE SOFTWARE.
|
5
extern/vulkan_memory_allocator/README.blender
vendored
Normal file
5
extern/vulkan_memory_allocator/README.blender
vendored
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
Project: VulkanMemoryAllocator
|
||||||
|
URL: https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator
|
||||||
|
License: MIT
|
||||||
|
Upstream version: a6bfc23
|
||||||
|
Local modifications: None
|
175
extern/vulkan_memory_allocator/README.md
vendored
Normal file
175
extern/vulkan_memory_allocator/README.md
vendored
Normal file
@@ -0,0 +1,175 @@
|
|||||||
|
# Vulkan Memory Allocator
|
||||||
|
|
||||||
|
Easy to integrate Vulkan memory allocation library.
|
||||||
|
|
||||||
|
**Documentation:** Browse online: [Vulkan Memory Allocator](https://gpuopen-librariesandsdks.github.io/VulkanMemoryAllocator/html/) (generated from Doxygen-style comments in [include/vk_mem_alloc.h](include/vk_mem_alloc.h))
|
||||||
|
|
||||||
|
**License:** MIT. See [LICENSE.txt](LICENSE.txt)
|
||||||
|
|
||||||
|
**Changelog:** See [CHANGELOG.md](CHANGELOG.md)
|
||||||
|
|
||||||
|
**Product page:** [Vulkan Memory Allocator on GPUOpen](https://gpuopen.com/gaming-product/vulkan-memory-allocator/)
|
||||||
|
|
||||||
|
**Build status:**
|
||||||
|
|
||||||
|
- Windows: [](https://ci.appveyor.com/project/adam-sawicki-amd/vulkanmemoryallocator/branch/master)
|
||||||
|
- Linux: [](https://app.travis-ci.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator)
|
||||||
|
|
||||||
|
[](http://isitmaintained.com/project/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator "Average time to resolve an issue")
|
||||||
|
|
||||||
|
# Problem
|
||||||
|
|
||||||
|
Memory allocation and resource (buffer and image) creation in Vulkan is difficult (comparing to older graphics APIs, like D3D11 or OpenGL) for several reasons:
|
||||||
|
|
||||||
|
- It requires a lot of boilerplate code, just like everything else in Vulkan, because it is a low-level and high-performance API.
|
||||||
|
- There is additional level of indirection: `VkDeviceMemory` is allocated separately from creating `VkBuffer`/`VkImage` and they must be bound together.
|
||||||
|
- Driver must be queried for supported memory heaps and memory types. Different GPU vendors provide different types of it.
|
||||||
|
- It is recommended to allocate bigger chunks of memory and assign parts of them to particular resources, as there is a limit on maximum number of memory blocks that can be allocated.
|
||||||
|
|
||||||
|
# Features
|
||||||
|
|
||||||
|
This library can help game developers to manage memory allocations and resource creation by offering some higher-level functions:
|
||||||
|
|
||||||
|
1. Functions that help to choose correct and optimal memory type based on intended usage of the memory.
|
||||||
|
- Required or preferred traits of the memory are expressed using higher-level description comparing to Vulkan flags.
|
||||||
|
2. Functions that allocate memory blocks, reserve and return parts of them (`VkDeviceMemory` + offset + size) to the user.
|
||||||
|
- Library keeps track of allocated memory blocks, used and unused ranges inside them, finds best matching unused ranges for new allocations, respects all the rules of alignment and buffer/image granularity.
|
||||||
|
3. Functions that can create an image/buffer, allocate memory for it and bind them together - all in one call.
|
||||||
|
|
||||||
|
Additional features:
|
||||||
|
|
||||||
|
- Well-documented - description of all functions and structures provided, along with chapters that contain general description and example code.
|
||||||
|
- Thread-safety: Library is designed to be used in multithreaded code. Access to a single device memory block referred by different buffers and textures (binding, mapping) is synchronized internally. Memory mapping is reference-counted.
|
||||||
|
- Configuration: Fill optional members of `VmaAllocatorCreateInfo` structure to provide custom CPU memory allocator, pointers to Vulkan functions and other parameters.
|
||||||
|
- Customization and integration with custom engines: Predefine appropriate macros to provide your own implementation of all external facilities used by the library like assert, mutex, atomic.
|
||||||
|
- Support for memory mapping, reference-counted internally. Support for persistently mapped memory: Just allocate with appropriate flag and access the pointer to already mapped memory.
|
||||||
|
- Support for non-coherent memory. Functions that flush/invalidate memory. `nonCoherentAtomSize` is respected automatically.
|
||||||
|
- Support for resource aliasing (overlap).
|
||||||
|
- Support for sparse binding and sparse residency: Convenience functions that allocate or free multiple memory pages at once.
|
||||||
|
- Custom memory pools: Create a pool with desired parameters (e.g. fixed or limited maximum size) and allocate memory out of it.
|
||||||
|
- Linear allocator: Create a pool with linear algorithm and use it for much faster allocations and deallocations in free-at-once, stack, double stack, or ring buffer fashion.
|
||||||
|
- Support for Vulkan 1.0, 1.1, 1.2, 1.3.
|
||||||
|
- Support for extensions (and equivalent functionality included in new Vulkan versions):
|
||||||
|
- VK_KHR_dedicated_allocation: Just enable it and it will be used automatically by the library.
|
||||||
|
- VK_KHR_buffer_device_address: Flag `VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR` is automatically added to memory allocations where needed.
|
||||||
|
- VK_EXT_memory_budget: Used internally if available to query for current usage and budget. If not available, it falls back to an estimation based on memory heap sizes.
|
||||||
|
- VK_EXT_memory_priority: Set `priority` of allocations or custom pools and it will be set automatically using this extension.
|
||||||
|
- VK_AMD_device_coherent_memory
|
||||||
|
- Defragmentation of GPU and CPU memory: Let the library move data around to free some memory blocks and make your allocations better compacted.
|
||||||
|
- Statistics: Obtain brief or detailed statistics about the amount of memory used, unused, number of allocated blocks, number of allocations etc. - globally, per memory heap, and per memory type.
|
||||||
|
- Debug annotations: Associate custom `void* pUserData` and debug `char* pName` with each allocation.
|
||||||
|
- JSON dump: Obtain a string in JSON format with detailed map of internal state, including list of allocations, their string names, and gaps between them.
|
||||||
|
- Convert this JSON dump into a picture to visualize your memory. See [tools/GpuMemDumpVis](tools/GpuMemDumpVis/README.md).
|
||||||
|
- Debugging incorrect memory usage: Enable initialization of all allocated memory with a bit pattern to detect usage of uninitialized or freed memory. Enable validation of a magic number after every allocation to detect out-of-bounds memory corruption.
|
||||||
|
- Support for interoperability with OpenGL.
|
||||||
|
- Virtual allocator: Interface for using core allocation algorithm to allocate any custom data, e.g. pieces of one large buffer.
|
||||||
|
|
||||||
|
# Prerequisites
|
||||||
|
|
||||||
|
- Self-contained C++ library in single header file. No external dependencies other than standard C and C++ library and of course Vulkan. Some features of C++14 used. STL containers, RTTI, or C++ exceptions are not used.
|
||||||
|
- Public interface in C, in same convention as Vulkan API. Implementation in C++.
|
||||||
|
- Error handling implemented by returning `VkResult` error codes - same way as in Vulkan.
|
||||||
|
- Interface documented using Doxygen-style comments.
|
||||||
|
- Platform-independent, but developed and tested on Windows using Visual Studio. Continuous integration setup for Windows and Linux. Used also on Android, MacOS, and other platforms.
|
||||||
|
|
||||||
|
# Example
|
||||||
|
|
||||||
|
Basic usage of this library is very simple. Advanced features are optional. After you created global `VmaAllocator` object, a complete code needed to create a buffer may look like this:
|
||||||
|
|
||||||
|
```cpp
|
||||||
|
VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
|
||||||
|
bufferInfo.size = 65536;
|
||||||
|
bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
|
||||||
|
|
||||||
|
VmaAllocationCreateInfo allocInfo = {};
|
||||||
|
allocInfo.usage = VMA_MEMORY_USAGE_AUTO;
|
||||||
|
|
||||||
|
VkBuffer buffer;
|
||||||
|
VmaAllocation allocation;
|
||||||
|
vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr);
|
||||||
|
```
|
||||||
|
|
||||||
|
With this one function call:
|
||||||
|
|
||||||
|
1. `VkBuffer` is created.
|
||||||
|
2. `VkDeviceMemory` block is allocated if needed.
|
||||||
|
3. An unused region of the memory block is bound to this buffer.
|
||||||
|
|
||||||
|
`VmaAllocation` is an object that represents memory assigned to this buffer. It can be queried for parameters like `VkDeviceMemory` handle and offset.
|
||||||
|
|
||||||
|
# How to build
|
||||||
|
|
||||||
|
On Windows it is recommended to use [CMake UI](https://cmake.org/runningcmake/). Alternatively you can generate a Visual Studio project map using CMake in command line: `cmake -B./build/ -DCMAKE_BUILD_TYPE=Debug -G "Visual Studio 16 2019" -A x64 ./`
|
||||||
|
|
||||||
|
On Linux:
|
||||||
|
|
||||||
|
```
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake ..
|
||||||
|
make
|
||||||
|
```
|
||||||
|
|
||||||
|
The following targets are available
|
||||||
|
|
||||||
|
| Target | Description | CMake option | Default setting |
|
||||||
|
| ------------- | ------------- | ------------- | ------------- |
|
||||||
|
| VmaSample | VMA sample application | `VMA_BUILD_SAMPLE` | `OFF` |
|
||||||
|
| VmaBuildSampleShaders | Shaders for VmaSample | `VMA_BUILD_SAMPLE_SHADERS` | `OFF` |
|
||||||
|
|
||||||
|
Please note that while VulkanMemoryAllocator library is supported on other platforms besides Windows, VmaSample is not.
|
||||||
|
|
||||||
|
These CMake options are available
|
||||||
|
|
||||||
|
| CMake option | Description | Default setting |
|
||||||
|
| ------------- | ------------- | ------------- |
|
||||||
|
| `VMA_RECORDING_ENABLED` | Enable VMA memory recording for debugging | `OFF` |
|
||||||
|
| `VMA_USE_STL_CONTAINERS` | Use C++ STL containers instead of VMA's containers | `OFF` |
|
||||||
|
| `VMA_STATIC_VULKAN_FUNCTIONS` | Link statically with Vulkan API | `OFF` |
|
||||||
|
| `VMA_DYNAMIC_VULKAN_FUNCTIONS` | Fetch pointers to Vulkan functions internally (no static linking) | `ON` |
|
||||||
|
| `VMA_DEBUG_ALWAYS_DEDICATED_MEMORY` | Every allocation will have its own memory block | `OFF` |
|
||||||
|
| `VMA_DEBUG_INITIALIZE_ALLOCATIONS` | Automatically fill new allocations and destroyed allocations with some bit pattern | `OFF` |
|
||||||
|
| `VMA_DEBUG_GLOBAL_MUTEX` | Enable single mutex protecting all entry calls to the library | `OFF` |
|
||||||
|
| `VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT` | Never exceed [VkPhysicalDeviceLimits::maxMemoryAllocationCount](https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#limits-maxMemoryAllocationCount) and return error | `OFF` |
|
||||||
|
|
||||||
|
# Binaries
|
||||||
|
|
||||||
|
The release comes with precompiled binary executable for "VulkanSample" application which contains test suite. It is compiled using Visual Studio 2019, so it requires appropriate libraries to work, including "MSVCP140.dll", "VCRUNTIME140.dll", "VCRUNTIME140_1.dll". If the launch fails with error message telling about those files missing, please download and install [Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019](https://support.microsoft.com/en-us/help/2977003/the-latest-supported-visual-c-downloads), "x64" version.
|
||||||
|
|
||||||
|
# Read more
|
||||||
|
|
||||||
|
See **[Documentation](https://gpuopen-librariesandsdks.github.io/VulkanMemoryAllocator/html/)**.
|
||||||
|
|
||||||
|
# Software using this library
|
||||||
|
|
||||||
|
- **[X-Plane](https://x-plane.com/)**
|
||||||
|
- **[Detroit: Become Human](https://gpuopen.com/learn/porting-detroit-3/)**
|
||||||
|
- **[Vulkan Samples](https://github.com/LunarG/VulkanSamples)** - official Khronos Vulkan samples. License: Apache-style.
|
||||||
|
- **[Anvil](https://github.com/GPUOpen-LibrariesAndSDKs/Anvil)** - cross-platform framework for Vulkan. License: MIT.
|
||||||
|
- **[Filament](https://github.com/google/filament)** - physically based rendering engine for Android, Windows, Linux and macOS, from Google. Apache License 2.0.
|
||||||
|
- **[Atypical Games - proprietary game engine](https://developer.samsung.com/galaxy-gamedev/gamedev-blog/infinitejet.html)**
|
||||||
|
- **[Flax Engine](https://flaxengine.com/)**
|
||||||
|
- **[Godot Engine](https://github.com/godotengine/godot/)** - multi-platform 2D and 3D game engine. License: MIT.
|
||||||
|
- **[Lightweight Java Game Library (LWJGL)](https://www.lwjgl.org/)** - includes binding of the library for Java. License: BSD.
|
||||||
|
- **[PowerVR SDK](https://github.com/powervr-graphics/Native_SDK)** - C++ cross-platform 3D graphics SDK, from Imagination. License: MIT.
|
||||||
|
- **[Skia](https://github.com/google/skia)** - complete 2D graphic library for drawing Text, Geometries, and Images, from Google.
|
||||||
|
- **[The Forge](https://github.com/ConfettiFX/The-Forge)** - cross-platform rendering framework. Apache License 2.0.
|
||||||
|
- **[VK9](https://github.com/disks86/VK9)** - Direct3D 9 compatibility layer using Vulkan. Zlib lincese.
|
||||||
|
- **[vkDOOM3](https://github.com/DustinHLand/vkDOOM3)** - Vulkan port of GPL DOOM 3 BFG Edition. License: GNU GPL.
|
||||||
|
- **[vkQuake2](https://github.com/kondrak/vkQuake2)** - vanilla Quake 2 with Vulkan support. License: GNU GPL.
|
||||||
|
- **[Vulkan Best Practice for Mobile Developers](https://github.com/ARM-software/vulkan_best_practice_for_mobile_developers)** from ARM. License: MIT.
|
||||||
|
- **[RPCS3](https://github.com/RPCS3/rpcs3)** - PlayStation 3 emulator/debugger. License: GNU GPLv2.
|
||||||
|
- **[PPSSPP](https://github.com/hrydgard/ppsspp)** - Playstation Portable emulator/debugger. License: GNU GPLv2+.
|
||||||
|
|
||||||
|
[Many other projects on GitHub](https://github.com/search?q=AMD_VULKAN_MEMORY_ALLOCATOR_H&type=Code) and some game development studios that use Vulkan in their games.
|
||||||
|
|
||||||
|
# See also
|
||||||
|
|
||||||
|
- **[D3D12 Memory Allocator](https://github.com/GPUOpen-LibrariesAndSDKs/D3D12MemoryAllocator)** - equivalent library for Direct3D 12. License: MIT.
|
||||||
|
- **[Awesome Vulkan](https://github.com/vinjn/awesome-vulkan)** - a curated list of awesome Vulkan libraries, debuggers and resources.
|
||||||
|
- **[VulkanMemoryAllocator-Hpp](https://github.com/malte-v/VulkanMemoryAllocator-Hpp)** - C++ binding for this library. License: CC0-1.0.
|
||||||
|
- **[PyVMA](https://github.com/realitix/pyvma)** - Python wrapper for this library. Author: Jean-Sébastien B. (@realitix). License: Apache 2.0.
|
||||||
|
- **[vk-mem](https://github.com/gwihlidal/vk-mem-rs)** - Rust binding for this library. Author: Graham Wihlidal. License: Apache 2.0 or MIT.
|
||||||
|
- **[Haskell bindings](https://hackage.haskell.org/package/VulkanMemoryAllocator)**, **[github](https://github.com/expipiplus1/vulkan/tree/master/VulkanMemoryAllocator)** - Haskell bindings for this library. Author: Ellie Hermaszewska (@expipiplus1). License BSD-3-Clause.
|
||||||
|
- **[vma_sample_sdl](https://github.com/rextimmy/vma_sample_sdl)** - SDL port of the sample app of this library (with the goal of running it on multiple platforms, including MacOS). Author: @rextimmy. License: MIT.
|
||||||
|
- **[vulkan-malloc](https://github.com/dylanede/vulkan-malloc)** - Vulkan memory allocation library for Rust. Based on version 1 of this library. Author: Dylan Ede (@dylanede). License: MIT / Apache 2.0.
|
19558
extern/vulkan_memory_allocator/vk_mem_alloc.h
vendored
Normal file
19558
extern/vulkan_memory_allocator/vk_mem_alloc.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
12
extern/vulkan_memory_allocator/vk_mem_alloc_impl.cc
vendored
Normal file
12
extern/vulkan_memory_allocator/vk_mem_alloc_impl.cc
vendored
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
/* SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
* Copyright 2022 Blender Foundation. All rights reserved. */
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
# include <MoltenVK/vk_mvk_moltenvk.h>
|
||||||
|
#else
|
||||||
|
# include <vulkan/vulkan.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define VMA_IMPLEMENTATION
|
||||||
|
|
||||||
|
#include "vk_mem_alloc.h"
|
@@ -253,6 +253,33 @@ if(WITH_CYCLES_OSL)
|
|||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(WITH_CYCLES_DEVICE_CUDA OR WITH_CYCLES_DEVICE_OPTIX)
|
||||||
|
add_definitions(-DWITH_CUDA)
|
||||||
|
|
||||||
|
if(WITH_CUDA_DYNLOAD)
|
||||||
|
include_directories(
|
||||||
|
../../extern/cuew/include
|
||||||
|
)
|
||||||
|
add_definitions(-DWITH_CUDA_DYNLOAD)
|
||||||
|
else()
|
||||||
|
include_directories(
|
||||||
|
SYSTEM
|
||||||
|
${CUDA_TOOLKIT_INCLUDE}
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(WITH_CYCLES_DEVICE_HIP)
|
||||||
|
add_definitions(-DWITH_HIP)
|
||||||
|
|
||||||
|
if(WITH_HIP_DYNLOAD)
|
||||||
|
include_directories(
|
||||||
|
../../extern/hipew/include
|
||||||
|
)
|
||||||
|
add_definitions(-DWITH_HIP_DYNLOAD)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
if(WITH_CYCLES_DEVICE_OPTIX)
|
if(WITH_CYCLES_DEVICE_OPTIX)
|
||||||
find_package(OptiX 7.3.0)
|
find_package(OptiX 7.3.0)
|
||||||
|
|
||||||
@@ -261,12 +288,16 @@ if(WITH_CYCLES_DEVICE_OPTIX)
|
|||||||
include_directories(
|
include_directories(
|
||||||
SYSTEM
|
SYSTEM
|
||||||
${OPTIX_INCLUDE_DIR}
|
${OPTIX_INCLUDE_DIR}
|
||||||
)
|
)
|
||||||
else()
|
else()
|
||||||
set_and_warn_library_found("OptiX" OPTIX_FOUND WITH_CYCLES_DEVICE_OPTIX)
|
set_and_warn_library_found("OptiX" OPTIX_FOUND WITH_CYCLES_DEVICE_OPTIX)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(WITH_CYCLES_DEVICE_METAL)
|
||||||
|
add_definitions(-DWITH_METAL)
|
||||||
|
endif()
|
||||||
|
|
||||||
if (WITH_CYCLES_DEVICE_ONEAPI)
|
if (WITH_CYCLES_DEVICE_ONEAPI)
|
||||||
add_definitions(-DWITH_ONEAPI)
|
add_definitions(-DWITH_ONEAPI)
|
||||||
endif()
|
endif()
|
||||||
@@ -392,7 +423,7 @@ if(WITH_CYCLES_HYDRA_RENDER_DELEGATE AND (NOT WITH_BLENDER) AND (NOT WITH_CYCLES
|
|||||||
set(CYCLES_INSTALL_PATH ${CYCLES_INSTALL_PATH}/hdCycles/resources)
|
set(CYCLES_INSTALL_PATH ${CYCLES_INSTALL_PATH}/hdCycles/resources)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER))
|
if(WITH_CYCLES_CUDA_BINARIES)
|
||||||
if(MSVC)
|
if(MSVC)
|
||||||
set(MAX_MSVC 1800)
|
set(MAX_MSVC 1800)
|
||||||
if(${CUDA_VERSION} EQUAL "8.0")
|
if(${CUDA_VERSION} EQUAL "8.0")
|
||||||
@@ -404,24 +435,7 @@ if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER))
|
|||||||
elseif(${CUDA_VERSION} VERSION_GREATER_EQUAL 10.0)
|
elseif(${CUDA_VERSION} VERSION_GREATER_EQUAL 10.0)
|
||||||
set(MAX_MSVC 1999)
|
set(MAX_MSVC 1999)
|
||||||
endif()
|
endif()
|
||||||
if(NOT MSVC_VERSION LESS ${MAX_MSVC} OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
|
||||||
message(STATUS "nvcc not supported for this compiler version, using cycles_cubin_cc instead.")
|
|
||||||
set(WITH_CYCLES_CUBIN_COMPILER ON)
|
|
||||||
endif()
|
|
||||||
unset(MAX_MSVC)
|
unset(MAX_MSVC)
|
||||||
elseif(APPLE)
|
|
||||||
if(NOT (${XCODE_VERSION} VERSION_LESS 10.0))
|
|
||||||
message(STATUS "nvcc not supported for this compiler version, using cycles_cubin_cc instead.")
|
|
||||||
set(WITH_CYCLES_CUBIN_COMPILER ON)
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# NVRTC gives wrong rendering result in CUDA 10.0, so we must use NVCC.
|
|
||||||
if(WITH_CYCLES_CUDA_BINARIES AND WITH_CYCLES_CUBIN_COMPILER AND NOT WITH_CYCLES_CUBIN_COMPILER_OVERRRIDE)
|
|
||||||
if(NOT (${CUDA_VERSION} VERSION_LESS 10.0))
|
|
||||||
message(STATUS "cycles_cubin_cc not supported for CUDA 10.0+, using nvcc instead.")
|
|
||||||
set(WITH_CYCLES_CUBIN_COMPILER OFF)
|
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
@@ -103,32 +103,3 @@ if(WITH_CYCLES_STANDALONE)
|
|||||||
$<TARGET_FILE:cycles>
|
$<TARGET_FILE:cycles>
|
||||||
DESTINATION ${CMAKE_INSTALL_PREFIX})
|
DESTINATION ${CMAKE_INSTALL_PREFIX})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
#####################################################################
|
|
||||||
# Cycles cubin compiler executable
|
|
||||||
#####################################################################
|
|
||||||
|
|
||||||
if(WITH_CYCLES_CUBIN_COMPILER)
|
|
||||||
# 32 bit windows is special, nvrtc is not supported on x86, so even
|
|
||||||
# though we are building 32 bit blender a 64 bit cubin_cc will have
|
|
||||||
# to be build to compile the cubins.
|
|
||||||
if(MSVC AND NOT CMAKE_CL_64)
|
|
||||||
message("Building with CUDA not supported on 32 bit, skipped")
|
|
||||||
set(WITH_CYCLES_CUDA_BINARIES OFF CACHE BOOL "" FORCE)
|
|
||||||
else()
|
|
||||||
set(SRC
|
|
||||||
cycles_cubin_cc.cpp
|
|
||||||
)
|
|
||||||
set(INC
|
|
||||||
../../../extern/cuew/include
|
|
||||||
)
|
|
||||||
set(LIB
|
|
||||||
)
|
|
||||||
cycles_external_libraries_append(LIB)
|
|
||||||
add_executable(cycles_cubin_cc ${SRC})
|
|
||||||
include_directories(${INC})
|
|
||||||
target_link_libraries(cycles_cubin_cc PRIVATE ${LIB})
|
|
||||||
unset(SRC)
|
|
||||||
unset(INC)
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
@@ -1,311 +0,0 @@
|
|||||||
/* SPDX-License-Identifier: Apache-2.0
|
|
||||||
* Copyright 2017-2022 Blender Foundation */
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include <OpenImageIO/argparse.h>
|
|
||||||
#include <OpenImageIO/filesystem.h>
|
|
||||||
|
|
||||||
#include "cuew.h"
|
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
# include <Windows.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
using std::string;
|
|
||||||
using std::vector;
|
|
||||||
|
|
||||||
namespace std {
|
|
||||||
template<typename T> std::string to_string(const T &n)
|
|
||||||
{
|
|
||||||
std::ostringstream s;
|
|
||||||
s << n;
|
|
||||||
return s.str();
|
|
||||||
}
|
|
||||||
} // namespace std
|
|
||||||
|
|
||||||
class CompilationSettings {
|
|
||||||
public:
|
|
||||||
CompilationSettings()
|
|
||||||
: target_arch(0), bits(64), verbose(false), fast_math(false), ptx_only(false)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
string cuda_toolkit_dir;
|
|
||||||
string input_file;
|
|
||||||
string output_file;
|
|
||||||
string ptx_file;
|
|
||||||
vector<string> defines;
|
|
||||||
vector<string> includes;
|
|
||||||
int target_arch;
|
|
||||||
int bits;
|
|
||||||
bool verbose;
|
|
||||||
bool fast_math;
|
|
||||||
bool ptx_only;
|
|
||||||
};
|
|
||||||
|
|
||||||
static bool compile_cuda(CompilationSettings &settings)
|
|
||||||
{
|
|
||||||
const char *headers[] = {"stdlib.h", "float.h", "math.h", "stdio.h", "stddef.h"};
|
|
||||||
const char *header_content[] = {"\n", "\n", "\n", "\n", "\n"};
|
|
||||||
|
|
||||||
printf("Building %s\n", settings.input_file.c_str());
|
|
||||||
|
|
||||||
string code;
|
|
||||||
if (!OIIO::Filesystem::read_text_file(settings.input_file, code)) {
|
|
||||||
fprintf(stderr, "Error: unable to read %s\n", settings.input_file.c_str());
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
vector<string> options;
|
|
||||||
for (size_t i = 0; i < settings.includes.size(); i++) {
|
|
||||||
options.push_back("-I" + settings.includes[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (size_t i = 0; i < settings.defines.size(); i++) {
|
|
||||||
options.push_back("-D" + settings.defines[i]);
|
|
||||||
}
|
|
||||||
options.push_back("-D__KERNEL_CUDA_VERSION__=" + std::to_string(cuewNvrtcVersion()));
|
|
||||||
options.push_back("-arch=compute_" + std::to_string(settings.target_arch));
|
|
||||||
options.push_back("--device-as-default-execution-space");
|
|
||||||
options.push_back("-DCYCLES_CUBIN_CC");
|
|
||||||
options.push_back("--std=c++11");
|
|
||||||
if (settings.fast_math)
|
|
||||||
options.push_back("--use_fast_math");
|
|
||||||
|
|
||||||
nvrtcProgram prog;
|
|
||||||
nvrtcResult result = nvrtcCreateProgram(&prog,
|
|
||||||
code.c_str(), // buffer
|
|
||||||
NULL, // name
|
|
||||||
sizeof(headers) / sizeof(void *), // numHeaders
|
|
||||||
header_content, // headers
|
|
||||||
headers); // includeNames
|
|
||||||
|
|
||||||
if (result != NVRTC_SUCCESS) {
|
|
||||||
fprintf(stderr, "Error: nvrtcCreateProgram failed (%d)\n\n", (int)result);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Transfer options to a classic C array. */
|
|
||||||
vector<const char *> opts(options.size());
|
|
||||||
for (size_t i = 0; i < options.size(); i++) {
|
|
||||||
opts[i] = options[i].c_str();
|
|
||||||
}
|
|
||||||
|
|
||||||
result = nvrtcCompileProgram(prog, options.size(), &opts[0]);
|
|
||||||
|
|
||||||
if (result != NVRTC_SUCCESS) {
|
|
||||||
fprintf(stderr, "Error: nvrtcCompileProgram failed (%d)\n\n", (int)result);
|
|
||||||
|
|
||||||
size_t log_size;
|
|
||||||
nvrtcGetProgramLogSize(prog, &log_size);
|
|
||||||
|
|
||||||
vector<char> log(log_size);
|
|
||||||
nvrtcGetProgramLog(prog, &log[0]);
|
|
||||||
fprintf(stderr, "%s\n", &log[0]);
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Retrieve the ptx code. */
|
|
||||||
size_t ptx_size;
|
|
||||||
result = nvrtcGetPTXSize(prog, &ptx_size);
|
|
||||||
if (result != NVRTC_SUCCESS) {
|
|
||||||
fprintf(stderr, "Error: nvrtcGetPTXSize failed (%d)\n\n", (int)result);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
vector<char> ptx_code(ptx_size);
|
|
||||||
result = nvrtcGetPTX(prog, &ptx_code[0]);
|
|
||||||
if (result != NVRTC_SUCCESS) {
|
|
||||||
fprintf(stderr, "Error: nvrtcGetPTX failed (%d)\n\n", (int)result);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (settings.ptx_only) {
|
|
||||||
settings.ptx_file = settings.output_file;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
/* Write a file in the temp folder with the ptx code. */
|
|
||||||
settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" +
|
|
||||||
OIIO::Filesystem::unique_path();
|
|
||||||
}
|
|
||||||
FILE *f = fopen(settings.ptx_file.c_str(), "wb");
|
|
||||||
fwrite(&ptx_code[0], 1, ptx_size, f);
|
|
||||||
fclose(f);
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool link_ptxas(CompilationSettings &settings)
|
|
||||||
{
|
|
||||||
string cudapath = "";
|
|
||||||
if (settings.cuda_toolkit_dir.size())
|
|
||||||
cudapath = settings.cuda_toolkit_dir + "/bin/";
|
|
||||||
|
|
||||||
string ptx = "\"" + cudapath + "ptxas\" " + settings.ptx_file + " -o " + settings.output_file +
|
|
||||||
" --gpu-name sm_" + std::to_string(settings.target_arch) + " -m" +
|
|
||||||
std::to_string(settings.bits);
|
|
||||||
|
|
||||||
if (settings.verbose) {
|
|
||||||
ptx += " --verbose";
|
|
||||||
printf("%s\n", ptx.c_str());
|
|
||||||
}
|
|
||||||
|
|
||||||
int pxresult = system(ptx.c_str());
|
|
||||||
if (pxresult) {
|
|
||||||
fprintf(stderr, "Error: ptxas failed (%d)\n\n", pxresult);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!OIIO::Filesystem::remove(settings.ptx_file)) {
|
|
||||||
fprintf(stderr, "Error: removing %s\n\n", settings.ptx_file.c_str());
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Initialize the NVRTC runtime: load the library dynamically via cuew, check
 * the CUDA version is supported, and verify that every NVRTC entry point this
 * tool needs was actually resolved. Returns false on any failure. */
static bool init(CompilationSettings &settings)
{
#ifdef _MSC_VER
  if (settings.cuda_toolkit_dir.size()) {
    /* Make sure the NVRTC DLLs next to the toolkit binaries can be found. */
    SetDllDirectory((settings.cuda_toolkit_dir + "/bin").c_str());
  }
#else
  (void)settings;
#endif

  int cuewresult = cuewInit(CUEW_INIT_NVRTC);
  if (cuewresult != CUEW_SUCCESS) {
    /* Print the error code in hex to match the "0x" prefix (the original
     * "0x%d" printed a misleading decimal value). */
    fprintf(stderr, "Error: cuew init failed (0x%x)\n\n", cuewresult);
    return false;
  }

  if (cuewNvrtcVersion() < 80) {
    fprintf(stderr, "Error: only cuda 8 and higher is supported, %d\n\n", cuewCompilerVersion());
    return false;
  }

  /* cuew resolves symbols lazily; a NULL function pointer means the installed
   * NVRTC library is missing that entry point. Check each one we call. */
  if (!nvrtcCreateProgram) {
    fprintf(stderr, "Error: nvrtcCreateProgram not resolved\n");
    return false;
  }

  if (!nvrtcCompileProgram) {
    fprintf(stderr, "Error: nvrtcCompileProgram not resolved\n");
    return false;
  }

  if (!nvrtcGetProgramLogSize) {
    fprintf(stderr, "Error: nvrtcGetProgramLogSize not resolved\n");
    return false;
  }

  if (!nvrtcGetProgramLog) {
    fprintf(stderr, "Error: nvrtcGetProgramLog not resolved\n");
    return false;
  }

  if (!nvrtcGetPTXSize) {
    fprintf(stderr, "Error: nvrtcGetPTXSize not resolved\n");
    return false;
  }

  if (!nvrtcGetPTX) {
    fprintf(stderr, "Error: nvrtcGetPTX not resolved\n");
    return false;
  }

  return true;
}
|
|
||||||
|
|
||||||
static bool parse_parameters(int argc, const char **argv, CompilationSettings &settings)
|
|
||||||
{
|
|
||||||
OIIO::ArgParse ap;
|
|
||||||
ap.options("Usage: cycles_cubin_cc [options]",
|
|
||||||
"-target %d",
|
|
||||||
&settings.target_arch,
|
|
||||||
"target shader model",
|
|
||||||
"-m %d",
|
|
||||||
&settings.bits,
|
|
||||||
"Cuda architecture bits",
|
|
||||||
"-i %s",
|
|
||||||
&settings.input_file,
|
|
||||||
"Input source filename",
|
|
||||||
"-o %s",
|
|
||||||
&settings.output_file,
|
|
||||||
"Output cubin filename",
|
|
||||||
"-I %L",
|
|
||||||
&settings.includes,
|
|
||||||
"Add additional includepath",
|
|
||||||
"-D %L",
|
|
||||||
&settings.defines,
|
|
||||||
"Add additional defines",
|
|
||||||
"-ptx",
|
|
||||||
&settings.ptx_only,
|
|
||||||
"emit PTX code",
|
|
||||||
"-v",
|
|
||||||
&settings.verbose,
|
|
||||||
"Use verbose logging",
|
|
||||||
"--use_fast_math",
|
|
||||||
&settings.fast_math,
|
|
||||||
"Use fast math",
|
|
||||||
"-cuda-toolkit-dir %s",
|
|
||||||
&settings.cuda_toolkit_dir,
|
|
||||||
"path to the cuda toolkit binary directory",
|
|
||||||
NULL);
|
|
||||||
|
|
||||||
if (ap.parse(argc, argv) < 0) {
|
|
||||||
fprintf(stderr, "%s\n", ap.geterror().c_str());
|
|
||||||
ap.usage();
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!settings.output_file.size()) {
|
|
||||||
fprintf(stderr, "Error: Output file not set(-o), required\n\n");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!settings.input_file.size()) {
|
|
||||||
fprintf(stderr, "Error: Input file not set(-i, required\n\n");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!settings.target_arch) {
|
|
||||||
fprintf(stderr, "Error: target shader model not set (-target), required\n\n");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, const char **argv)
|
|
||||||
{
|
|
||||||
CompilationSettings settings;
|
|
||||||
|
|
||||||
if (!parse_parameters(argc, argv, settings)) {
|
|
||||||
fprintf(stderr, "Error: invalid parameters, exiting\n");
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!init(settings)) {
|
|
||||||
fprintf(stderr, "Error: initialization error, exiting\n");
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!compile_cuda(settings)) {
|
|
||||||
fprintf(stderr, "Error: compilation error, exiting\n");
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!settings.ptx_only) {
|
|
||||||
if (!link_ptxas(settings)) {
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
@@ -58,7 +58,7 @@ class CyclesRender(bpy.types.RenderEngine):
|
|||||||
if not self.session:
|
if not self.session:
|
||||||
if self.is_preview:
|
if self.is_preview:
|
||||||
cscene = bpy.context.scene.cycles
|
cscene = bpy.context.scene.cycles
|
||||||
use_osl = cscene.shading_system and cscene.device == 'CPU'
|
use_osl = cscene.shading_system
|
||||||
|
|
||||||
engine.create(self, data, preview_osl=use_osl)
|
engine.create(self, data, preview_osl=use_osl)
|
||||||
else:
|
else:
|
||||||
|
@@ -156,6 +156,11 @@ def with_osl():
|
|||||||
return _cycles.with_osl
|
return _cycles.with_osl
|
||||||
|
|
||||||
|
|
||||||
|
def osl_version():
|
||||||
|
import _cycles
|
||||||
|
return _cycles.osl_version
|
||||||
|
|
||||||
|
|
||||||
def with_path_guiding():
|
def with_path_guiding():
|
||||||
import _cycles
|
import _cycles
|
||||||
return _cycles.with_path_guiding
|
return _cycles.with_path_guiding
|
||||||
|
@@ -114,7 +114,7 @@ class CYCLES_OT_denoise_animation(Operator):
|
|||||||
|
|
||||||
|
|
||||||
class CYCLES_OT_merge_images(Operator):
|
class CYCLES_OT_merge_images(Operator):
|
||||||
"Combine OpenEXR multilayer images rendered with different sample " \
|
"Combine OpenEXR multi-layer images rendered with different sample " \
|
||||||
"ranges into one image with reduced noise"
|
"ranges into one image with reduced noise"
|
||||||
bl_idname = "cycles.merge_images"
|
bl_idname = "cycles.merge_images"
|
||||||
bl_label = "Merge Images"
|
bl_label = "Merge Images"
|
||||||
|
@@ -91,7 +91,7 @@ class AddPresetPerformance(AddPresetBase, Operator):
|
|||||||
preset_menu = "CYCLES_PT_performance_presets"
|
preset_menu = "CYCLES_PT_performance_presets"
|
||||||
|
|
||||||
preset_defines = [
|
preset_defines = [
|
||||||
"render = bpy.context.scene.render"
|
"render = bpy.context.scene.render",
|
||||||
"cycles = bpy.context.scene.cycles"
|
"cycles = bpy.context.scene.cycles"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@@ -290,7 +290,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
|
|||||||
)
|
)
|
||||||
shading_system: BoolProperty(
|
shading_system: BoolProperty(
|
||||||
name="Open Shading Language",
|
name="Open Shading Language",
|
||||||
description="Use Open Shading Language (CPU rendering only)",
|
description="Use Open Shading Language",
|
||||||
)
|
)
|
||||||
|
|
||||||
preview_pause: BoolProperty(
|
preview_pause: BoolProperty(
|
||||||
|
@@ -154,8 +154,9 @@ def use_mnee(context):
|
|||||||
# The MNEE kernel doesn't compile on macOS < 13.
|
# The MNEE kernel doesn't compile on macOS < 13.
|
||||||
if use_metal(context):
|
if use_metal(context):
|
||||||
import platform
|
import platform
|
||||||
v, _, _ = platform.mac_ver()
|
version, _, _ = platform.mac_ver()
|
||||||
if float(v) < 13.0:
|
major_version = version.split(".")[0]
|
||||||
|
if int(major_version) < 13:
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@@ -313,10 +314,11 @@ class CYCLES_RENDER_PT_sampling_path_guiding(CyclesButtonsPanel, Panel):
|
|||||||
layout.use_property_decorate = False
|
layout.use_property_decorate = False
|
||||||
layout.active = cscene.use_guiding
|
layout.active = cscene.use_guiding
|
||||||
|
|
||||||
|
layout.prop(cscene, "guiding_training_samples")
|
||||||
|
|
||||||
col = layout.column(align=True)
|
col = layout.column(align=True)
|
||||||
col.prop(cscene, "use_surface_guiding")
|
col.prop(cscene, "use_surface_guiding", text="Surface")
|
||||||
col.prop(cscene, "use_volume_guiding")
|
col.prop(cscene, "use_volume_guiding", text="Volume")
|
||||||
col.prop(cscene, "guiding_training_samples")
|
|
||||||
|
|
||||||
|
|
||||||
class CYCLES_RENDER_PT_sampling_path_guiding_debug(CyclesDebugButtonsPanel, Panel):
|
class CYCLES_RENDER_PT_sampling_path_guiding_debug(CyclesDebugButtonsPanel, Panel):
|
||||||
@@ -2305,7 +2307,10 @@ def draw_device(self, context):
|
|||||||
col.prop(cscene, "device")
|
col.prop(cscene, "device")
|
||||||
|
|
||||||
from . import engine
|
from . import engine
|
||||||
if engine.with_osl() and use_cpu(context):
|
if engine.with_osl() and (
|
||||||
|
use_cpu(context) or
|
||||||
|
(use_optix(context) and (engine.osl_version()[1] >= 13 or engine.osl_version()[0] > 1))
|
||||||
|
):
|
||||||
col.prop(cscene, "shading_system")
|
col.prop(cscene, "shading_system")
|
||||||
|
|
||||||
|
|
||||||
|
@@ -72,6 +72,11 @@ bool BlenderImageLoader::load_metadata(const ImageDeviceFeatures &, ImageMetaDat
|
|||||||
metadata.colorspace = u_colorspace_raw;
|
metadata.colorspace = u_colorspace_raw;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
/* In some cases (e.g. T94135), the colorspace setting in Blender gets updated as part of the
|
||||||
|
* metadata queries in this function, so update the colorspace setting here. */
|
||||||
|
PointerRNA colorspace_ptr = b_image.colorspace_settings().ptr;
|
||||||
|
metadata.colorspace = get_enum_identifier(colorspace_ptr, "name");
|
||||||
|
|
||||||
if (metadata.channels == 1) {
|
if (metadata.channels == 1) {
|
||||||
metadata.type = IMAGE_DATA_TYPE_BYTE;
|
metadata.type = IMAGE_DATA_TYPE_BYTE;
|
||||||
}
|
}
|
||||||
|
@@ -478,6 +478,7 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)
|
|||||||
|
|
||||||
/* Read metadata. */
|
/* Read metadata. */
|
||||||
bool is_bool_param = false;
|
bool is_bool_param = false;
|
||||||
|
bool hide_value = !param->validdefault;
|
||||||
ustring param_label = param->name;
|
ustring param_label = param->name;
|
||||||
|
|
||||||
for (const OSL::OSLQuery::Parameter &metadata : param->metadata) {
|
for (const OSL::OSLQuery::Parameter &metadata : param->metadata) {
|
||||||
@@ -487,6 +488,9 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)
|
|||||||
if (metadata.sdefault[0] == "boolean" || metadata.sdefault[0] == "checkBox") {
|
if (metadata.sdefault[0] == "boolean" || metadata.sdefault[0] == "checkBox") {
|
||||||
is_bool_param = true;
|
is_bool_param = true;
|
||||||
}
|
}
|
||||||
|
else if (metadata.sdefault[0] == "null") {
|
||||||
|
hide_value = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (metadata.name == "label") {
|
else if (metadata.name == "label") {
|
||||||
/* Socket label. */
|
/* Socket label. */
|
||||||
@@ -596,6 +600,9 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)
|
|||||||
if (b_sock.name() != param_label) {
|
if (b_sock.name() != param_label) {
|
||||||
b_sock.name(param_label.string());
|
b_sock.name(param_label.string());
|
||||||
}
|
}
|
||||||
|
if (b_sock.hide_value() != hide_value) {
|
||||||
|
b_sock.hide_value(hide_value);
|
||||||
|
}
|
||||||
used_sockets.insert(b_sock.ptr.data);
|
used_sockets.insert(b_sock.ptr.data);
|
||||||
found_existing = true;
|
found_existing = true;
|
||||||
}
|
}
|
||||||
@@ -635,6 +642,8 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)
|
|||||||
set_boolean(b_sock.ptr, "default_value", default_boolean);
|
set_boolean(b_sock.ptr, "default_value", default_boolean);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
b_sock.hide_value(hide_value);
|
||||||
|
|
||||||
used_sockets.insert(b_sock.ptr.data);
|
used_sockets.insert(b_sock.ptr.data);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -8,28 +8,13 @@ set(INC
|
|||||||
set(INC_SYS )
|
set(INC_SYS )
|
||||||
|
|
||||||
if(WITH_CYCLES_DEVICE_OPTIX OR WITH_CYCLES_DEVICE_CUDA)
|
if(WITH_CYCLES_DEVICE_OPTIX OR WITH_CYCLES_DEVICE_CUDA)
|
||||||
if(WITH_CUDA_DYNLOAD)
|
if(NOT WITH_CUDA_DYNLOAD)
|
||||||
list(APPEND INC
|
|
||||||
../../../extern/cuew/include
|
|
||||||
)
|
|
||||||
add_definitions(-DWITH_CUDA_DYNLOAD)
|
|
||||||
else()
|
|
||||||
list(APPEND INC_SYS
|
|
||||||
${CUDA_TOOLKIT_INCLUDE}
|
|
||||||
)
|
|
||||||
add_definitions(-DCYCLES_CUDA_NVCC_EXECUTABLE="${CUDA_NVCC_EXECUTABLE}")
|
add_definitions(-DCYCLES_CUDA_NVCC_EXECUTABLE="${CUDA_NVCC_EXECUTABLE}")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_definitions(-DCYCLES_RUNTIME_OPTIX_ROOT_DIR="${CYCLES_RUNTIME_OPTIX_ROOT_DIR}")
|
add_definitions(-DCYCLES_RUNTIME_OPTIX_ROOT_DIR="${CYCLES_RUNTIME_OPTIX_ROOT_DIR}")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
|
|
||||||
list(APPEND INC
|
|
||||||
../../../extern/hipew/include
|
|
||||||
)
|
|
||||||
add_definitions(-DWITH_HIP_DYNLOAD)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
set(SRC_BASE
|
set(SRC_BASE
|
||||||
device.cpp
|
device.cpp
|
||||||
denoise.cpp
|
denoise.cpp
|
||||||
@@ -168,24 +153,15 @@ if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
|
|||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(WITH_CYCLES_DEVICE_CUDA)
|
|
||||||
add_definitions(-DWITH_CUDA)
|
|
||||||
endif()
|
|
||||||
if(WITH_CYCLES_DEVICE_HIP)
|
|
||||||
add_definitions(-DWITH_HIP)
|
|
||||||
endif()
|
|
||||||
if(WITH_CYCLES_DEVICE_OPTIX)
|
|
||||||
add_definitions(-DWITH_OPTIX)
|
|
||||||
endif()
|
|
||||||
if(WITH_CYCLES_DEVICE_METAL)
|
if(WITH_CYCLES_DEVICE_METAL)
|
||||||
list(APPEND LIB
|
list(APPEND LIB
|
||||||
${METAL_LIBRARY}
|
${METAL_LIBRARY}
|
||||||
)
|
)
|
||||||
add_definitions(-DWITH_METAL)
|
|
||||||
list(APPEND SRC
|
list(APPEND SRC
|
||||||
${SRC_METAL}
|
${SRC_METAL}
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (WITH_CYCLES_DEVICE_ONEAPI)
|
if (WITH_CYCLES_DEVICE_ONEAPI)
|
||||||
if(WITH_CYCLES_ONEAPI_BINARIES)
|
if(WITH_CYCLES_ONEAPI_BINARIES)
|
||||||
set(cycles_kernel_oneapi_lib_suffix "_aot")
|
set(cycles_kernel_oneapi_lib_suffix "_aot")
|
||||||
@@ -203,7 +179,6 @@ if (WITH_CYCLES_DEVICE_ONEAPI)
|
|||||||
else()
|
else()
|
||||||
list(APPEND LIB ${SYCL_LIBRARY})
|
list(APPEND LIB ${SYCL_LIBRARY})
|
||||||
endif()
|
endif()
|
||||||
add_definitions(-DWITH_ONEAPI)
|
|
||||||
list(APPEND SRC
|
list(APPEND SRC
|
||||||
${SRC_ONEAPI}
|
${SRC_ONEAPI}
|
||||||
)
|
)
|
||||||
|
@@ -232,7 +232,7 @@ string CUDADevice::compile_kernel_get_common_cflags(const uint kernel_features)
|
|||||||
return cflags;
|
return cflags;
|
||||||
}
|
}
|
||||||
|
|
||||||
string CUDADevice::compile_kernel(const uint kernel_features,
|
string CUDADevice::compile_kernel(const string &common_cflags,
|
||||||
const char *name,
|
const char *name,
|
||||||
const char *base,
|
const char *base,
|
||||||
bool force_ptx)
|
bool force_ptx)
|
||||||
@@ -281,7 +281,6 @@ string CUDADevice::compile_kernel(const uint kernel_features,
|
|||||||
/* We include cflags into md5 so changing cuda toolkit or changing other
|
/* We include cflags into md5 so changing cuda toolkit or changing other
|
||||||
* compiler command line arguments makes sure cubin gets re-built.
|
* compiler command line arguments makes sure cubin gets re-built.
|
||||||
*/
|
*/
|
||||||
string common_cflags = compile_kernel_get_common_cflags(kernel_features);
|
|
||||||
const string kernel_md5 = util_md5_string(source_md5 + common_cflags);
|
const string kernel_md5 = util_md5_string(source_md5 + common_cflags);
|
||||||
|
|
||||||
const char *const kernel_ext = force_ptx ? "ptx" : "cubin";
|
const char *const kernel_ext = force_ptx ? "ptx" : "cubin";
|
||||||
@@ -417,7 +416,8 @@ bool CUDADevice::load_kernels(const uint kernel_features)
|
|||||||
|
|
||||||
/* get kernel */
|
/* get kernel */
|
||||||
const char *kernel_name = "kernel";
|
const char *kernel_name = "kernel";
|
||||||
string cubin = compile_kernel(kernel_features, kernel_name);
|
string cflags = compile_kernel_get_common_cflags(kernel_features);
|
||||||
|
string cubin = compile_kernel(cflags, kernel_name);
|
||||||
if (cubin.empty())
|
if (cubin.empty())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@@ -77,9 +77,9 @@ class CUDADevice : public Device {
|
|||||||
|
|
||||||
bool use_adaptive_compilation();
|
bool use_adaptive_compilation();
|
||||||
|
|
||||||
virtual string compile_kernel_get_common_cflags(const uint kernel_features);
|
string compile_kernel_get_common_cflags(const uint kernel_features);
|
||||||
|
|
||||||
string compile_kernel(const uint kernel_features,
|
string compile_kernel(const string &cflags,
|
||||||
const char *name,
|
const char *name,
|
||||||
const char *base = "cuda",
|
const char *base = "cuda",
|
||||||
bool force_ptx = false);
|
bool force_ptx = false);
|
||||||
|
@@ -78,24 +78,4 @@ class DenoiseParams : public Node {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/* All the parameters needed to perform buffer denoising on a device.
|
|
||||||
* Is not really a task in its canonical terms (as in, is not an asynchronous running task). Is
|
|
||||||
* more like a wrapper for all the arguments and parameters needed to perform denoising. Is a
|
|
||||||
* single place where they are all listed, so that it's not required to modify all device methods
|
|
||||||
* when these parameters do change. */
|
|
||||||
class DeviceDenoiseTask {
|
|
||||||
public:
|
|
||||||
DenoiseParams params;
|
|
||||||
|
|
||||||
int num_samples;
|
|
||||||
|
|
||||||
RenderBuffers *render_buffers;
|
|
||||||
BufferParams buffer_params;
|
|
||||||
|
|
||||||
/* Allow to do in-place modification of the input passes (scaling them down i.e.). This will
|
|
||||||
* lower the memory footprint of the denoiser but will make input passes "invalid" (from path
|
|
||||||
* tracer) point of view. */
|
|
||||||
bool allow_inplace_modification;
|
|
||||||
};
|
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
CCL_NAMESPACE_END
|
||||||
|
@@ -160,6 +160,11 @@ class Device {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual bool load_osl_kernels()
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/* GPU device only functions.
|
/* GPU device only functions.
|
||||||
* These may not be used on CPU or multi-devices. */
|
* These may not be used on CPU or multi-devices. */
|
||||||
|
|
||||||
@@ -228,21 +233,6 @@ class Device {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Buffer denoising. */
|
|
||||||
|
|
||||||
/* Returns true if task is fully handled. */
|
|
||||||
virtual bool denoise_buffer(const DeviceDenoiseTask & /*task*/)
|
|
||||||
{
|
|
||||||
LOG(ERROR) << "Request buffer denoising from a device which does not support it.";
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual DeviceQueue *get_denoise_queue()
|
|
||||||
{
|
|
||||||
LOG(ERROR) << "Request denoising queue from a device which does not support it.";
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Sub-devices */
|
/* Sub-devices */
|
||||||
|
|
||||||
/* Run given callback for every individual device which will be handling rendering.
|
/* Run given callback for every individual device which will be handling rendering.
|
||||||
|
@@ -74,7 +74,7 @@ class HIPDevice : public Device {
|
|||||||
|
|
||||||
bool use_adaptive_compilation();
|
bool use_adaptive_compilation();
|
||||||
|
|
||||||
virtual string compile_kernel_get_common_cflags(const uint kernel_features);
|
string compile_kernel_get_common_cflags(const uint kernel_features);
|
||||||
|
|
||||||
string compile_kernel(const uint kernel_features, const char *name, const char *base = "hip");
|
string compile_kernel(const uint kernel_features, const char *name, const char *base = "hip");
|
||||||
|
|
||||||
|
@@ -7,6 +7,30 @@
|
|||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
bool device_kernel_has_shading(DeviceKernel kernel)
|
||||||
|
{
|
||||||
|
return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND ||
|
||||||
|
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT ||
|
||||||
|
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE ||
|
||||||
|
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
|
||||||
|
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE ||
|
||||||
|
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME ||
|
||||||
|
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW ||
|
||||||
|
kernel == DEVICE_KERNEL_SHADER_EVAL_DISPLACE ||
|
||||||
|
kernel == DEVICE_KERNEL_SHADER_EVAL_BACKGROUND ||
|
||||||
|
kernel == DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool device_kernel_has_intersection(DeviceKernel kernel)
|
||||||
|
{
|
||||||
|
return (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
|
||||||
|
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
|
||||||
|
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
|
||||||
|
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK ||
|
||||||
|
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
|
||||||
|
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE);
|
||||||
|
}
|
||||||
|
|
||||||
const char *device_kernel_as_string(DeviceKernel kernel)
|
const char *device_kernel_as_string(DeviceKernel kernel)
|
||||||
{
|
{
|
||||||
switch (kernel) {
|
switch (kernel) {
|
||||||
|
@@ -11,6 +11,9 @@
|
|||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
bool device_kernel_has_shading(DeviceKernel kernel);
|
||||||
|
bool device_kernel_has_intersection(DeviceKernel kernel);
|
||||||
|
|
||||||
const char *device_kernel_as_string(DeviceKernel kernel);
|
const char *device_kernel_as_string(DeviceKernel kernel);
|
||||||
std::ostream &operator<<(std::ostream &os, DeviceKernel kernel);
|
std::ostream &operator<<(std::ostream &os, DeviceKernel kernel);
|
||||||
|
|
||||||
|
@@ -496,7 +496,7 @@ bool BVHMetal::build_BLAS_pointcloud(Progress &progress,
|
|||||||
num_motion_steps = pointcloud->get_motion_steps();
|
num_motion_steps = pointcloud->get_motion_steps();
|
||||||
}
|
}
|
||||||
|
|
||||||
const size_t num_aabbs = num_motion_steps;
|
const size_t num_aabbs = num_motion_steps * num_points;
|
||||||
|
|
||||||
MTLResourceOptions storage_mode;
|
MTLResourceOptions storage_mode;
|
||||||
if (device.hasUnifiedMemory) {
|
if (device.hasUnifiedMemory) {
|
||||||
@@ -757,6 +757,10 @@ bool BVHMetal::build_TLAS(Progress &progress,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (num_instances == 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/*------------------------------------------------*/
|
/*------------------------------------------------*/
|
||||||
BVH_status("Building TLAS | %7d instances", (int)num_instances);
|
BVH_status("Building TLAS | %7d instances", (int)num_instances);
|
||||||
/*------------------------------------------------*/
|
/*------------------------------------------------*/
|
||||||
|
@@ -307,6 +307,9 @@ void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_feat
|
|||||||
MD5Hash md5;
|
MD5Hash md5;
|
||||||
md5.append(baked_constants);
|
md5.append(baked_constants);
|
||||||
md5.append(source);
|
md5.append(source);
|
||||||
|
if (use_metalrt) {
|
||||||
|
md5.append(std::to_string(kernel_features & METALRT_FEATURE_MASK));
|
||||||
|
}
|
||||||
source_md5[pso_type] = md5.get_hex();
|
source_md5[pso_type] = md5.get_hex();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -54,6 +54,10 @@ enum MetalPipelineType {
|
|||||||
PSO_NUM
|
PSO_NUM
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# define METALRT_FEATURE_MASK \
|
||||||
|
(KERNEL_FEATURE_HAIR | KERNEL_FEATURE_HAIR_THICK | KERNEL_FEATURE_POINTCLOUD | \
|
||||||
|
KERNEL_FEATURE_OBJECT_MOTION)
|
||||||
|
|
||||||
const char *kernel_type_as_string(MetalPipelineType pso_type);
|
const char *kernel_type_as_string(MetalPipelineType pso_type);
|
||||||
|
|
||||||
struct MetalKernelPipeline {
|
struct MetalKernelPipeline {
|
||||||
@@ -67,9 +71,7 @@ struct MetalKernelPipeline {
|
|||||||
|
|
||||||
KernelData kernel_data_;
|
KernelData kernel_data_;
|
||||||
bool use_metalrt;
|
bool use_metalrt;
|
||||||
bool metalrt_hair;
|
uint32_t metalrt_features = 0;
|
||||||
bool metalrt_hair_thick;
|
|
||||||
bool metalrt_pointcloud;
|
|
||||||
|
|
||||||
int threads_per_threadgroup;
|
int threads_per_threadgroup;
|
||||||
|
|
||||||
|
@@ -274,12 +274,9 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
|
|||||||
|
|
||||||
/* metalrt options */
|
/* metalrt options */
|
||||||
request.pipeline->use_metalrt = device->use_metalrt;
|
request.pipeline->use_metalrt = device->use_metalrt;
|
||||||
request.pipeline->metalrt_hair = device->use_metalrt &&
|
request.pipeline->metalrt_features = device->use_metalrt ?
|
||||||
(device->kernel_features & KERNEL_FEATURE_HAIR);
|
(device->kernel_features & METALRT_FEATURE_MASK) :
|
||||||
request.pipeline->metalrt_hair_thick = device->use_metalrt &&
|
0;
|
||||||
(device->kernel_features & KERNEL_FEATURE_HAIR_THICK);
|
|
||||||
request.pipeline->metalrt_pointcloud = device->use_metalrt &&
|
|
||||||
(device->kernel_features & KERNEL_FEATURE_POINTCLOUD);
|
|
||||||
|
|
||||||
{
|
{
|
||||||
thread_scoped_lock lock(cache_mutex);
|
thread_scoped_lock lock(cache_mutex);
|
||||||
@@ -316,9 +313,13 @@ MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const M
|
|||||||
|
|
||||||
/* metalrt options */
|
/* metalrt options */
|
||||||
bool use_metalrt = device->use_metalrt;
|
bool use_metalrt = device->use_metalrt;
|
||||||
bool metalrt_hair = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR);
|
bool device_metalrt_hair = use_metalrt && device->kernel_features & KERNEL_FEATURE_HAIR;
|
||||||
bool metalrt_hair_thick = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR_THICK);
|
bool device_metalrt_hair_thick = use_metalrt &&
|
||||||
bool metalrt_pointcloud = use_metalrt && (device->kernel_features & KERNEL_FEATURE_POINTCLOUD);
|
device->kernel_features & KERNEL_FEATURE_HAIR_THICK;
|
||||||
|
bool device_metalrt_pointcloud = use_metalrt &&
|
||||||
|
device->kernel_features & KERNEL_FEATURE_POINTCLOUD;
|
||||||
|
bool device_metalrt_motion = use_metalrt &&
|
||||||
|
device->kernel_features & KERNEL_FEATURE_OBJECT_MOTION;
|
||||||
|
|
||||||
MetalKernelPipeline *best_pipeline = nullptr;
|
MetalKernelPipeline *best_pipeline = nullptr;
|
||||||
for (auto &pipeline : collection) {
|
for (auto &pipeline : collection) {
|
||||||
@@ -327,9 +328,16 @@ MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const M
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pipeline->use_metalrt != use_metalrt || pipeline->metalrt_hair != metalrt_hair ||
|
bool pipeline_metalrt_hair = pipeline->metalrt_features & KERNEL_FEATURE_HAIR;
|
||||||
pipeline->metalrt_hair_thick != metalrt_hair_thick ||
|
bool pipeline_metalrt_hair_thick = pipeline->metalrt_features & KERNEL_FEATURE_HAIR_THICK;
|
||||||
pipeline->metalrt_pointcloud != metalrt_pointcloud) {
|
bool pipeline_metalrt_pointcloud = pipeline->metalrt_features & KERNEL_FEATURE_POINTCLOUD;
|
||||||
|
bool pipeline_metalrt_motion = use_metalrt &&
|
||||||
|
pipeline->metalrt_features & KERNEL_FEATURE_OBJECT_MOTION;
|
||||||
|
|
||||||
|
if (pipeline->use_metalrt != use_metalrt || pipeline_metalrt_hair != device_metalrt_hair ||
|
||||||
|
pipeline_metalrt_hair_thick != device_metalrt_hair_thick ||
|
||||||
|
pipeline_metalrt_pointcloud != device_metalrt_pointcloud ||
|
||||||
|
pipeline_metalrt_motion != device_metalrt_motion) {
|
||||||
/* wrong combination of metalrt options */
|
/* wrong combination of metalrt options */
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -400,6 +408,8 @@ static MTLFunctionConstantValues *GetConstantValues(KernelData const *data = nul
|
|||||||
if (!data) {
|
if (!data) {
|
||||||
data = &zero_data;
|
data = &zero_data;
|
||||||
}
|
}
|
||||||
|
int zero_int = 0;
|
||||||
|
[constant_values setConstantValue:&zero_int type:MTLDataType_int atIndex:Kernel_DummyConstant];
|
||||||
|
|
||||||
# define KERNEL_STRUCT_MEMBER(parent, _type, name) \
|
# define KERNEL_STRUCT_MEMBER(parent, _type, name) \
|
||||||
[constant_values setConstantValue:&data->parent.name \
|
[constant_values setConstantValue:&data->parent.name \
|
||||||
@@ -423,10 +433,7 @@ void MetalKernelPipeline::compile()
|
|||||||
MTLFunctionDescriptor *func_desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
|
MTLFunctionDescriptor *func_desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
|
||||||
func_desc.name = entryPoint;
|
func_desc.name = entryPoint;
|
||||||
|
|
||||||
if (pso_type == PSO_SPECIALIZED_SHADE) {
|
if (pso_type != PSO_GENERIC) {
|
||||||
func_desc.constantValues = GetConstantValues(&kernel_data_);
|
|
||||||
}
|
|
||||||
else if (pso_type == PSO_SPECIALIZED_INTERSECT) {
|
|
||||||
func_desc.constantValues = GetConstantValues(&kernel_data_);
|
func_desc.constantValues = GetConstantValues(&kernel_data_);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@@ -471,6 +478,13 @@ void MetalKernelPipeline::compile()
|
|||||||
const char *function_name = function_names[i];
|
const char *function_name = function_names[i];
|
||||||
desc.name = [@(function_name) copy];
|
desc.name = [@(function_name) copy];
|
||||||
|
|
||||||
|
if (pso_type != PSO_GENERIC) {
|
||||||
|
desc.constantValues = GetConstantValues(&kernel_data_);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
desc.constantValues = GetConstantValues();
|
||||||
|
}
|
||||||
|
|
||||||
NSError *error = NULL;
|
NSError *error = NULL;
|
||||||
rt_intersection_function[i] = [mtlLibrary newFunctionWithDescriptor:desc error:&error];
|
rt_intersection_function[i] = [mtlLibrary newFunctionWithDescriptor:desc error:&error];
|
||||||
|
|
||||||
@@ -491,6 +505,10 @@ void MetalKernelPipeline::compile()
|
|||||||
NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
|
NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
|
||||||
NSArray *linked_functions = nil;
|
NSArray *linked_functions = nil;
|
||||||
|
|
||||||
|
bool metalrt_hair = use_metalrt && (metalrt_features & KERNEL_FEATURE_HAIR);
|
||||||
|
bool metalrt_hair_thick = use_metalrt && (metalrt_features & KERNEL_FEATURE_HAIR_THICK);
|
||||||
|
bool metalrt_pointcloud = use_metalrt && (metalrt_features & KERNEL_FEATURE_POINTCLOUD);
|
||||||
|
|
||||||
if (use_metalrt) {
|
if (use_metalrt) {
|
||||||
id<MTLFunction> curve_intersect_default = nil;
|
id<MTLFunction> curve_intersect_default = nil;
|
||||||
id<MTLFunction> curve_intersect_shadow = nil;
|
id<MTLFunction> curve_intersect_shadow = nil;
|
||||||
@@ -618,7 +636,9 @@ void MetalKernelPipeline::compile()
|
|||||||
metalbin_path = path_cache_get(path_join("kernels", metalbin_name));
|
metalbin_path = path_cache_get(path_join("kernels", metalbin_name));
|
||||||
path_create_directories(metalbin_path);
|
path_create_directories(metalbin_path);
|
||||||
|
|
||||||
if (path_exists(metalbin_path) && use_binary_archive) {
|
/* Retrieve shader binary from disk, and update the file timestamp for LRU purging to work as
|
||||||
|
* intended. */
|
||||||
|
if (use_binary_archive && path_cache_kernel_exists_and_mark_used(metalbin_path)) {
|
||||||
if (@available(macOS 11.0, *)) {
|
if (@available(macOS 11.0, *)) {
|
||||||
MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
|
MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
|
||||||
archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())];
|
archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())];
|
||||||
@@ -695,6 +715,9 @@ void MetalKernelPipeline::compile()
|
|||||||
metal_printf("Failed to save binary archive, error:\n%s\n",
|
metal_printf("Failed to save binary archive, error:\n%s\n",
|
||||||
[[error localizedDescription] UTF8String]);
|
[[error localizedDescription] UTF8String]);
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
path_cache_kernel_mark_added_and_clear_old(metalbin_path);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -730,7 +753,8 @@ void MetalKernelPipeline::compile()
|
|||||||
newIntersectionFunctionTableWithDescriptor:ift_desc];
|
newIntersectionFunctionTableWithDescriptor:ift_desc];
|
||||||
|
|
||||||
/* Finally write the function handles into this pipeline's table */
|
/* Finally write the function handles into this pipeline's table */
|
||||||
for (int i = 0; i < 2; i++) {
|
int size = (int)[table_functions[table] count];
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
id<MTLFunctionHandle> handle = [pipeline
|
id<MTLFunctionHandle> handle = [pipeline
|
||||||
functionHandleWithFunction:table_functions[table][i]];
|
functionHandleWithFunction:table_functions[table][i]];
|
||||||
[intersection_func_table[table] setFunction:handle atIndex:i];
|
[intersection_func_table[table] setFunction:handle atIndex:i];
|
||||||
|
@@ -138,6 +138,15 @@ class MultiDevice : public Device {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool load_osl_kernels() override
|
||||||
|
{
|
||||||
|
foreach (SubDevice &sub, devices)
|
||||||
|
if (!sub.device->load_osl_kernels())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
void build_bvh(BVH *bvh, Progress &progress, bool refit) override
|
void build_bvh(BVH *bvh, Progress &progress, bool refit) override
|
||||||
{
|
{
|
||||||
/* Try to build and share a single acceleration structure, if possible */
|
/* Try to build and share a single acceleration structure, if possible */
|
||||||
@@ -204,10 +213,12 @@ class MultiDevice : public Device {
|
|||||||
|
|
||||||
virtual void *get_cpu_osl_memory() override
|
virtual void *get_cpu_osl_memory() override
|
||||||
{
|
{
|
||||||
if (devices.size() > 1) {
|
/* Always return the OSL memory of the CPU device (this works since the constructor above
|
||||||
|
* guarantees that CPU devices are always added to the back). */
|
||||||
|
if (devices.size() > 1 && devices.back().device->info.type != DEVICE_CPU) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
return devices.front().device->get_cpu_osl_memory();
|
return devices.back().device->get_cpu_osl_memory();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_resident(device_ptr key, Device *sub_device) override
|
bool is_resident(device_ptr key, Device *sub_device) override
|
||||||
|
@@ -31,6 +31,8 @@ bool device_oneapi_init()
|
|||||||
* improves stability as of intel/LLVM SYCL-nightly/20220529.
|
* improves stability as of intel/LLVM SYCL-nightly/20220529.
|
||||||
* All these env variable can be set beforehand by end-users and
|
* All these env variable can be set beforehand by end-users and
|
||||||
* will in that case -not- be overwritten. */
|
* will in that case -not- be overwritten. */
|
||||||
|
/* By default, enable only Level-Zero and if all devices are allowed, also CUDA and HIP.
|
||||||
|
* OpenCL backend isn't currently well supported. */
|
||||||
# ifdef _WIN32
|
# ifdef _WIN32
|
||||||
if (getenv("SYCL_CACHE_PERSISTENT") == nullptr) {
|
if (getenv("SYCL_CACHE_PERSISTENT") == nullptr) {
|
||||||
_putenv_s("SYCL_CACHE_PERSISTENT", "1");
|
_putenv_s("SYCL_CACHE_PERSISTENT", "1");
|
||||||
@@ -39,7 +41,12 @@ bool device_oneapi_init()
|
|||||||
_putenv_s("SYCL_CACHE_THRESHOLD", "0");
|
_putenv_s("SYCL_CACHE_THRESHOLD", "0");
|
||||||
}
|
}
|
||||||
if (getenv("SYCL_DEVICE_FILTER") == nullptr) {
|
if (getenv("SYCL_DEVICE_FILTER") == nullptr) {
|
||||||
_putenv_s("SYCL_DEVICE_FILTER", "level_zero");
|
if (getenv("CYCLES_ONEAPI_ALL_DEVICES") == nullptr) {
|
||||||
|
_putenv_s("SYCL_DEVICE_FILTER", "level_zero");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
_putenv_s("SYCL_DEVICE_FILTER", "level_zero,cuda,hip");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (getenv("SYCL_ENABLE_PCI") == nullptr) {
|
if (getenv("SYCL_ENABLE_PCI") == nullptr) {
|
||||||
_putenv_s("SYCL_ENABLE_PCI", "1");
|
_putenv_s("SYCL_ENABLE_PCI", "1");
|
||||||
@@ -50,7 +57,12 @@ bool device_oneapi_init()
|
|||||||
# elif __linux__
|
# elif __linux__
|
||||||
setenv("SYCL_CACHE_PERSISTENT", "1", false);
|
setenv("SYCL_CACHE_PERSISTENT", "1", false);
|
||||||
setenv("SYCL_CACHE_THRESHOLD", "0", false);
|
setenv("SYCL_CACHE_THRESHOLD", "0", false);
|
||||||
setenv("SYCL_DEVICE_FILTER", "level_zero", false);
|
if (getenv("CYCLES_ONEAPI_ALL_DEVICES") == nullptr) {
|
||||||
|
setenv("SYCL_DEVICE_FILTER", "level_zero", false);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
setenv("SYCL_DEVICE_FILTER", "level_zero,cuda,hip", false);
|
||||||
|
}
|
||||||
setenv("SYCL_ENABLE_PCI", "1", false);
|
setenv("SYCL_ENABLE_PCI", "1", false);
|
||||||
setenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE", "0", false);
|
setenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE", "0", false);
|
||||||
# endif
|
# endif
|
||||||
|
@@ -430,9 +430,9 @@ void OneapiDevice::check_usm(SyclQueue *queue_, const void *usm_ptr, bool allow_
|
|||||||
sycl::usm::alloc usm_type = get_pointer_type(usm_ptr, queue->get_context());
|
sycl::usm::alloc usm_type = get_pointer_type(usm_ptr, queue->get_context());
|
||||||
(void)usm_type;
|
(void)usm_type;
|
||||||
assert(usm_type == sycl::usm::alloc::device ||
|
assert(usm_type == sycl::usm::alloc::device ||
|
||||||
((device_type == sycl::info::device_type::cpu || allow_host) &&
|
(usm_type == sycl::usm::alloc::host &&
|
||||||
usm_type == sycl::usm::alloc::host ||
|
(allow_host || device_type == sycl::info::device_type::cpu)) ||
|
||||||
usm_type == sycl::usm::alloc::unknown));
|
usm_type == sycl::usm::alloc::unknown);
|
||||||
# else
|
# else
|
||||||
/* Silence warning about unused arguments. */
|
/* Silence warning about unused arguments. */
|
||||||
(void)queue_;
|
(void)queue_;
|
||||||
|
@@ -9,6 +9,10 @@
|
|||||||
|
|
||||||
#include "util/log.h"
|
#include "util/log.h"
|
||||||
|
|
||||||
|
#ifdef WITH_OSL
|
||||||
|
# include <OSL/oslversion.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef WITH_OPTIX
|
#ifdef WITH_OPTIX
|
||||||
# include <optix_function_table_definition.h>
|
# include <optix_function_table_definition.h>
|
||||||
#endif
|
#endif
|
||||||
@@ -65,6 +69,9 @@ void device_optix_info(const vector<DeviceInfo> &cuda_devices, vector<DeviceInfo
|
|||||||
|
|
||||||
info.type = DEVICE_OPTIX;
|
info.type = DEVICE_OPTIX;
|
||||||
info.id += "_OptiX";
|
info.id += "_OptiX";
|
||||||
|
# if defined(WITH_OSL) && (OSL_VERSION_MINOR >= 13 || OSL_VERSION_MAJOR > 1)
|
||||||
|
info.has_osl = true;
|
||||||
|
# endif
|
||||||
info.denoisers |= DENOISER_OPTIX;
|
info.denoisers |= DENOISER_OPTIX;
|
||||||
|
|
||||||
devices.push_back(info);
|
devices.push_back(info);
|
||||||
|
File diff suppressed because it is too large
Load Diff
@@ -1,16 +1,14 @@
|
|||||||
/* SPDX-License-Identifier: Apache-2.0
|
/* SPDX-License-Identifier: Apache-2.0
|
||||||
* Copyright 2019, NVIDIA Corporation.
|
* Copyright 2019, NVIDIA Corporation
|
||||||
* Copyright 2019-2022 Blender Foundation. */
|
* Copyright 2019-2022 Blender Foundation */
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#ifdef WITH_OPTIX
|
#ifdef WITH_OPTIX
|
||||||
|
|
||||||
# include "device/cuda/device_impl.h"
|
# include "device/cuda/device_impl.h"
|
||||||
# include "device/optix/queue.h"
|
|
||||||
# include "device/optix/util.h"
|
# include "device/optix/util.h"
|
||||||
# include "kernel/types.h"
|
# include "kernel/osl/globals.h"
|
||||||
# include "util/unique_ptr.h"
|
|
||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
@@ -23,8 +21,16 @@ enum {
|
|||||||
PG_RGEN_INTERSECT_SHADOW,
|
PG_RGEN_INTERSECT_SHADOW,
|
||||||
PG_RGEN_INTERSECT_SUBSURFACE,
|
PG_RGEN_INTERSECT_SUBSURFACE,
|
||||||
PG_RGEN_INTERSECT_VOLUME_STACK,
|
PG_RGEN_INTERSECT_VOLUME_STACK,
|
||||||
|
PG_RGEN_SHADE_BACKGROUND,
|
||||||
|
PG_RGEN_SHADE_LIGHT,
|
||||||
|
PG_RGEN_SHADE_SURFACE,
|
||||||
PG_RGEN_SHADE_SURFACE_RAYTRACE,
|
PG_RGEN_SHADE_SURFACE_RAYTRACE,
|
||||||
PG_RGEN_SHADE_SURFACE_MNEE,
|
PG_RGEN_SHADE_SURFACE_MNEE,
|
||||||
|
PG_RGEN_SHADE_VOLUME,
|
||||||
|
PG_RGEN_SHADE_SHADOW,
|
||||||
|
PG_RGEN_EVAL_DISPLACE,
|
||||||
|
PG_RGEN_EVAL_BACKGROUND,
|
||||||
|
PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY,
|
||||||
PG_MISS,
|
PG_MISS,
|
||||||
PG_HITD, /* Default hit group. */
|
PG_HITD, /* Default hit group. */
|
||||||
PG_HITS, /* __SHADOW_RECORD_ALL__ hit group. */
|
PG_HITS, /* __SHADOW_RECORD_ALL__ hit group. */
|
||||||
@@ -40,14 +46,14 @@ enum {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static const int MISS_PROGRAM_GROUP_OFFSET = PG_MISS;
|
static const int MISS_PROGRAM_GROUP_OFFSET = PG_MISS;
|
||||||
static const int NUM_MIS_PROGRAM_GROUPS = 1;
|
static const int NUM_MISS_PROGRAM_GROUPS = 1;
|
||||||
static const int HIT_PROGAM_GROUP_OFFSET = PG_HITD;
|
static const int HIT_PROGAM_GROUP_OFFSET = PG_HITD;
|
||||||
static const int NUM_HIT_PROGRAM_GROUPS = 8;
|
static const int NUM_HIT_PROGRAM_GROUPS = 8;
|
||||||
static const int CALLABLE_PROGRAM_GROUPS_BASE = PG_CALL_SVM_AO;
|
static const int CALLABLE_PROGRAM_GROUPS_BASE = PG_CALL_SVM_AO;
|
||||||
static const int NUM_CALLABLE_PROGRAM_GROUPS = 2;
|
static const int NUM_CALLABLE_PROGRAM_GROUPS = 2;
|
||||||
|
|
||||||
/* List of OptiX pipelines. */
|
/* List of OptiX pipelines. */
|
||||||
enum { PIP_SHADE_RAYTRACE, PIP_SHADE_MNEE, PIP_INTERSECT, NUM_PIPELINES };
|
enum { PIP_SHADE, PIP_INTERSECT, NUM_PIPELINES };
|
||||||
|
|
||||||
/* A single shader binding table entry. */
|
/* A single shader binding table entry. */
|
||||||
struct SbtRecord {
|
struct SbtRecord {
|
||||||
@@ -61,52 +67,35 @@ class OptiXDevice : public CUDADevice {
|
|||||||
OptixModule optix_module = NULL; /* All necessary OptiX kernels are in one module. */
|
OptixModule optix_module = NULL; /* All necessary OptiX kernels are in one module. */
|
||||||
OptixModule builtin_modules[2] = {};
|
OptixModule builtin_modules[2] = {};
|
||||||
OptixPipeline pipelines[NUM_PIPELINES] = {};
|
OptixPipeline pipelines[NUM_PIPELINES] = {};
|
||||||
|
OptixProgramGroup groups[NUM_PROGRAM_GROUPS] = {};
|
||||||
|
OptixPipelineCompileOptions pipeline_options = {};
|
||||||
|
|
||||||
bool motion_blur = false;
|
|
||||||
device_vector<SbtRecord> sbt_data;
|
device_vector<SbtRecord> sbt_data;
|
||||||
device_only_memory<KernelParamsOptiX> launch_params;
|
device_only_memory<KernelParamsOptiX> launch_params;
|
||||||
OptixTraversableHandle tlas_handle = 0;
|
|
||||||
|
|
||||||
|
# ifdef WITH_OSL
|
||||||
|
OSLGlobals osl_globals;
|
||||||
|
vector<OptixModule> osl_modules;
|
||||||
|
vector<OptixProgramGroup> osl_groups;
|
||||||
|
# endif
|
||||||
|
|
||||||
|
private:
|
||||||
|
OptixTraversableHandle tlas_handle = 0;
|
||||||
vector<unique_ptr<device_only_memory<char>>> delayed_free_bvh_memory;
|
vector<unique_ptr<device_only_memory<char>>> delayed_free_bvh_memory;
|
||||||
thread_mutex delayed_free_bvh_mutex;
|
thread_mutex delayed_free_bvh_mutex;
|
||||||
|
|
||||||
class Denoiser {
|
|
||||||
public:
|
|
||||||
explicit Denoiser(OptiXDevice *device);
|
|
||||||
|
|
||||||
OptiXDevice *device;
|
|
||||||
OptiXDeviceQueue queue;
|
|
||||||
|
|
||||||
OptixDenoiser optix_denoiser = nullptr;
|
|
||||||
|
|
||||||
/* Configuration size, as provided to `optixDenoiserSetup`.
|
|
||||||
* If the `optixDenoiserSetup()` was never used on the current `optix_denoiser` the
|
|
||||||
* `is_configured` will be false. */
|
|
||||||
bool is_configured = false;
|
|
||||||
int2 configured_size = make_int2(0, 0);
|
|
||||||
|
|
||||||
/* OptiX denoiser state and scratch buffers, stored in a single memory buffer.
|
|
||||||
* The memory layout goes as following: [denoiser state][scratch buffer]. */
|
|
||||||
device_only_memory<unsigned char> state;
|
|
||||||
OptixDenoiserSizes sizes = {};
|
|
||||||
|
|
||||||
bool use_pass_albedo = false;
|
|
||||||
bool use_pass_normal = false;
|
|
||||||
bool use_pass_flow = false;
|
|
||||||
};
|
|
||||||
Denoiser denoiser_;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
|
OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
|
||||||
~OptiXDevice();
|
~OptiXDevice();
|
||||||
|
|
||||||
private:
|
|
||||||
BVHLayoutMask get_bvh_layout_mask() const override;
|
BVHLayoutMask get_bvh_layout_mask() const override;
|
||||||
|
|
||||||
string compile_kernel_get_common_cflags(const uint kernel_features) override;
|
string compile_kernel_get_common_cflags(const uint kernel_features);
|
||||||
|
|
||||||
bool load_kernels(const uint kernel_features) override;
|
bool load_kernels(const uint kernel_features) override;
|
||||||
|
|
||||||
|
bool load_osl_kernels() override;
|
||||||
|
|
||||||
bool build_optix_bvh(BVHOptiX *bvh,
|
bool build_optix_bvh(BVHOptiX *bvh,
|
||||||
OptixBuildOperation operation,
|
OptixBuildOperation operation,
|
||||||
const OptixBuildInput &build_input,
|
const OptixBuildInput &build_input,
|
||||||
@@ -123,52 +112,7 @@ class OptiXDevice : public CUDADevice {
|
|||||||
|
|
||||||
virtual unique_ptr<DeviceQueue> gpu_queue_create() override;
|
virtual unique_ptr<DeviceQueue> gpu_queue_create() override;
|
||||||
|
|
||||||
/* --------------------------------------------------------------------
|
void *get_cpu_osl_memory() override;
|
||||||
* Denoising.
|
|
||||||
*/
|
|
||||||
|
|
||||||
class DenoiseContext;
|
|
||||||
class DenoisePass;
|
|
||||||
|
|
||||||
virtual bool denoise_buffer(const DeviceDenoiseTask &task) override;
|
|
||||||
virtual DeviceQueue *get_denoise_queue() override;
|
|
||||||
|
|
||||||
/* Read guiding passes from the render buffers, preprocess them in a way which is expected by
|
|
||||||
* OptiX and store in the guiding passes memory within the given context.
|
|
||||||
*
|
|
||||||
* Pre=-processing of the guiding passes is to only happen once per context lifetime. DO not
|
|
||||||
* preprocess them for every pass which is being denoised. */
|
|
||||||
bool denoise_filter_guiding_preprocess(DenoiseContext &context);
|
|
||||||
|
|
||||||
/* Set fake albedo pixels in the albedo guiding pass storage.
|
|
||||||
* After this point only passes which do not need albedo for denoising can be processed. */
|
|
||||||
bool denoise_filter_guiding_set_fake_albedo(DenoiseContext &context);
|
|
||||||
|
|
||||||
void denoise_pass(DenoiseContext &context, PassType pass_type);
|
|
||||||
|
|
||||||
/* Read input color pass from the render buffer into the memory which corresponds to the noisy
|
|
||||||
* input within the given context. Pixels are scaled to the number of samples, but are not
|
|
||||||
* preprocessed yet. */
|
|
||||||
void denoise_color_read(DenoiseContext &context, const DenoisePass &pass);
|
|
||||||
|
|
||||||
/* Run corresponding filter kernels, preparing data for the denoiser or copying data from the
|
|
||||||
* denoiser result to the render buffer. */
|
|
||||||
bool denoise_filter_color_preprocess(DenoiseContext &context, const DenoisePass &pass);
|
|
||||||
bool denoise_filter_color_postprocess(DenoiseContext &context, const DenoisePass &pass);
|
|
||||||
|
|
||||||
/* Make sure the OptiX denoiser is created and configured. */
|
|
||||||
bool denoise_ensure(DenoiseContext &context);
|
|
||||||
|
|
||||||
/* Create OptiX denoiser descriptor if needed.
|
|
||||||
* Will do nothing if the current OptiX descriptor is usable for the given parameters.
|
|
||||||
* If the OptiX denoiser descriptor did re-allocate here it is left unconfigured. */
|
|
||||||
bool denoise_create_if_needed(DenoiseContext &context);
|
|
||||||
|
|
||||||
/* Configure existing OptiX denoiser descriptor for the use for the given task. */
|
|
||||||
bool denoise_configure_if_needed(DenoiseContext &context);
|
|
||||||
|
|
||||||
/* Run configured denoiser. */
|
|
||||||
bool denoise_run(DenoiseContext &context, const DenoisePass &pass);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
CCL_NAMESPACE_END
|
||||||
|
@@ -24,21 +24,33 @@ void OptiXDeviceQueue::init_execution()
|
|||||||
CUDADeviceQueue::init_execution();
|
CUDADeviceQueue::init_execution();
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool is_optix_specific_kernel(DeviceKernel kernel)
|
static bool is_optix_specific_kernel(DeviceKernel kernel, bool use_osl)
|
||||||
{
|
{
|
||||||
return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
|
# ifdef WITH_OSL
|
||||||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE ||
|
/* OSL uses direct callables to execute, so shading needs to be done in OptiX if OSL is used. */
|
||||||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
|
if (use_osl && device_kernel_has_shading(kernel)) {
|
||||||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
|
return true;
|
||||||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
|
}
|
||||||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
|
# else
|
||||||
|
(void)use_osl;
|
||||||
|
# endif
|
||||||
|
|
||||||
|
return device_kernel_has_intersection(kernel);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
||||||
const int work_size,
|
const int work_size,
|
||||||
DeviceKernelArguments const &args)
|
DeviceKernelArguments const &args)
|
||||||
{
|
{
|
||||||
if (!is_optix_specific_kernel(kernel)) {
|
OptiXDevice *const optix_device = static_cast<OptiXDevice *>(cuda_device_);
|
||||||
|
|
||||||
|
# ifdef WITH_OSL
|
||||||
|
const bool use_osl = static_cast<OSLGlobals *>(optix_device->get_cpu_osl_memory())->use;
|
||||||
|
# else
|
||||||
|
const bool use_osl = false;
|
||||||
|
# endif
|
||||||
|
|
||||||
|
if (!is_optix_specific_kernel(kernel, use_osl)) {
|
||||||
return CUDADeviceQueue::enqueue(kernel, work_size, args);
|
return CUDADeviceQueue::enqueue(kernel, work_size, args);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -50,8 +62,6 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
|||||||
|
|
||||||
const CUDAContextScope scope(cuda_device_);
|
const CUDAContextScope scope(cuda_device_);
|
||||||
|
|
||||||
OptiXDevice *const optix_device = static_cast<OptiXDevice *>(cuda_device_);
|
|
||||||
|
|
||||||
const device_ptr sbt_data_ptr = optix_device->sbt_data.device_pointer;
|
const device_ptr sbt_data_ptr = optix_device->sbt_data.device_pointer;
|
||||||
const device_ptr launch_params_ptr = optix_device->launch_params.device_pointer;
|
const device_ptr launch_params_ptr = optix_device->launch_params.device_pointer;
|
||||||
|
|
||||||
@@ -62,9 +72,7 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
|||||||
sizeof(device_ptr),
|
sizeof(device_ptr),
|
||||||
cuda_stream_));
|
cuda_stream_));
|
||||||
|
|
||||||
if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
|
if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || device_kernel_has_shading(kernel)) {
|
||||||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
|
|
||||||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE) {
|
|
||||||
cuda_device_assert(
|
cuda_device_assert(
|
||||||
cuda_device_,
|
cuda_device_,
|
||||||
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
|
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
|
||||||
@@ -72,6 +80,15 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
|||||||
sizeof(device_ptr),
|
sizeof(device_ptr),
|
||||||
cuda_stream_));
|
cuda_stream_));
|
||||||
}
|
}
|
||||||
|
if (kernel == DEVICE_KERNEL_SHADER_EVAL_DISPLACE ||
|
||||||
|
kernel == DEVICE_KERNEL_SHADER_EVAL_BACKGROUND ||
|
||||||
|
kernel == DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY) {
|
||||||
|
cuda_device_assert(cuda_device_,
|
||||||
|
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, offset),
|
||||||
|
args.values[2], // &d_offset
|
||||||
|
sizeof(int32_t),
|
||||||
|
cuda_stream_));
|
||||||
|
}
|
||||||
|
|
||||||
cuda_device_assert(cuda_device_, cuStreamSynchronize(cuda_stream_));
|
cuda_device_assert(cuda_device_, cuStreamSynchronize(cuda_stream_));
|
||||||
|
|
||||||
@@ -79,14 +96,35 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
|||||||
OptixShaderBindingTable sbt_params = {};
|
OptixShaderBindingTable sbt_params = {};
|
||||||
|
|
||||||
switch (kernel) {
|
switch (kernel) {
|
||||||
|
case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND:
|
||||||
|
pipeline = optix_device->pipelines[PIP_SHADE];
|
||||||
|
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_BACKGROUND * sizeof(SbtRecord);
|
||||||
|
break;
|
||||||
|
case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT:
|
||||||
|
pipeline = optix_device->pipelines[PIP_SHADE];
|
||||||
|
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_LIGHT * sizeof(SbtRecord);
|
||||||
|
break;
|
||||||
|
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE:
|
||||||
|
pipeline = optix_device->pipelines[PIP_SHADE];
|
||||||
|
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE * sizeof(SbtRecord);
|
||||||
|
break;
|
||||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
|
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
|
||||||
pipeline = optix_device->pipelines[PIP_SHADE_RAYTRACE];
|
pipeline = optix_device->pipelines[PIP_SHADE];
|
||||||
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_RAYTRACE * sizeof(SbtRecord);
|
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_RAYTRACE * sizeof(SbtRecord);
|
||||||
break;
|
break;
|
||||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
|
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
|
||||||
pipeline = optix_device->pipelines[PIP_SHADE_MNEE];
|
pipeline = optix_device->pipelines[PIP_SHADE];
|
||||||
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_MNEE * sizeof(SbtRecord);
|
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_MNEE * sizeof(SbtRecord);
|
||||||
break;
|
break;
|
||||||
|
case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME:
|
||||||
|
pipeline = optix_device->pipelines[PIP_SHADE];
|
||||||
|
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_VOLUME * sizeof(SbtRecord);
|
||||||
|
break;
|
||||||
|
case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW:
|
||||||
|
pipeline = optix_device->pipelines[PIP_SHADE];
|
||||||
|
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SHADOW * sizeof(SbtRecord);
|
||||||
|
break;
|
||||||
|
|
||||||
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
|
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
|
||||||
pipeline = optix_device->pipelines[PIP_INTERSECT];
|
pipeline = optix_device->pipelines[PIP_INTERSECT];
|
||||||
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_CLOSEST * sizeof(SbtRecord);
|
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_CLOSEST * sizeof(SbtRecord);
|
||||||
@@ -104,6 +142,20 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
|||||||
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_VOLUME_STACK * sizeof(SbtRecord);
|
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_VOLUME_STACK * sizeof(SbtRecord);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case DEVICE_KERNEL_SHADER_EVAL_DISPLACE:
|
||||||
|
pipeline = optix_device->pipelines[PIP_SHADE];
|
||||||
|
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_EVAL_DISPLACE * sizeof(SbtRecord);
|
||||||
|
break;
|
||||||
|
case DEVICE_KERNEL_SHADER_EVAL_BACKGROUND:
|
||||||
|
pipeline = optix_device->pipelines[PIP_SHADE];
|
||||||
|
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_EVAL_BACKGROUND * sizeof(SbtRecord);
|
||||||
|
break;
|
||||||
|
case DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY:
|
||||||
|
pipeline = optix_device->pipelines[PIP_SHADE];
|
||||||
|
sbt_params.raygenRecord = sbt_data_ptr +
|
||||||
|
PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY * sizeof(SbtRecord);
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
LOG(ERROR) << "Invalid kernel " << device_kernel_as_string(kernel)
|
LOG(ERROR) << "Invalid kernel " << device_kernel_as_string(kernel)
|
||||||
<< " is attempted to be enqueued.";
|
<< " is attempted to be enqueued.";
|
||||||
@@ -112,7 +164,7 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
|||||||
|
|
||||||
sbt_params.missRecordBase = sbt_data_ptr + MISS_PROGRAM_GROUP_OFFSET * sizeof(SbtRecord);
|
sbt_params.missRecordBase = sbt_data_ptr + MISS_PROGRAM_GROUP_OFFSET * sizeof(SbtRecord);
|
||||||
sbt_params.missRecordStrideInBytes = sizeof(SbtRecord);
|
sbt_params.missRecordStrideInBytes = sizeof(SbtRecord);
|
||||||
sbt_params.missRecordCount = NUM_MIS_PROGRAM_GROUPS;
|
sbt_params.missRecordCount = NUM_MISS_PROGRAM_GROUPS;
|
||||||
sbt_params.hitgroupRecordBase = sbt_data_ptr + HIT_PROGAM_GROUP_OFFSET * sizeof(SbtRecord);
|
sbt_params.hitgroupRecordBase = sbt_data_ptr + HIT_PROGAM_GROUP_OFFSET * sizeof(SbtRecord);
|
||||||
sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord);
|
sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord);
|
||||||
sbt_params.hitgroupRecordCount = NUM_HIT_PROGRAM_GROUPS;
|
sbt_params.hitgroupRecordCount = NUM_HIT_PROGRAM_GROUPS;
|
||||||
@@ -120,6 +172,12 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
|||||||
sbt_params.callablesRecordCount = NUM_CALLABLE_PROGRAM_GROUPS;
|
sbt_params.callablesRecordCount = NUM_CALLABLE_PROGRAM_GROUPS;
|
||||||
sbt_params.callablesRecordStrideInBytes = sizeof(SbtRecord);
|
sbt_params.callablesRecordStrideInBytes = sizeof(SbtRecord);
|
||||||
|
|
||||||
|
# ifdef WITH_OSL
|
||||||
|
if (use_osl) {
|
||||||
|
sbt_params.callablesRecordCount += static_cast<unsigned int>(optix_device->osl_groups.size());
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
|
||||||
/* Launch the ray generation program. */
|
/* Launch the ray generation program. */
|
||||||
optix_device_assert(optix_device,
|
optix_device_assert(optix_device,
|
||||||
optixLaunch(pipeline,
|
optixLaunch(pipeline,
|
||||||
|
@@ -66,7 +66,9 @@ struct SocketType {
|
|||||||
LINK_NORMAL = (1 << 8),
|
LINK_NORMAL = (1 << 8),
|
||||||
LINK_POSITION = (1 << 9),
|
LINK_POSITION = (1 << 9),
|
||||||
LINK_TANGENT = (1 << 10),
|
LINK_TANGENT = (1 << 10),
|
||||||
DEFAULT_LINK_MASK = (1 << 4) | (1 << 5) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | (1 << 10)
|
LINK_OSL_INITIALIZER = (1 << 11),
|
||||||
|
DEFAULT_LINK_MASK = (1 << 4) | (1 << 5) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) |
|
||||||
|
(1 << 10) | (1 << 11)
|
||||||
};
|
};
|
||||||
|
|
||||||
ustring name;
|
ustring name;
|
||||||
|
@@ -8,7 +8,7 @@ set(INC
|
|||||||
set(SRC
|
set(SRC
|
||||||
adaptive_sampling.cpp
|
adaptive_sampling.cpp
|
||||||
denoiser.cpp
|
denoiser.cpp
|
||||||
denoiser_device.cpp
|
denoiser_gpu.cpp
|
||||||
denoiser_oidn.cpp
|
denoiser_oidn.cpp
|
||||||
denoiser_optix.cpp
|
denoiser_optix.cpp
|
||||||
path_trace.cpp
|
path_trace.cpp
|
||||||
@@ -30,7 +30,7 @@ set(SRC
|
|||||||
set(SRC_HEADERS
|
set(SRC_HEADERS
|
||||||
adaptive_sampling.h
|
adaptive_sampling.h
|
||||||
denoiser.h
|
denoiser.h
|
||||||
denoiser_device.h
|
denoiser_gpu.h
|
||||||
denoiser_oidn.h
|
denoiser_oidn.h
|
||||||
denoiser_optix.h
|
denoiser_optix.h
|
||||||
path_trace.h
|
path_trace.h
|
||||||
|
@@ -16,9 +16,11 @@ unique_ptr<Denoiser> Denoiser::create(Device *path_trace_device, const DenoisePa
|
|||||||
{
|
{
|
||||||
DCHECK(params.use);
|
DCHECK(params.use);
|
||||||
|
|
||||||
|
#ifdef WITH_OPTIX
|
||||||
if (params.type == DENOISER_OPTIX && Device::available_devices(DEVICE_MASK_OPTIX).size()) {
|
if (params.type == DENOISER_OPTIX && Device::available_devices(DEVICE_MASK_OPTIX).size()) {
|
||||||
return make_unique<OptiXDenoiser>(path_trace_device, params);
|
return make_unique<OptiXDenoiser>(path_trace_device, params);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Always fallback to OIDN. */
|
/* Always fallback to OIDN. */
|
||||||
DenoiseParams oidn_params = params;
|
DenoiseParams oidn_params = params;
|
||||||
|
@@ -1,27 +0,0 @@
|
|||||||
/* SPDX-License-Identifier: Apache-2.0
|
|
||||||
* Copyright 2011-2022 Blender Foundation */
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include "integrator/denoiser.h"
|
|
||||||
#include "util/unique_ptr.h"
|
|
||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
|
||||||
|
|
||||||
/* Denoiser which uses device-specific denoising implementation, such as OptiX denoiser which are
|
|
||||||
* implemented as a part of a driver of specific device.
|
|
||||||
*
|
|
||||||
* This implementation makes sure the to-be-denoised buffer is available on the denoising device
|
|
||||||
* and invoke denoising kernel via device API. */
|
|
||||||
class DeviceDenoiser : public Denoiser {
|
|
||||||
public:
|
|
||||||
DeviceDenoiser(Device *path_trace_device, const DenoiseParams ¶ms);
|
|
||||||
~DeviceDenoiser();
|
|
||||||
|
|
||||||
virtual bool denoise_buffer(const BufferParams &buffer_params,
|
|
||||||
RenderBuffers *render_buffers,
|
|
||||||
const int num_samples,
|
|
||||||
bool allow_inplace_modification) override;
|
|
||||||
};
|
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
|
@@ -1,7 +1,7 @@
|
|||||||
/* SPDX-License-Identifier: Apache-2.0
|
/* SPDX-License-Identifier: Apache-2.0
|
||||||
* Copyright 2011-2022 Blender Foundation */
|
* Copyright 2011-2022 Blender Foundation */
|
||||||
|
|
||||||
#include "integrator/denoiser_device.h"
|
#include "integrator/denoiser_gpu.h"
|
||||||
|
|
||||||
#include "device/denoise.h"
|
#include "device/denoise.h"
|
||||||
#include "device/device.h"
|
#include "device/device.h"
|
||||||
@@ -13,27 +13,27 @@
|
|||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
DeviceDenoiser::DeviceDenoiser(Device *path_trace_device, const DenoiseParams ¶ms)
|
DenoiserGPU::DenoiserGPU(Device *path_trace_device, const DenoiseParams ¶ms)
|
||||||
: Denoiser(path_trace_device, params)
|
: Denoiser(path_trace_device, params)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
DeviceDenoiser::~DeviceDenoiser()
|
DenoiserGPU::~DenoiserGPU()
|
||||||
{
|
{
|
||||||
/* Explicit implementation, to allow forward declaration of Device in the header. */
|
/* Explicit implementation, to allow forward declaration of Device in the header. */
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DeviceDenoiser::denoise_buffer(const BufferParams &buffer_params,
|
bool DenoiserGPU::denoise_buffer(const BufferParams &buffer_params,
|
||||||
RenderBuffers *render_buffers,
|
RenderBuffers *render_buffers,
|
||||||
const int num_samples,
|
const int num_samples,
|
||||||
bool allow_inplace_modification)
|
bool allow_inplace_modification)
|
||||||
{
|
{
|
||||||
Device *denoiser_device = get_denoiser_device();
|
Device *denoiser_device = get_denoiser_device();
|
||||||
if (!denoiser_device) {
|
if (!denoiser_device) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
DeviceDenoiseTask task;
|
DenoiseTask task;
|
||||||
task.params = params_;
|
task.params = params_;
|
||||||
task.num_samples = num_samples;
|
task.num_samples = num_samples;
|
||||||
task.buffer_params = buffer_params;
|
task.buffer_params = buffer_params;
|
||||||
@@ -50,8 +50,6 @@ bool DeviceDenoiser::denoise_buffer(const BufferParams &buffer_params,
|
|||||||
else {
|
else {
|
||||||
VLOG_WORK << "Creating temporary buffer on denoiser device.";
|
VLOG_WORK << "Creating temporary buffer on denoiser device.";
|
||||||
|
|
||||||
DeviceQueue *queue = denoiser_device->get_denoise_queue();
|
|
||||||
|
|
||||||
/* Create buffer which is available by the device used by denoiser. */
|
/* Create buffer which is available by the device used by denoiser. */
|
||||||
|
|
||||||
/* TODO(sergey): Optimize data transfers. For example, only copy denoising related passes,
|
/* TODO(sergey): Optimize data transfers. For example, only copy denoising related passes,
|
||||||
@@ -70,13 +68,13 @@ bool DeviceDenoiser::denoise_buffer(const BufferParams &buffer_params,
|
|||||||
render_buffers->buffer.data(),
|
render_buffers->buffer.data(),
|
||||||
sizeof(float) * local_render_buffers.buffer.size());
|
sizeof(float) * local_render_buffers.buffer.size());
|
||||||
|
|
||||||
queue->copy_to_device(local_render_buffers.buffer);
|
denoiser_queue_->copy_to_device(local_render_buffers.buffer);
|
||||||
|
|
||||||
task.render_buffers = &local_render_buffers;
|
task.render_buffers = &local_render_buffers;
|
||||||
task.allow_inplace_modification = true;
|
task.allow_inplace_modification = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
const bool denoise_result = denoiser_device->denoise_buffer(task);
|
const bool denoise_result = denoise_buffer(task);
|
||||||
|
|
||||||
if (local_buffer_used) {
|
if (local_buffer_used) {
|
||||||
local_render_buffers.copy_from_device();
|
local_render_buffers.copy_from_device();
|
||||||
@@ -90,4 +88,21 @@ bool DeviceDenoiser::denoise_buffer(const BufferParams &buffer_params,
|
|||||||
return denoise_result;
|
return denoise_result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Device *DenoiserGPU::ensure_denoiser_device(Progress *progress)
|
||||||
|
{
|
||||||
|
Device *denoiser_device = Denoiser::ensure_denoiser_device(progress);
|
||||||
|
if (!denoiser_device) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!denoiser_queue_) {
|
||||||
|
denoiser_queue_ = denoiser_device->gpu_queue_create();
|
||||||
|
if (!denoiser_queue_) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return denoiser_device;
|
||||||
|
}
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
CCL_NAMESPACE_END
|
52
intern/cycles/integrator/denoiser_gpu.h
Normal file
52
intern/cycles/integrator/denoiser_gpu.h
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
/* SPDX-License-Identifier: Apache-2.0
|
||||||
|
* Copyright 2011-2022 Blender Foundation */
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "integrator/denoiser.h"
|
||||||
|
|
||||||
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
/* Implementation of Denoiser which uses a device-specific denoising implementation, running on a
|
||||||
|
* GPU device queue. It makes sure the to-be-denoised buffer is available on the denoising device
|
||||||
|
* and invokes denoising kernels via the device queue API. */
|
||||||
|
class DenoiserGPU : public Denoiser {
|
||||||
|
public:
|
||||||
|
DenoiserGPU(Device *path_trace_device, const DenoiseParams ¶ms);
|
||||||
|
~DenoiserGPU();
|
||||||
|
|
||||||
|
virtual bool denoise_buffer(const BufferParams &buffer_params,
|
||||||
|
RenderBuffers *render_buffers,
|
||||||
|
const int num_samples,
|
||||||
|
bool allow_inplace_modification) override;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
/* All the parameters needed to perform buffer denoising on a device.
|
||||||
|
* Is not really a task in its canonical terms (as in, is not an asynchronous running task). Is
|
||||||
|
* more like a wrapper for all the arguments and parameters needed to perform denoising. Is a
|
||||||
|
* single place where they are all listed, so that it's not required to modify all device methods
|
||||||
|
* when these parameters do change. */
|
||||||
|
class DenoiseTask {
|
||||||
|
public:
|
||||||
|
DenoiseParams params;
|
||||||
|
|
||||||
|
int num_samples;
|
||||||
|
|
||||||
|
RenderBuffers *render_buffers;
|
||||||
|
BufferParams buffer_params;
|
||||||
|
|
||||||
|
/* Allow to do in-place modification of the input passes (scaling them down i.e.). This will
|
||||||
|
* lower the memory footprint of the denoiser but will make input passes "invalid" (from path
|
||||||
|
* tracer) point of view. */
|
||||||
|
bool allow_inplace_modification;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Returns true if task is fully handled. */
|
||||||
|
virtual bool denoise_buffer(const DenoiseTask & /*task*/) = 0;
|
||||||
|
|
||||||
|
virtual Device *ensure_denoiser_device(Progress *progress) override;
|
||||||
|
|
||||||
|
unique_ptr<DeviceQueue> denoiser_queue_;
|
||||||
|
};
|
||||||
|
|
||||||
|
CCL_NAMESPACE_END
|
@@ -1,16 +1,216 @@
|
|||||||
/* SPDX-License-Identifier: Apache-2.0
|
/* SPDX-License-Identifier: Apache-2.0
|
||||||
* Copyright 2011-2022 Blender Foundation */
|
* Copyright 2011-2022 Blender Foundation */
|
||||||
|
|
||||||
#include "integrator/denoiser_optix.h"
|
#ifdef WITH_OPTIX
|
||||||
|
|
||||||
#include "device/denoise.h"
|
# include "integrator/denoiser_optix.h"
|
||||||
#include "device/device.h"
|
# include "integrator/pass_accessor_gpu.h"
|
||||||
|
|
||||||
|
# include "device/optix/device_impl.h"
|
||||||
|
# include "device/optix/queue.h"
|
||||||
|
|
||||||
|
# include <optix_denoiser_tiling.h>
|
||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
OptiXDenoiser::OptiXDenoiser(Device *path_trace_device, const DenoiseParams ¶ms)
|
# if OPTIX_ABI_VERSION >= 60
|
||||||
: DeviceDenoiser(path_trace_device, params)
|
using ::optixUtilDenoiserInvokeTiled;
|
||||||
|
# else
|
||||||
|
// A minimal copy of functionality `optix_denoiser_tiling.h` which allows to fix integer overflow
|
||||||
|
// issues without bumping SDK or driver requirement.
|
||||||
|
//
|
||||||
|
// The original code is Copyright NVIDIA Corporation, BSD-3-Clause.
|
||||||
|
static OptixResult optixUtilDenoiserSplitImage(const OptixImage2D &input,
|
||||||
|
const OptixImage2D &output,
|
||||||
|
unsigned int overlapWindowSizeInPixels,
|
||||||
|
unsigned int tileWidth,
|
||||||
|
unsigned int tileHeight,
|
||||||
|
std::vector<OptixUtilDenoiserImageTile> &tiles)
|
||||||
{
|
{
|
||||||
|
if (tileWidth == 0 || tileHeight == 0)
|
||||||
|
return OPTIX_ERROR_INVALID_VALUE;
|
||||||
|
|
||||||
|
unsigned int inPixelStride = optixUtilGetPixelStride(input);
|
||||||
|
unsigned int outPixelStride = optixUtilGetPixelStride(output);
|
||||||
|
|
||||||
|
int inp_w = std::min(tileWidth + 2 * overlapWindowSizeInPixels, input.width);
|
||||||
|
int inp_h = std::min(tileHeight + 2 * overlapWindowSizeInPixels, input.height);
|
||||||
|
int inp_y = 0, copied_y = 0;
|
||||||
|
|
||||||
|
do {
|
||||||
|
int inputOffsetY = inp_y == 0 ? 0 :
|
||||||
|
std::max((int)overlapWindowSizeInPixels,
|
||||||
|
inp_h - ((int)input.height - inp_y));
|
||||||
|
int copy_y = inp_y == 0 ? std::min(input.height, tileHeight + overlapWindowSizeInPixels) :
|
||||||
|
std::min(tileHeight, input.height - copied_y);
|
||||||
|
|
||||||
|
int inp_x = 0, copied_x = 0;
|
||||||
|
do {
|
||||||
|
int inputOffsetX = inp_x == 0 ? 0 :
|
||||||
|
std::max((int)overlapWindowSizeInPixels,
|
||||||
|
inp_w - ((int)input.width - inp_x));
|
||||||
|
int copy_x = inp_x == 0 ? std::min(input.width, tileWidth + overlapWindowSizeInPixels) :
|
||||||
|
std::min(tileWidth, input.width - copied_x);
|
||||||
|
|
||||||
|
OptixUtilDenoiserImageTile tile;
|
||||||
|
tile.input.data = input.data + (size_t)(inp_y - inputOffsetY) * input.rowStrideInBytes +
|
||||||
|
+(size_t)(inp_x - inputOffsetX) * inPixelStride;
|
||||||
|
tile.input.width = inp_w;
|
||||||
|
tile.input.height = inp_h;
|
||||||
|
tile.input.rowStrideInBytes = input.rowStrideInBytes;
|
||||||
|
tile.input.pixelStrideInBytes = input.pixelStrideInBytes;
|
||||||
|
tile.input.format = input.format;
|
||||||
|
|
||||||
|
tile.output.data = output.data + (size_t)inp_y * output.rowStrideInBytes +
|
||||||
|
(size_t)inp_x * outPixelStride;
|
||||||
|
tile.output.width = copy_x;
|
||||||
|
tile.output.height = copy_y;
|
||||||
|
tile.output.rowStrideInBytes = output.rowStrideInBytes;
|
||||||
|
tile.output.pixelStrideInBytes = output.pixelStrideInBytes;
|
||||||
|
tile.output.format = output.format;
|
||||||
|
|
||||||
|
tile.inputOffsetX = inputOffsetX;
|
||||||
|
tile.inputOffsetY = inputOffsetY;
|
||||||
|
tiles.push_back(tile);
|
||||||
|
|
||||||
|
inp_x += inp_x == 0 ? tileWidth + overlapWindowSizeInPixels : tileWidth;
|
||||||
|
copied_x += copy_x;
|
||||||
|
} while (inp_x < static_cast<int>(input.width));
|
||||||
|
|
||||||
|
inp_y += inp_y == 0 ? tileHeight + overlapWindowSizeInPixels : tileHeight;
|
||||||
|
copied_y += copy_y;
|
||||||
|
} while (inp_y < static_cast<int>(input.height));
|
||||||
|
|
||||||
|
return OPTIX_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static OptixResult optixUtilDenoiserInvokeTiled(OptixDenoiser denoiser,
|
||||||
|
CUstream stream,
|
||||||
|
const OptixDenoiserParams *params,
|
||||||
|
CUdeviceptr denoiserState,
|
||||||
|
size_t denoiserStateSizeInBytes,
|
||||||
|
const OptixDenoiserGuideLayer *guideLayer,
|
||||||
|
const OptixDenoiserLayer *layers,
|
||||||
|
unsigned int numLayers,
|
||||||
|
CUdeviceptr scratch,
|
||||||
|
size_t scratchSizeInBytes,
|
||||||
|
unsigned int overlapWindowSizeInPixels,
|
||||||
|
unsigned int tileWidth,
|
||||||
|
unsigned int tileHeight)
|
||||||
|
{
|
||||||
|
if (!guideLayer || !layers)
|
||||||
|
return OPTIX_ERROR_INVALID_VALUE;
|
||||||
|
|
||||||
|
std::vector<std::vector<OptixUtilDenoiserImageTile>> tiles(numLayers);
|
||||||
|
std::vector<std::vector<OptixUtilDenoiserImageTile>> prevTiles(numLayers);
|
||||||
|
for (unsigned int l = 0; l < numLayers; l++) {
|
||||||
|
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(layers[l].input,
|
||||||
|
layers[l].output,
|
||||||
|
overlapWindowSizeInPixels,
|
||||||
|
tileWidth,
|
||||||
|
tileHeight,
|
||||||
|
tiles[l]))
|
||||||
|
return res;
|
||||||
|
|
||||||
|
if (layers[l].previousOutput.data) {
|
||||||
|
OptixImage2D dummyOutput = layers[l].previousOutput;
|
||||||
|
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(layers[l].previousOutput,
|
||||||
|
dummyOutput,
|
||||||
|
overlapWindowSizeInPixels,
|
||||||
|
tileWidth,
|
||||||
|
tileHeight,
|
||||||
|
prevTiles[l]))
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<OptixUtilDenoiserImageTile> albedoTiles;
|
||||||
|
if (guideLayer->albedo.data) {
|
||||||
|
OptixImage2D dummyOutput = guideLayer->albedo;
|
||||||
|
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->albedo,
|
||||||
|
dummyOutput,
|
||||||
|
overlapWindowSizeInPixels,
|
||||||
|
tileWidth,
|
||||||
|
tileHeight,
|
||||||
|
albedoTiles))
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<OptixUtilDenoiserImageTile> normalTiles;
|
||||||
|
if (guideLayer->normal.data) {
|
||||||
|
OptixImage2D dummyOutput = guideLayer->normal;
|
||||||
|
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->normal,
|
||||||
|
dummyOutput,
|
||||||
|
overlapWindowSizeInPixels,
|
||||||
|
tileWidth,
|
||||||
|
tileHeight,
|
||||||
|
normalTiles))
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
std::vector<OptixUtilDenoiserImageTile> flowTiles;
|
||||||
|
if (guideLayer->flow.data) {
|
||||||
|
OptixImage2D dummyOutput = guideLayer->flow;
|
||||||
|
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->flow,
|
||||||
|
dummyOutput,
|
||||||
|
overlapWindowSizeInPixels,
|
||||||
|
tileWidth,
|
||||||
|
tileHeight,
|
||||||
|
flowTiles))
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t t = 0; t < tiles[0].size(); t++) {
|
||||||
|
std::vector<OptixDenoiserLayer> tlayers;
|
||||||
|
for (unsigned int l = 0; l < numLayers; l++) {
|
||||||
|
OptixDenoiserLayer layer = {};
|
||||||
|
layer.input = (tiles[l])[t].input;
|
||||||
|
layer.output = (tiles[l])[t].output;
|
||||||
|
if (layers[l].previousOutput.data)
|
||||||
|
layer.previousOutput = (prevTiles[l])[t].input;
|
||||||
|
tlayers.push_back(layer);
|
||||||
|
}
|
||||||
|
|
||||||
|
OptixDenoiserGuideLayer gl = {};
|
||||||
|
if (guideLayer->albedo.data)
|
||||||
|
gl.albedo = albedoTiles[t].input;
|
||||||
|
|
||||||
|
if (guideLayer->normal.data)
|
||||||
|
gl.normal = normalTiles[t].input;
|
||||||
|
|
||||||
|
if (guideLayer->flow.data)
|
||||||
|
gl.flow = flowTiles[t].input;
|
||||||
|
|
||||||
|
if (const OptixResult res = optixDenoiserInvoke(denoiser,
|
||||||
|
stream,
|
||||||
|
params,
|
||||||
|
denoiserState,
|
||||||
|
denoiserStateSizeInBytes,
|
||||||
|
&gl,
|
||||||
|
&tlayers[0],
|
||||||
|
numLayers,
|
||||||
|
(tiles[0])[t].inputOffsetX,
|
||||||
|
(tiles[0])[t].inputOffsetY,
|
||||||
|
scratch,
|
||||||
|
scratchSizeInBytes))
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
return OPTIX_SUCCESS;
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
|
||||||
|
OptiXDenoiser::OptiXDenoiser(Device *path_trace_device, const DenoiseParams ¶ms)
|
||||||
|
: DenoiserGPU(path_trace_device, params), state_(path_trace_device, "__denoiser_state", true)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
OptiXDenoiser::~OptiXDenoiser()
|
||||||
|
{
|
||||||
|
/* It is important that the OptixDenoiser handle is destroyed before the OptixDeviceContext
|
||||||
|
* handle, which is guaranteed since the local denoising device owning the OptiX device context
|
||||||
|
* is deleted as part of the Denoiser class destructor call after this. */
|
||||||
|
if (optix_denoiser_ != nullptr) {
|
||||||
|
optixDenoiserDestroy(optix_denoiser_);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uint OptiXDenoiser::get_device_type_mask() const
|
uint OptiXDenoiser::get_device_type_mask() const
|
||||||
@@ -18,4 +218,569 @@ uint OptiXDenoiser::get_device_type_mask() const
|
|||||||
return DEVICE_MASK_OPTIX;
|
return DEVICE_MASK_OPTIX;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class OptiXDenoiser::DenoiseContext {
|
||||||
|
public:
|
||||||
|
explicit DenoiseContext(OptiXDevice *device, const DenoiseTask &task)
|
||||||
|
: denoise_params(task.params),
|
||||||
|
render_buffers(task.render_buffers),
|
||||||
|
buffer_params(task.buffer_params),
|
||||||
|
guiding_buffer(device, "denoiser guiding passes buffer", true),
|
||||||
|
num_samples(task.num_samples)
|
||||||
|
{
|
||||||
|
num_input_passes = 1;
|
||||||
|
if (denoise_params.use_pass_albedo) {
|
||||||
|
num_input_passes += 1;
|
||||||
|
use_pass_albedo = true;
|
||||||
|
pass_denoising_albedo = buffer_params.get_pass_offset(PASS_DENOISING_ALBEDO);
|
||||||
|
if (denoise_params.use_pass_normal) {
|
||||||
|
num_input_passes += 1;
|
||||||
|
use_pass_normal = true;
|
||||||
|
pass_denoising_normal = buffer_params.get_pass_offset(PASS_DENOISING_NORMAL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (denoise_params.temporally_stable) {
|
||||||
|
prev_output.device_pointer = render_buffers->buffer.device_pointer;
|
||||||
|
|
||||||
|
prev_output.offset = buffer_params.get_pass_offset(PASS_DENOISING_PREVIOUS);
|
||||||
|
|
||||||
|
prev_output.stride = buffer_params.stride;
|
||||||
|
prev_output.pass_stride = buffer_params.pass_stride;
|
||||||
|
|
||||||
|
num_input_passes += 1;
|
||||||
|
use_pass_motion = true;
|
||||||
|
pass_motion = buffer_params.get_pass_offset(PASS_MOTION);
|
||||||
|
}
|
||||||
|
|
||||||
|
use_guiding_passes = (num_input_passes - 1) > 0;
|
||||||
|
|
||||||
|
if (use_guiding_passes) {
|
||||||
|
if (task.allow_inplace_modification) {
|
||||||
|
guiding_params.device_pointer = render_buffers->buffer.device_pointer;
|
||||||
|
|
||||||
|
guiding_params.pass_albedo = pass_denoising_albedo;
|
||||||
|
guiding_params.pass_normal = pass_denoising_normal;
|
||||||
|
guiding_params.pass_flow = pass_motion;
|
||||||
|
|
||||||
|
guiding_params.stride = buffer_params.stride;
|
||||||
|
guiding_params.pass_stride = buffer_params.pass_stride;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
guiding_params.pass_stride = 0;
|
||||||
|
if (use_pass_albedo) {
|
||||||
|
guiding_params.pass_albedo = guiding_params.pass_stride;
|
||||||
|
guiding_params.pass_stride += 3;
|
||||||
|
}
|
||||||
|
if (use_pass_normal) {
|
||||||
|
guiding_params.pass_normal = guiding_params.pass_stride;
|
||||||
|
guiding_params.pass_stride += 3;
|
||||||
|
}
|
||||||
|
if (use_pass_motion) {
|
||||||
|
guiding_params.pass_flow = guiding_params.pass_stride;
|
||||||
|
guiding_params.pass_stride += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
guiding_params.stride = buffer_params.width;
|
||||||
|
|
||||||
|
guiding_buffer.alloc_to_device(buffer_params.width * buffer_params.height *
|
||||||
|
guiding_params.pass_stride);
|
||||||
|
guiding_params.device_pointer = guiding_buffer.device_pointer;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pass_sample_count = buffer_params.get_pass_offset(PASS_SAMPLE_COUNT);
|
||||||
|
}
|
||||||
|
|
||||||
|
const DenoiseParams &denoise_params;
|
||||||
|
|
||||||
|
RenderBuffers *render_buffers = nullptr;
|
||||||
|
const BufferParams &buffer_params;
|
||||||
|
|
||||||
|
/* Previous output. */
|
||||||
|
struct {
|
||||||
|
device_ptr device_pointer = 0;
|
||||||
|
|
||||||
|
int offset = PASS_UNUSED;
|
||||||
|
|
||||||
|
int stride = -1;
|
||||||
|
int pass_stride = -1;
|
||||||
|
} prev_output;
|
||||||
|
|
||||||
|
/* Device-side storage of the guiding passes. */
|
||||||
|
device_only_memory<float> guiding_buffer;
|
||||||
|
|
||||||
|
struct {
|
||||||
|
device_ptr device_pointer = 0;
|
||||||
|
|
||||||
|
/* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
|
||||||
|
int pass_albedo = PASS_UNUSED;
|
||||||
|
int pass_normal = PASS_UNUSED;
|
||||||
|
int pass_flow = PASS_UNUSED;
|
||||||
|
|
||||||
|
int stride = -1;
|
||||||
|
int pass_stride = -1;
|
||||||
|
} guiding_params;
|
||||||
|
|
||||||
|
/* Number of input passes. Including the color and extra auxiliary passes. */
|
||||||
|
int num_input_passes = 0;
|
||||||
|
bool use_guiding_passes = false;
|
||||||
|
bool use_pass_albedo = false;
|
||||||
|
bool use_pass_normal = false;
|
||||||
|
bool use_pass_motion = false;
|
||||||
|
|
||||||
|
int num_samples = 0;
|
||||||
|
|
||||||
|
int pass_sample_count = PASS_UNUSED;
|
||||||
|
|
||||||
|
/* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
|
||||||
|
int pass_denoising_albedo = PASS_UNUSED;
|
||||||
|
int pass_denoising_normal = PASS_UNUSED;
|
||||||
|
int pass_motion = PASS_UNUSED;
|
||||||
|
|
||||||
|
/* For passes which don't need albedo channel for denoising we replace the actual albedo with
|
||||||
|
* the (0.5, 0.5, 0.5). This flag indicates that the real albedo pass has been replaced with
|
||||||
|
* the fake values and denoising of passes which do need albedo can no longer happen. */
|
||||||
|
bool albedo_replaced_with_fake = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
class OptiXDenoiser::DenoisePass {
|
||||||
|
public:
|
||||||
|
DenoisePass(const PassType type, const BufferParams &buffer_params) : type(type)
|
||||||
|
{
|
||||||
|
noisy_offset = buffer_params.get_pass_offset(type, PassMode::NOISY);
|
||||||
|
denoised_offset = buffer_params.get_pass_offset(type, PassMode::DENOISED);
|
||||||
|
|
||||||
|
const PassInfo pass_info = Pass::get_info(type);
|
||||||
|
num_components = pass_info.num_components;
|
||||||
|
use_compositing = pass_info.use_compositing;
|
||||||
|
use_denoising_albedo = pass_info.use_denoising_albedo;
|
||||||
|
}
|
||||||
|
|
||||||
|
PassType type;
|
||||||
|
|
||||||
|
int noisy_offset;
|
||||||
|
int denoised_offset;
|
||||||
|
|
||||||
|
int num_components;
|
||||||
|
bool use_compositing;
|
||||||
|
bool use_denoising_albedo;
|
||||||
|
};
|
||||||
|
|
||||||
|
bool OptiXDenoiser::denoise_buffer(const DenoiseTask &task)
|
||||||
|
{
|
||||||
|
OptiXDevice *const optix_device = static_cast<OptiXDevice *>(denoiser_device_);
|
||||||
|
|
||||||
|
const CUDAContextScope scope(optix_device);
|
||||||
|
|
||||||
|
DenoiseContext context(optix_device, task);
|
||||||
|
|
||||||
|
if (!denoise_ensure(context)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!denoise_filter_guiding_preprocess(context)) {
|
||||||
|
LOG(ERROR) << "Error preprocessing guiding passes.";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Passes which will use real albedo when it is available. */
|
||||||
|
denoise_pass(context, PASS_COMBINED);
|
||||||
|
denoise_pass(context, PASS_SHADOW_CATCHER_MATTE);
|
||||||
|
|
||||||
|
/* Passes which do not need albedo and hence if real is present it needs to become fake. */
|
||||||
|
denoise_pass(context, PASS_SHADOW_CATCHER);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool OptiXDenoiser::denoise_filter_guiding_preprocess(const DenoiseContext &context)
|
||||||
|
{
|
||||||
|
const BufferParams &buffer_params = context.buffer_params;
|
||||||
|
|
||||||
|
const int work_size = buffer_params.width * buffer_params.height;
|
||||||
|
|
||||||
|
DeviceKernelArguments args(&context.guiding_params.device_pointer,
|
||||||
|
&context.guiding_params.pass_stride,
|
||||||
|
&context.guiding_params.pass_albedo,
|
||||||
|
&context.guiding_params.pass_normal,
|
||||||
|
&context.guiding_params.pass_flow,
|
||||||
|
&context.render_buffers->buffer.device_pointer,
|
||||||
|
&buffer_params.offset,
|
||||||
|
&buffer_params.stride,
|
||||||
|
&buffer_params.pass_stride,
|
||||||
|
&context.pass_sample_count,
|
||||||
|
&context.pass_denoising_albedo,
|
||||||
|
&context.pass_denoising_normal,
|
||||||
|
&context.pass_motion,
|
||||||
|
&buffer_params.full_x,
|
||||||
|
&buffer_params.full_y,
|
||||||
|
&buffer_params.width,
|
||||||
|
&buffer_params.height,
|
||||||
|
&context.num_samples);
|
||||||
|
|
||||||
|
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, work_size, args);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool OptiXDenoiser::denoise_filter_guiding_set_fake_albedo(const DenoiseContext &context)
|
||||||
|
{
|
||||||
|
const BufferParams &buffer_params = context.buffer_params;
|
||||||
|
|
||||||
|
const int work_size = buffer_params.width * buffer_params.height;
|
||||||
|
|
||||||
|
DeviceKernelArguments args(&context.guiding_params.device_pointer,
|
||||||
|
&context.guiding_params.pass_stride,
|
||||||
|
&context.guiding_params.pass_albedo,
|
||||||
|
&buffer_params.width,
|
||||||
|
&buffer_params.height);
|
||||||
|
|
||||||
|
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO, work_size, args);
|
||||||
|
}
|
||||||
|
|
||||||
|
void OptiXDenoiser::denoise_pass(DenoiseContext &context, PassType pass_type)
|
||||||
|
{
|
||||||
|
const BufferParams &buffer_params = context.buffer_params;
|
||||||
|
|
||||||
|
const DenoisePass pass(pass_type, buffer_params);
|
||||||
|
|
||||||
|
if (pass.noisy_offset == PASS_UNUSED) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (pass.denoised_offset == PASS_UNUSED) {
|
||||||
|
LOG(DFATAL) << "Missing denoised pass " << pass_type_as_string(pass_type);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pass.use_denoising_albedo) {
|
||||||
|
if (context.albedo_replaced_with_fake) {
|
||||||
|
LOG(ERROR) << "Pass which requires albedo is denoised after fake albedo has been set.";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (context.use_guiding_passes && !context.albedo_replaced_with_fake) {
|
||||||
|
context.albedo_replaced_with_fake = true;
|
||||||
|
if (!denoise_filter_guiding_set_fake_albedo(context)) {
|
||||||
|
LOG(ERROR) << "Error replacing real albedo with the fake one.";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Read and preprocess noisy color input pass. */
|
||||||
|
denoise_color_read(context, pass);
|
||||||
|
if (!denoise_filter_color_preprocess(context, pass)) {
|
||||||
|
LOG(ERROR) << "Error converting denoising passes to RGB buffer.";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!denoise_run(context, pass)) {
|
||||||
|
LOG(ERROR) << "Error running OptiX denoiser.";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Store result in the combined pass of the render buffer.
|
||||||
|
*
|
||||||
|
* This will scale the denoiser result up to match the number of, possibly per-pixel, samples. */
|
||||||
|
if (!denoise_filter_color_postprocess(context, pass)) {
|
||||||
|
LOG(ERROR) << "Error copying denoiser result to the denoised pass.";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
denoiser_queue_->synchronize();
|
||||||
|
}
|
||||||
|
|
||||||
|
void OptiXDenoiser::denoise_color_read(const DenoiseContext &context, const DenoisePass &pass)
|
||||||
|
{
|
||||||
|
PassAccessor::PassAccessInfo pass_access_info;
|
||||||
|
pass_access_info.type = pass.type;
|
||||||
|
pass_access_info.mode = PassMode::NOISY;
|
||||||
|
pass_access_info.offset = pass.noisy_offset;
|
||||||
|
|
||||||
|
/* Denoiser operates on passes which are used to calculate the approximation, and is never used
|
||||||
|
* on the approximation. The latter is not even possible because OptiX does not support
|
||||||
|
* denoising of semi-transparent pixels. */
|
||||||
|
pass_access_info.use_approximate_shadow_catcher = false;
|
||||||
|
pass_access_info.use_approximate_shadow_catcher_background = false;
|
||||||
|
pass_access_info.show_active_pixels = false;
|
||||||
|
|
||||||
|
/* TODO(sergey): Consider adding support of actual exposure, to avoid clamping in extreme cases.
|
||||||
|
*/
|
||||||
|
const PassAccessorGPU pass_accessor(
|
||||||
|
denoiser_queue_.get(), pass_access_info, 1.0f, context.num_samples);
|
||||||
|
|
||||||
|
PassAccessor::Destination destination(pass_access_info.type);
|
||||||
|
destination.d_pixels = context.render_buffers->buffer.device_pointer +
|
||||||
|
pass.denoised_offset * sizeof(float);
|
||||||
|
destination.num_components = 3;
|
||||||
|
destination.pixel_stride = context.buffer_params.pass_stride;
|
||||||
|
|
||||||
|
BufferParams buffer_params = context.buffer_params;
|
||||||
|
buffer_params.window_x = 0;
|
||||||
|
buffer_params.window_y = 0;
|
||||||
|
buffer_params.window_width = buffer_params.width;
|
||||||
|
buffer_params.window_height = buffer_params.height;
|
||||||
|
|
||||||
|
pass_accessor.get_render_tile_pixels(context.render_buffers, buffer_params, destination);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool OptiXDenoiser::denoise_filter_color_preprocess(const DenoiseContext &context,
|
||||||
|
const DenoisePass &pass)
|
||||||
|
{
|
||||||
|
const BufferParams &buffer_params = context.buffer_params;
|
||||||
|
|
||||||
|
const int work_size = buffer_params.width * buffer_params.height;
|
||||||
|
|
||||||
|
DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
|
||||||
|
&buffer_params.full_x,
|
||||||
|
&buffer_params.full_y,
|
||||||
|
&buffer_params.width,
|
||||||
|
&buffer_params.height,
|
||||||
|
&buffer_params.offset,
|
||||||
|
&buffer_params.stride,
|
||||||
|
&buffer_params.pass_stride,
|
||||||
|
&pass.denoised_offset);
|
||||||
|
|
||||||
|
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, work_size, args);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool OptiXDenoiser::denoise_filter_color_postprocess(const DenoiseContext &context,
|
||||||
|
const DenoisePass &pass)
|
||||||
|
{
|
||||||
|
const BufferParams &buffer_params = context.buffer_params;
|
||||||
|
|
||||||
|
const int work_size = buffer_params.width * buffer_params.height;
|
||||||
|
|
||||||
|
DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
|
||||||
|
&buffer_params.full_x,
|
||||||
|
&buffer_params.full_y,
|
||||||
|
&buffer_params.width,
|
||||||
|
&buffer_params.height,
|
||||||
|
&buffer_params.offset,
|
||||||
|
&buffer_params.stride,
|
||||||
|
&buffer_params.pass_stride,
|
||||||
|
&context.num_samples,
|
||||||
|
&pass.noisy_offset,
|
||||||
|
&pass.denoised_offset,
|
||||||
|
&context.pass_sample_count,
|
||||||
|
&pass.num_components,
|
||||||
|
&pass.use_compositing);
|
||||||
|
|
||||||
|
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, work_size, args);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool OptiXDenoiser::denoise_ensure(DenoiseContext &context)
|
||||||
|
{
|
||||||
|
if (!denoise_create_if_needed(context)) {
|
||||||
|
LOG(ERROR) << "OptiX denoiser creation has failed.";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!denoise_configure_if_needed(context)) {
|
||||||
|
LOG(ERROR) << "OptiX denoiser configuration has failed.";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool OptiXDenoiser::denoise_create_if_needed(DenoiseContext &context)
|
||||||
|
{
|
||||||
|
const bool recreate_denoiser = (optix_denoiser_ == nullptr) ||
|
||||||
|
(use_pass_albedo_ != context.use_pass_albedo) ||
|
||||||
|
(use_pass_normal_ != context.use_pass_normal) ||
|
||||||
|
(use_pass_motion_ != context.use_pass_motion);
|
||||||
|
if (!recreate_denoiser) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Destroy existing handle before creating new one. */
|
||||||
|
if (optix_denoiser_) {
|
||||||
|
optixDenoiserDestroy(optix_denoiser_);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create OptiX denoiser handle on demand when it is first used. */
|
||||||
|
OptixDenoiserOptions denoiser_options = {};
|
||||||
|
denoiser_options.guideAlbedo = context.use_pass_albedo;
|
||||||
|
denoiser_options.guideNormal = context.use_pass_normal;
|
||||||
|
|
||||||
|
OptixDenoiserModelKind model = OPTIX_DENOISER_MODEL_KIND_HDR;
|
||||||
|
if (context.use_pass_motion) {
|
||||||
|
model = OPTIX_DENOISER_MODEL_KIND_TEMPORAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
const OptixResult result = optixDenoiserCreate(
|
||||||
|
static_cast<OptiXDevice *>(denoiser_device_)->context,
|
||||||
|
model,
|
||||||
|
&denoiser_options,
|
||||||
|
&optix_denoiser_);
|
||||||
|
|
||||||
|
if (result != OPTIX_SUCCESS) {
|
||||||
|
denoiser_device_->set_error("Failed to create OptiX denoiser");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* OptiX denoiser handle was created with the requested number of input passes. */
|
||||||
|
use_pass_albedo_ = context.use_pass_albedo;
|
||||||
|
use_pass_normal_ = context.use_pass_normal;
|
||||||
|
use_pass_motion_ = context.use_pass_motion;
|
||||||
|
|
||||||
|
/* OptiX denoiser has been created, but it needs configuration. */
|
||||||
|
is_configured_ = false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool OptiXDenoiser::denoise_configure_if_needed(DenoiseContext &context)
|
||||||
|
{
|
||||||
|
/* Limit maximum tile size denoiser can be invoked with. */
|
||||||
|
const int2 tile_size = make_int2(min(context.buffer_params.width, 4096),
|
||||||
|
min(context.buffer_params.height, 4096));
|
||||||
|
|
||||||
|
if (is_configured_ && (configured_size_.x == tile_size.x && configured_size_.y == tile_size.y)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
optix_device_assert(
|
||||||
|
denoiser_device_,
|
||||||
|
optixDenoiserComputeMemoryResources(optix_denoiser_, tile_size.x, tile_size.y, &sizes_));
|
||||||
|
|
||||||
|
/* Allocate denoiser state if tile size has changed since last setup. */
|
||||||
|
state_.device = denoiser_device_;
|
||||||
|
state_.alloc_to_device(sizes_.stateSizeInBytes + sizes_.withOverlapScratchSizeInBytes);
|
||||||
|
|
||||||
|
/* Initialize denoiser state for the current tile size. */
|
||||||
|
const OptixResult result = optixDenoiserSetup(
|
||||||
|
optix_denoiser_,
|
||||||
|
0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called
|
||||||
|
* on a stream that is not the default stream. */
|
||||||
|
tile_size.x + sizes_.overlapWindowSizeInPixels * 2,
|
||||||
|
tile_size.y + sizes_.overlapWindowSizeInPixels * 2,
|
||||||
|
state_.device_pointer,
|
||||||
|
sizes_.stateSizeInBytes,
|
||||||
|
state_.device_pointer + sizes_.stateSizeInBytes,
|
||||||
|
sizes_.withOverlapScratchSizeInBytes);
|
||||||
|
if (result != OPTIX_SUCCESS) {
|
||||||
|
denoiser_device_->set_error("Failed to set up OptiX denoiser");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
cuda_device_assert(denoiser_device_, cuCtxSynchronize());
|
||||||
|
|
||||||
|
is_configured_ = true;
|
||||||
|
configured_size_ = tile_size;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool OptiXDenoiser::denoise_run(const DenoiseContext &context, const DenoisePass &pass)
|
||||||
|
{
|
||||||
|
const BufferParams &buffer_params = context.buffer_params;
|
||||||
|
const int width = buffer_params.width;
|
||||||
|
const int height = buffer_params.height;
|
||||||
|
|
||||||
|
/* Set up input and output layer information. */
|
||||||
|
OptixImage2D color_layer = {0};
|
||||||
|
OptixImage2D albedo_layer = {0};
|
||||||
|
OptixImage2D normal_layer = {0};
|
||||||
|
OptixImage2D flow_layer = {0};
|
||||||
|
|
||||||
|
OptixImage2D output_layer = {0};
|
||||||
|
OptixImage2D prev_output_layer = {0};
|
||||||
|
|
||||||
|
/* Color pass. */
|
||||||
|
{
|
||||||
|
const int pass_denoised = pass.denoised_offset;
|
||||||
|
const int64_t pass_stride_in_bytes = context.buffer_params.pass_stride * sizeof(float);
|
||||||
|
|
||||||
|
color_layer.data = context.render_buffers->buffer.device_pointer +
|
||||||
|
pass_denoised * sizeof(float);
|
||||||
|
color_layer.width = width;
|
||||||
|
color_layer.height = height;
|
||||||
|
color_layer.rowStrideInBytes = pass_stride_in_bytes * context.buffer_params.stride;
|
||||||
|
color_layer.pixelStrideInBytes = pass_stride_in_bytes;
|
||||||
|
color_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Previous output. */
|
||||||
|
if (context.prev_output.offset != PASS_UNUSED) {
|
||||||
|
const int64_t pass_stride_in_bytes = context.prev_output.pass_stride * sizeof(float);
|
||||||
|
|
||||||
|
prev_output_layer.data = context.prev_output.device_pointer +
|
||||||
|
context.prev_output.offset * sizeof(float);
|
||||||
|
prev_output_layer.width = width;
|
||||||
|
prev_output_layer.height = height;
|
||||||
|
prev_output_layer.rowStrideInBytes = pass_stride_in_bytes * context.prev_output.stride;
|
||||||
|
prev_output_layer.pixelStrideInBytes = pass_stride_in_bytes;
|
||||||
|
prev_output_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Optional albedo and color passes. */
|
||||||
|
if (context.num_input_passes > 1) {
|
||||||
|
const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;
|
||||||
|
const int64_t pixel_stride_in_bytes = context.guiding_params.pass_stride * sizeof(float);
|
||||||
|
const int64_t row_stride_in_bytes = context.guiding_params.stride * pixel_stride_in_bytes;
|
||||||
|
|
||||||
|
if (context.use_pass_albedo) {
|
||||||
|
albedo_layer.data = d_guiding_buffer + context.guiding_params.pass_albedo * sizeof(float);
|
||||||
|
albedo_layer.width = width;
|
||||||
|
albedo_layer.height = height;
|
||||||
|
albedo_layer.rowStrideInBytes = row_stride_in_bytes;
|
||||||
|
albedo_layer.pixelStrideInBytes = pixel_stride_in_bytes;
|
||||||
|
albedo_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (context.use_pass_normal) {
|
||||||
|
normal_layer.data = d_guiding_buffer + context.guiding_params.pass_normal * sizeof(float);
|
||||||
|
normal_layer.width = width;
|
||||||
|
normal_layer.height = height;
|
||||||
|
normal_layer.rowStrideInBytes = row_stride_in_bytes;
|
||||||
|
normal_layer.pixelStrideInBytes = pixel_stride_in_bytes;
|
||||||
|
normal_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (context.use_pass_motion) {
|
||||||
|
flow_layer.data = d_guiding_buffer + context.guiding_params.pass_flow * sizeof(float);
|
||||||
|
flow_layer.width = width;
|
||||||
|
flow_layer.height = height;
|
||||||
|
flow_layer.rowStrideInBytes = row_stride_in_bytes;
|
||||||
|
flow_layer.pixelStrideInBytes = pixel_stride_in_bytes;
|
||||||
|
flow_layer.format = OPTIX_PIXEL_FORMAT_FLOAT2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Denoise in-place of the noisy input in the render buffers. */
|
||||||
|
output_layer = color_layer;
|
||||||
|
|
||||||
|
OptixDenoiserGuideLayer guide_layers = {};
|
||||||
|
guide_layers.albedo = albedo_layer;
|
||||||
|
guide_layers.normal = normal_layer;
|
||||||
|
guide_layers.flow = flow_layer;
|
||||||
|
|
||||||
|
OptixDenoiserLayer image_layers = {};
|
||||||
|
image_layers.input = color_layer;
|
||||||
|
image_layers.previousOutput = prev_output_layer;
|
||||||
|
image_layers.output = output_layer;
|
||||||
|
|
||||||
|
/* Finally run denoising. */
|
||||||
|
OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */
|
||||||
|
|
||||||
|
optix_device_assert(denoiser_device_,
|
||||||
|
ccl::optixUtilDenoiserInvokeTiled(
|
||||||
|
optix_denoiser_,
|
||||||
|
static_cast<OptiXDeviceQueue *>(denoiser_queue_.get())->stream(),
|
||||||
|
¶ms,
|
||||||
|
state_.device_pointer,
|
||||||
|
sizes_.stateSizeInBytes,
|
||||||
|
&guide_layers,
|
||||||
|
&image_layers,
|
||||||
|
1,
|
||||||
|
state_.device_pointer + sizes_.stateSizeInBytes,
|
||||||
|
sizes_.withOverlapScratchSizeInBytes,
|
||||||
|
sizes_.overlapWindowSizeInPixels,
|
||||||
|
configured_size_.x,
|
||||||
|
configured_size_.y));
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
CCL_NAMESPACE_END
|
||||||
|
|
||||||
|
#endif
|
||||||
|
@@ -3,16 +3,84 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "integrator/denoiser_device.h"
|
#ifdef WITH_OPTIX
|
||||||
|
|
||||||
|
# include "integrator/denoiser_gpu.h"
|
||||||
|
|
||||||
|
# include "device/optix/util.h"
|
||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
class OptiXDenoiser : public DeviceDenoiser {
|
/* Implementation of denoising API which uses the OptiX denoiser. */
|
||||||
|
class OptiXDenoiser : public DenoiserGPU {
|
||||||
public:
|
public:
|
||||||
OptiXDenoiser(Device *path_trace_device, const DenoiseParams ¶ms);
|
OptiXDenoiser(Device *path_trace_device, const DenoiseParams ¶ms);
|
||||||
|
~OptiXDenoiser();
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual uint get_device_type_mask() const override;
|
virtual uint get_device_type_mask() const override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
class DenoiseContext;
|
||||||
|
class DenoisePass;
|
||||||
|
|
||||||
|
virtual bool denoise_buffer(const DenoiseTask &task) override;
|
||||||
|
|
||||||
|
/* Read guiding passes from the render buffers, preprocess them in a way which is expected by
|
||||||
|
* OptiX and store in the guiding passes memory within the given context.
|
||||||
|
*
|
||||||
|
* Pre-processing of the guiding passes is to only happen once per context lifetime. DO not
|
||||||
|
* preprocess them for every pass which is being denoised. */
|
||||||
|
bool denoise_filter_guiding_preprocess(const DenoiseContext &context);
|
||||||
|
|
||||||
|
/* Set fake albedo pixels in the albedo guiding pass storage.
|
||||||
|
* After this point only passes which do not need albedo for denoising can be processed. */
|
||||||
|
bool denoise_filter_guiding_set_fake_albedo(const DenoiseContext &context);
|
||||||
|
|
||||||
|
void denoise_pass(DenoiseContext &context, PassType pass_type);
|
||||||
|
|
||||||
|
/* Read input color pass from the render buffer into the memory which corresponds to the noisy
|
||||||
|
* input within the given context. Pixels are scaled to the number of samples, but are not
|
||||||
|
* preprocessed yet. */
|
||||||
|
void denoise_color_read(const DenoiseContext &context, const DenoisePass &pass);
|
||||||
|
|
||||||
|
/* Run corresponding filter kernels, preparing data for the denoiser or copying data from the
|
||||||
|
* denoiser result to the render buffer. */
|
||||||
|
bool denoise_filter_color_preprocess(const DenoiseContext &context, const DenoisePass &pass);
|
||||||
|
bool denoise_filter_color_postprocess(const DenoiseContext &context, const DenoisePass &pass);
|
||||||
|
|
||||||
|
/* Make sure the OptiX denoiser is created and configured. */
|
||||||
|
bool denoise_ensure(DenoiseContext &context);
|
||||||
|
|
||||||
|
/* Create OptiX denoiser descriptor if needed.
|
||||||
|
* Will do nothing if the current OptiX descriptor is usable for the given parameters.
|
||||||
|
* If the OptiX denoiser descriptor did re-allocate here it is left unconfigured. */
|
||||||
|
bool denoise_create_if_needed(DenoiseContext &context);
|
||||||
|
|
||||||
|
/* Configure existing OptiX denoiser descriptor for the use for the given task. */
|
||||||
|
bool denoise_configure_if_needed(DenoiseContext &context);
|
||||||
|
|
||||||
|
/* Run configured denoiser. */
|
||||||
|
bool denoise_run(const DenoiseContext &context, const DenoisePass &pass);
|
||||||
|
|
||||||
|
OptixDenoiser optix_denoiser_ = nullptr;
|
||||||
|
|
||||||
|
/* Configuration size, as provided to `optixDenoiserSetup`.
|
||||||
|
* If the `optixDenoiserSetup()` was never used on the current `optix_denoiser` the
|
||||||
|
* `is_configured` will be false. */
|
||||||
|
bool is_configured_ = false;
|
||||||
|
int2 configured_size_ = make_int2(0, 0);
|
||||||
|
|
||||||
|
/* OptiX denoiser state and scratch buffers, stored in a single memory buffer.
|
||||||
|
* The memory layout goes as following: [denoiser state][scratch buffer]. */
|
||||||
|
device_only_memory<unsigned char> state_;
|
||||||
|
OptixDenoiserSizes sizes_ = {};
|
||||||
|
|
||||||
|
bool use_pass_albedo_ = false;
|
||||||
|
bool use_pass_normal_ = false;
|
||||||
|
bool use_pass_motion_ = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
CCL_NAMESPACE_END
|
||||||
|
|
||||||
|
#endif
|
||||||
|
@@ -37,6 +37,14 @@ set(SRC_KERNEL_DEVICE_OPTIX
|
|||||||
device/optix/kernel_shader_raytrace.cu
|
device/optix/kernel_shader_raytrace.cu
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if(WITH_CYCLES_OSL AND (OSL_LIBRARY_VERSION_MINOR GREATER_EQUAL 13 OR OSL_LIBRARY_VERSION_MAJOR GREATER 1))
|
||||||
|
set(SRC_KERNEL_DEVICE_OPTIX
|
||||||
|
${SRC_KERNEL_DEVICE_OPTIX}
|
||||||
|
osl/services_optix.cu
|
||||||
|
device/optix/kernel_osl.cu
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
|
||||||
set(SRC_KERNEL_DEVICE_ONEAPI
|
set(SRC_KERNEL_DEVICE_ONEAPI
|
||||||
device/oneapi/kernel.cpp
|
device/oneapi/kernel.cpp
|
||||||
)
|
)
|
||||||
@@ -181,6 +189,16 @@ set(SRC_KERNEL_SVM_HEADERS
|
|||||||
svm/vertex_color.h
|
svm/vertex_color.h
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if(WITH_CYCLES_OSL)
|
||||||
|
set(SRC_KERNEL_OSL_HEADERS
|
||||||
|
osl/osl.h
|
||||||
|
osl/closures_setup.h
|
||||||
|
osl/closures_template.h
|
||||||
|
osl/services_gpu.h
|
||||||
|
osl/types.h
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
|
||||||
set(SRC_KERNEL_GEOM_HEADERS
|
set(SRC_KERNEL_GEOM_HEADERS
|
||||||
geom/geom.h
|
geom/geom.h
|
||||||
geom/attribute.h
|
geom/attribute.h
|
||||||
@@ -306,6 +324,7 @@ set(SRC_KERNEL_HEADERS
|
|||||||
${SRC_KERNEL_GEOM_HEADERS}
|
${SRC_KERNEL_GEOM_HEADERS}
|
||||||
${SRC_KERNEL_INTEGRATOR_HEADERS}
|
${SRC_KERNEL_INTEGRATOR_HEADERS}
|
||||||
${SRC_KERNEL_LIGHT_HEADERS}
|
${SRC_KERNEL_LIGHT_HEADERS}
|
||||||
|
${SRC_KERNEL_OSL_HEADERS}
|
||||||
${SRC_KERNEL_SAMPLE_HEADERS}
|
${SRC_KERNEL_SAMPLE_HEADERS}
|
||||||
${SRC_KERNEL_SVM_HEADERS}
|
${SRC_KERNEL_SVM_HEADERS}
|
||||||
${SRC_KERNEL_TYPES_HEADERS}
|
${SRC_KERNEL_TYPES_HEADERS}
|
||||||
@@ -449,52 +468,27 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
|||||||
set(cuda_flags ${cuda_flags} -D WITH_CYCLES_DEBUG)
|
set(cuda_flags ${cuda_flags} -D WITH_CYCLES_DEBUG)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(WITH_CYCLES_CUBIN_COMPILER)
|
set(_cuda_nvcc_args
|
||||||
string(SUBSTRING ${arch} 3 -1 CUDA_ARCH)
|
-arch=${arch}
|
||||||
|
${CUDA_NVCC_FLAGS}
|
||||||
# Needed to find libnvrtc-builtins.so. Can't do it from inside
|
--${format}
|
||||||
# cycles_cubin_cc since the env variable is read before main()
|
${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
|
||||||
if(APPLE)
|
--ptxas-options="-v"
|
||||||
set(CUBIN_CC_ENV ${CMAKE_COMMAND}
|
${cuda_flags})
|
||||||
-E env DYLD_LIBRARY_PATH="${cuda_toolkit_root_dir}/lib")
|
|
||||||
elseif(UNIX)
|
|
||||||
set(CUBIN_CC_ENV ${CMAKE_COMMAND}
|
|
||||||
-E env LD_LIBRARY_PATH="${cuda_toolkit_root_dir}/lib64")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
|
if(WITH_COMPILER_CCACHE AND CCACHE_PROGRAM)
|
||||||
add_custom_command(
|
add_custom_command(
|
||||||
OUTPUT ${cuda_file}
|
OUTPUT ${cuda_file}
|
||||||
COMMAND ${CUBIN_CC_ENV}
|
COMMAND ${CCACHE_PROGRAM} ${cuda_nvcc_executable} ${_cuda_nvcc_args}
|
||||||
"$<TARGET_FILE:cycles_cubin_cc>"
|
DEPENDS ${kernel_sources})
|
||||||
-target ${CUDA_ARCH}
|
|
||||||
-i ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
|
|
||||||
${cuda_flags}
|
|
||||||
-v
|
|
||||||
-cuda-toolkit-dir "${cuda_toolkit_root_dir}"
|
|
||||||
DEPENDS ${kernel_sources} cycles_cubin_cc)
|
|
||||||
else()
|
else()
|
||||||
set(_cuda_nvcc_args
|
add_custom_command(
|
||||||
-arch=${arch}
|
OUTPUT ${cuda_file}
|
||||||
${CUDA_NVCC_FLAGS}
|
COMMAND ${cuda_nvcc_executable} ${_cuda_nvcc_args}
|
||||||
--${format}
|
DEPENDS ${kernel_sources})
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
|
|
||||||
--ptxas-options="-v"
|
|
||||||
${cuda_flags})
|
|
||||||
|
|
||||||
if(WITH_COMPILER_CCACHE AND CCACHE_PROGRAM)
|
|
||||||
add_custom_command(
|
|
||||||
OUTPUT ${cuda_file}
|
|
||||||
COMMAND ${CCACHE_PROGRAM} ${cuda_nvcc_executable} ${_cuda_nvcc_args}
|
|
||||||
DEPENDS ${kernel_sources})
|
|
||||||
else()
|
|
||||||
add_custom_command(
|
|
||||||
OUTPUT ${cuda_file}
|
|
||||||
COMMAND ${cuda_nvcc_executable} ${_cuda_nvcc_args}
|
|
||||||
DEPENDS ${kernel_sources})
|
|
||||||
endif()
|
|
||||||
|
|
||||||
unset(_cuda_nvcc_args)
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
unset(_cuda_nvcc_args)
|
||||||
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_file}" ${CYCLES_INSTALL_PATH}/lib)
|
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_file}" ${CYCLES_INSTALL_PATH}/lib)
|
||||||
list(APPEND cuda_cubins ${cuda_file})
|
list(APPEND cuda_cubins ${cuda_file})
|
||||||
|
|
||||||
@@ -570,13 +564,22 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
|
|||||||
if(WIN32)
|
if(WIN32)
|
||||||
set(hip_command ${CMAKE_COMMAND})
|
set(hip_command ${CMAKE_COMMAND})
|
||||||
set(hip_flags
|
set(hip_flags
|
||||||
-E env "HIP_PATH=${HIP_ROOT_DIR}" "PATH=${HIP_PERL_DIR}"
|
-E env "HIP_PATH=${HIP_ROOT_DIR}"
|
||||||
${HIP_HIPCC_EXECUTABLE}.bat)
|
${HIP_HIPCC_EXECUTABLE}.bat)
|
||||||
else()
|
else()
|
||||||
set(hip_command ${HIP_HIPCC_EXECUTABLE})
|
set(hip_command ${HIP_HIPCC_EXECUTABLE})
|
||||||
set(hip_flags)
|
set(hip_flags)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# There's a bug in the compiler causing some scenes to fail to render on Vega cards
|
||||||
|
# A workaround currently is to set -O1 opt level during kernel compilation for these
|
||||||
|
# cards Remove this when a newer compiler is available with fixes.
|
||||||
|
if(WIN32 AND (${arch} MATCHES "gfx90[a-z0-9]+"))
|
||||||
|
set(hip_opt_flags "-O1")
|
||||||
|
else()
|
||||||
|
set(hip_opt_flags)
|
||||||
|
endif()
|
||||||
|
|
||||||
set(hip_flags
|
set(hip_flags
|
||||||
${hip_flags}
|
${hip_flags}
|
||||||
--amdgpu-target=${arch}
|
--amdgpu-target=${arch}
|
||||||
@@ -593,6 +596,7 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
|
|||||||
-Wno-unused-value
|
-Wno-unused-value
|
||||||
--hipcc-func-supp
|
--hipcc-func-supp
|
||||||
-ffast-math
|
-ffast-math
|
||||||
|
${hip_opt_flags}
|
||||||
-o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file})
|
-o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file})
|
||||||
|
|
||||||
if(WITH_NANOVDB)
|
if(WITH_NANOVDB)
|
||||||
@@ -646,55 +650,25 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
|
|||||||
set(cuda_flags ${cuda_flags} -D WITH_CYCLES_DEBUG)
|
set(cuda_flags ${cuda_flags} -D WITH_CYCLES_DEBUG)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(WITH_CYCLES_CUBIN_COMPILER)
|
add_custom_command(
|
||||||
# Needed to find libnvrtc-builtins.so. Can't do it from inside
|
OUTPUT
|
||||||
# cycles_cubin_cc since the env variable is read before main()
|
${output}
|
||||||
if(APPLE)
|
DEPENDS
|
||||||
set(CUBIN_CC_ENV ${CMAKE_COMMAND}
|
${input}
|
||||||
-E env DYLD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib")
|
${SRC_KERNEL_HEADERS}
|
||||||
elseif(UNIX)
|
${SRC_KERNEL_DEVICE_GPU_HEADERS}
|
||||||
set(CUBIN_CC_ENV ${CMAKE_COMMAND}
|
${SRC_KERNEL_DEVICE_CUDA_HEADERS}
|
||||||
-E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64")
|
${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
|
||||||
endif()
|
${SRC_UTIL_HEADERS}
|
||||||
|
COMMAND
|
||||||
|
${CUDA_NVCC_EXECUTABLE}
|
||||||
|
--ptx
|
||||||
|
-arch=sm_50
|
||||||
|
${cuda_flags}
|
||||||
|
${input}
|
||||||
|
WORKING_DIRECTORY
|
||||||
|
"${CMAKE_CURRENT_SOURCE_DIR}")
|
||||||
|
|
||||||
add_custom_command(
|
|
||||||
OUTPUT ${output}
|
|
||||||
DEPENDS
|
|
||||||
${input}
|
|
||||||
${SRC_KERNEL_HEADERS}
|
|
||||||
${SRC_KERNEL_DEVICE_GPU_HEADERS}
|
|
||||||
${SRC_KERNEL_DEVICE_CUDA_HEADERS}
|
|
||||||
${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
|
|
||||||
${SRC_UTIL_HEADERS}
|
|
||||||
COMMAND ${CUBIN_CC_ENV}
|
|
||||||
"$<TARGET_FILE:cycles_cubin_cc>"
|
|
||||||
-target 50
|
|
||||||
-ptx
|
|
||||||
-i ${CMAKE_CURRENT_SOURCE_DIR}/${input}
|
|
||||||
${cuda_flags}
|
|
||||||
-v
|
|
||||||
-cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}"
|
|
||||||
DEPENDS ${kernel_sources} cycles_cubin_cc)
|
|
||||||
else()
|
|
||||||
add_custom_command(
|
|
||||||
OUTPUT
|
|
||||||
${output}
|
|
||||||
DEPENDS
|
|
||||||
${input}
|
|
||||||
${SRC_KERNEL_HEADERS}
|
|
||||||
${SRC_KERNEL_DEVICE_GPU_HEADERS}
|
|
||||||
${SRC_KERNEL_DEVICE_CUDA_HEADERS}
|
|
||||||
${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
|
|
||||||
${SRC_UTIL_HEADERS}
|
|
||||||
COMMAND
|
|
||||||
${CUDA_NVCC_EXECUTABLE}
|
|
||||||
--ptx
|
|
||||||
-arch=sm_50
|
|
||||||
${cuda_flags}
|
|
||||||
${input}
|
|
||||||
WORKING_DIRECTORY
|
|
||||||
"${CMAKE_CURRENT_SOURCE_DIR}")
|
|
||||||
endif()
|
|
||||||
list(APPEND optix_ptx ${output})
|
list(APPEND optix_ptx ${output})
|
||||||
|
|
||||||
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${output}" ${CYCLES_INSTALL_PATH}/lib)
|
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${output}" ${CYCLES_INSTALL_PATH}/lib)
|
||||||
@@ -708,6 +682,16 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
|
|||||||
kernel_optix_shader_raytrace
|
kernel_optix_shader_raytrace
|
||||||
"device/optix/kernel_shader_raytrace.cu"
|
"device/optix/kernel_shader_raytrace.cu"
|
||||||
"--keep-device-functions")
|
"--keep-device-functions")
|
||||||
|
if(WITH_CYCLES_OSL AND (OSL_LIBRARY_VERSION_MINOR GREATER_EQUAL 13 OR OSL_LIBRARY_VERSION_MAJOR GREATER 1))
|
||||||
|
CYCLES_OPTIX_KERNEL_ADD(
|
||||||
|
kernel_optix_osl
|
||||||
|
"device/optix/kernel_osl.cu"
|
||||||
|
"--relocatable-device-code=true")
|
||||||
|
CYCLES_OPTIX_KERNEL_ADD(
|
||||||
|
kernel_optix_osl_services
|
||||||
|
"osl/services_optix.cu"
|
||||||
|
"--relocatable-device-code=true")
|
||||||
|
endif()
|
||||||
|
|
||||||
add_custom_target(cycles_kernel_optix ALL DEPENDS ${optix_ptx})
|
add_custom_target(cycles_kernel_optix ALL DEPENDS ${optix_ptx})
|
||||||
cycles_set_solution_folder(cycles_kernel_optix)
|
cycles_set_solution_folder(cycles_kernel_optix)
|
||||||
@@ -995,6 +979,7 @@ source_group("geom" FILES ${SRC_KERNEL_GEOM_HEADERS})
|
|||||||
source_group("integrator" FILES ${SRC_KERNEL_INTEGRATOR_HEADERS})
|
source_group("integrator" FILES ${SRC_KERNEL_INTEGRATOR_HEADERS})
|
||||||
source_group("kernel" FILES ${SRC_KERNEL_TYPES_HEADERS})
|
source_group("kernel" FILES ${SRC_KERNEL_TYPES_HEADERS})
|
||||||
source_group("light" FILES ${SRC_KERNEL_LIGHT_HEADERS})
|
source_group("light" FILES ${SRC_KERNEL_LIGHT_HEADERS})
|
||||||
|
source_group("osl" FILES ${SRC_KERNEL_OSL_HEADERS})
|
||||||
source_group("sample" FILES ${SRC_KERNEL_SAMPLE_HEADERS})
|
source_group("sample" FILES ${SRC_KERNEL_SAMPLE_HEADERS})
|
||||||
source_group("svm" FILES ${SRC_KERNEL_SVM_HEADERS})
|
source_group("svm" FILES ${SRC_KERNEL_SVM_HEADERS})
|
||||||
source_group("util" FILES ${SRC_KERNEL_UTIL_HEADERS})
|
source_group("util" FILES ${SRC_KERNEL_UTIL_HEADERS})
|
||||||
@@ -1031,6 +1016,7 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_FILM_HEADERS}" ${CYCLE
|
|||||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_GEOM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/geom)
|
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_GEOM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/geom)
|
||||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_INTEGRATOR_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/integrator)
|
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_INTEGRATOR_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/integrator)
|
||||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_LIGHT_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/light)
|
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_LIGHT_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/light)
|
||||||
|
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_OSL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/osl)
|
||||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_SAMPLE_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/sample)
|
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_SAMPLE_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/sample)
|
||||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_SVM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/svm)
|
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_SVM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/svm)
|
||||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_TYPES_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel)
|
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_TYPES_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel)
|
||||||
|
@@ -297,8 +297,10 @@ ccl_device_inline void bsdf_roughness_eta(const KernelGlobals kg,
|
|||||||
ccl_private float2 *roughness,
|
ccl_private float2 *roughness,
|
||||||
ccl_private float *eta)
|
ccl_private float *eta)
|
||||||
{
|
{
|
||||||
|
#ifdef __SVM__
|
||||||
bool refractive = false;
|
bool refractive = false;
|
||||||
float alpha = 1.0f;
|
float alpha = 1.0f;
|
||||||
|
#endif
|
||||||
switch (sc->type) {
|
switch (sc->type) {
|
||||||
case CLOSURE_BSDF_DIFFUSE_ID:
|
case CLOSURE_BSDF_DIFFUSE_ID:
|
||||||
*roughness = one_float2();
|
*roughness = one_float2();
|
||||||
|
@@ -49,11 +49,11 @@ KERNEL_STRUCT_BEGIN(KernelBVH, bvh)
|
|||||||
KERNEL_STRUCT_MEMBER(bvh, int, root)
|
KERNEL_STRUCT_MEMBER(bvh, int, root)
|
||||||
KERNEL_STRUCT_MEMBER(bvh, int, have_motion)
|
KERNEL_STRUCT_MEMBER(bvh, int, have_motion)
|
||||||
KERNEL_STRUCT_MEMBER(bvh, int, have_curves)
|
KERNEL_STRUCT_MEMBER(bvh, int, have_curves)
|
||||||
|
KERNEL_STRUCT_MEMBER(bvh, int, have_points)
|
||||||
|
KERNEL_STRUCT_MEMBER(bvh, int, have_volumes)
|
||||||
KERNEL_STRUCT_MEMBER(bvh, int, bvh_layout)
|
KERNEL_STRUCT_MEMBER(bvh, int, bvh_layout)
|
||||||
KERNEL_STRUCT_MEMBER(bvh, int, use_bvh_steps)
|
KERNEL_STRUCT_MEMBER(bvh, int, use_bvh_steps)
|
||||||
KERNEL_STRUCT_MEMBER(bvh, int, curve_subdivisions)
|
KERNEL_STRUCT_MEMBER(bvh, int, curve_subdivisions)
|
||||||
KERNEL_STRUCT_MEMBER(bvh, int, pad1)
|
|
||||||
KERNEL_STRUCT_MEMBER(bvh, int, pad2)
|
|
||||||
KERNEL_STRUCT_END(KernelBVH)
|
KERNEL_STRUCT_END(KernelBVH)
|
||||||
|
|
||||||
/* Film. */
|
/* Film. */
|
||||||
@@ -183,6 +183,7 @@ KERNEL_STRUCT_MEMBER(integrator, int, use_lamp_mis)
|
|||||||
KERNEL_STRUCT_MEMBER(integrator, int, use_caustics)
|
KERNEL_STRUCT_MEMBER(integrator, int, use_caustics)
|
||||||
/* Sampling pattern. */
|
/* Sampling pattern. */
|
||||||
KERNEL_STRUCT_MEMBER(integrator, int, sampling_pattern)
|
KERNEL_STRUCT_MEMBER(integrator, int, sampling_pattern)
|
||||||
|
KERNEL_STRUCT_MEMBER(integrator, int, pmj_sequence_size)
|
||||||
KERNEL_STRUCT_MEMBER(integrator, float, scrambling_distance)
|
KERNEL_STRUCT_MEMBER(integrator, float, scrambling_distance)
|
||||||
/* Volume render. */
|
/* Volume render. */
|
||||||
KERNEL_STRUCT_MEMBER(integrator, int, use_volumes)
|
KERNEL_STRUCT_MEMBER(integrator, int, use_volumes)
|
||||||
@@ -205,6 +206,11 @@ KERNEL_STRUCT_MEMBER(integrator, int, use_surface_guiding)
|
|||||||
KERNEL_STRUCT_MEMBER(integrator, int, use_volume_guiding)
|
KERNEL_STRUCT_MEMBER(integrator, int, use_volume_guiding)
|
||||||
KERNEL_STRUCT_MEMBER(integrator, int, use_guiding_direct_light)
|
KERNEL_STRUCT_MEMBER(integrator, int, use_guiding_direct_light)
|
||||||
KERNEL_STRUCT_MEMBER(integrator, int, use_guiding_mis_weights)
|
KERNEL_STRUCT_MEMBER(integrator, int, use_guiding_mis_weights)
|
||||||
|
|
||||||
|
/* Padding. */
|
||||||
|
KERNEL_STRUCT_MEMBER(integrator, int, pad1)
|
||||||
|
KERNEL_STRUCT_MEMBER(integrator, int, pad2)
|
||||||
|
KERNEL_STRUCT_MEMBER(integrator, int, pad3)
|
||||||
KERNEL_STRUCT_END(KernelIntegrator)
|
KERNEL_STRUCT_END(KernelIntegrator)
|
||||||
|
|
||||||
/* SVM. For shader specialization. */
|
/* SVM. For shader specialization. */
|
||||||
|
@@ -30,6 +30,7 @@ typedef unsigned long long uint64_t;
|
|||||||
/* Qualifiers */
|
/* Qualifiers */
|
||||||
|
|
||||||
#define ccl_device __device__ __inline__
|
#define ccl_device __device__ __inline__
|
||||||
|
#define ccl_device_extern extern "C" __device__
|
||||||
#if __CUDA_ARCH__ < 500
|
#if __CUDA_ARCH__ < 500
|
||||||
# define ccl_device_inline __device__ __forceinline__
|
# define ccl_device_inline __device__ __forceinline__
|
||||||
# define ccl_device_forceinline __device__ __forceinline__
|
# define ccl_device_forceinline __device__ __forceinline__
|
||||||
@@ -109,14 +110,14 @@ ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object_3D
|
|||||||
|
|
||||||
typedef unsigned short half;
|
typedef unsigned short half;
|
||||||
|
|
||||||
__device__ half __float2half(const float f)
|
ccl_device_forceinline half __float2half(const float f)
|
||||||
{
|
{
|
||||||
half val;
|
half val;
|
||||||
asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f));
|
asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f));
|
||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ float __half2float(const half h)
|
ccl_device_forceinline float __half2float(const half h)
|
||||||
{
|
{
|
||||||
float val;
|
float val;
|
||||||
asm("{ cvt.f32.f16 %0, %1;}\n" : "=f"(val) : "h"(h));
|
asm("{ cvt.f32.f16 %0, %1;}\n" : "=f"(val) : "h"(h));
|
||||||
|
@@ -28,6 +28,7 @@ typedef unsigned long long uint64_t;
|
|||||||
/* Qualifiers */
|
/* Qualifiers */
|
||||||
|
|
||||||
#define ccl_device __device__ __inline__
|
#define ccl_device __device__ __inline__
|
||||||
|
#define ccl_device_extern extern "C" __device__
|
||||||
#define ccl_device_inline __device__ __inline__
|
#define ccl_device_inline __device__ __inline__
|
||||||
#define ccl_device_forceinline __device__ __forceinline__
|
#define ccl_device_forceinline __device__ __forceinline__
|
||||||
#define ccl_device_noinline __device__ __noinline__
|
#define ccl_device_noinline __device__ __noinline__
|
||||||
|
@@ -79,7 +79,8 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
|
|||||||
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
|
metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
|
||||||
metalrt_intersector_type metalrt_intersect;
|
metalrt_intersector_type metalrt_intersect;
|
||||||
|
|
||||||
if (!kernel_data.bvh.have_curves) {
|
bool triangle_only = !kernel_data.bvh.have_curves && !kernel_data.bvh.have_points;
|
||||||
|
if (triangle_only) {
|
||||||
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
|
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -177,7 +178,9 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
|
|||||||
metalrt_intersector_type metalrt_intersect;
|
metalrt_intersector_type metalrt_intersect;
|
||||||
|
|
||||||
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
|
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
|
||||||
if (!kernel_data.bvh.have_curves) {
|
|
||||||
|
bool triangle_only = !kernel_data.bvh.have_curves && !kernel_data.bvh.have_points;
|
||||||
|
if (triangle_only) {
|
||||||
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
|
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -205,7 +208,9 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
|
|||||||
if (lcg_state) {
|
if (lcg_state) {
|
||||||
*lcg_state = payload.lcg_state;
|
*lcg_state = payload.lcg_state;
|
||||||
}
|
}
|
||||||
*local_isect = payload.local_isect;
|
if (local_isect) {
|
||||||
|
*local_isect = payload.local_isect;
|
||||||
|
}
|
||||||
|
|
||||||
return payload.result;
|
return payload.result;
|
||||||
}
|
}
|
||||||
@@ -240,7 +245,9 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
|
|||||||
metalrt_intersector_type metalrt_intersect;
|
metalrt_intersector_type metalrt_intersect;
|
||||||
|
|
||||||
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
|
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
|
||||||
if (!kernel_data.bvh.have_curves) {
|
|
||||||
|
bool triangle_only = !kernel_data.bvh.have_curves && !kernel_data.bvh.have_points;
|
||||||
|
if (triangle_only) {
|
||||||
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
|
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -307,7 +314,9 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
|
|||||||
metalrt_intersector_type metalrt_intersect;
|
metalrt_intersector_type metalrt_intersect;
|
||||||
|
|
||||||
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
|
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
|
||||||
if (!kernel_data.bvh.have_curves) {
|
|
||||||
|
bool triangle_only = !kernel_data.bvh.have_curves && !kernel_data.bvh.have_points;
|
||||||
|
if (triangle_only) {
|
||||||
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
|
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -38,6 +38,7 @@ using namespace metal::raytracing;
|
|||||||
# define ccl_device_noinline ccl_device __attribute__((noinline))
|
# define ccl_device_noinline ccl_device __attribute__((noinline))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define ccl_device_extern extern "C"
|
||||||
#define ccl_device_noinline_cpu ccl_device
|
#define ccl_device_noinline_cpu ccl_device
|
||||||
#define ccl_device_inline_method ccl_device
|
#define ccl_device_inline_method ccl_device
|
||||||
#define ccl_global device
|
#define ccl_global device
|
||||||
|
@@ -182,20 +182,20 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
|
|||||||
|
|
||||||
const float u = barycentrics.x;
|
const float u = barycentrics.x;
|
||||||
const float v = barycentrics.y;
|
const float v = barycentrics.y;
|
||||||
int type = 0;
|
const int prim_type = kernel_data_fetch(objects, object).primitive_type;
|
||||||
if (intersection_type == METALRT_HIT_TRIANGLE) {
|
int type = prim_type;
|
||||||
type = kernel_data_fetch(objects, object).primitive_type;
|
|
||||||
}
|
|
||||||
# ifdef __HAIR__
|
# ifdef __HAIR__
|
||||||
else {
|
if (intersection_type != METALRT_HIT_TRIANGLE) {
|
||||||
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
|
if ( (prim_type == PRIMITIVE_CURVE_THICK || prim_type == PRIMITIVE_CURVE_RIBBON)) {
|
||||||
type = segment.type;
|
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
|
||||||
prim = segment.prim;
|
type = segment.type;
|
||||||
|
prim = segment.prim;
|
||||||
/* Filter out curve endcaps */
|
|
||||||
if (u == 0.0f || u == 1.0f) {
|
/* Filter out curve endcaps */
|
||||||
/* continue search */
|
if (u == 0.0f || u == 1.0f) {
|
||||||
return true;
|
/* continue search */
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
# endif
|
# endif
|
||||||
@@ -279,7 +279,7 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
|
|||||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim;
|
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim;
|
||||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
|
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
|
||||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;
|
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;
|
||||||
|
|
||||||
/* Continue tracing. */
|
/* Continue tracing. */
|
||||||
# endif /* __TRANSPARENT_SHADOWS__ */
|
# endif /* __TRANSPARENT_SHADOWS__ */
|
||||||
#endif /* __SHADOW_RECORD_ALL__ */
|
#endif /* __SHADOW_RECORD_ALL__ */
|
||||||
@@ -327,7 +327,8 @@ inline TReturnType metalrt_visibility_test(
|
|||||||
TReturnType result;
|
TReturnType result;
|
||||||
|
|
||||||
#ifdef __HAIR__
|
#ifdef __HAIR__
|
||||||
if (intersection_type == METALRT_HIT_BOUNDING_BOX) {
|
const int type = kernel_data_fetch(objects, object).primitive_type;
|
||||||
|
if (intersection_type == METALRT_HIT_BOUNDING_BOX && (type == PRIMITIVE_CURVE_THICK || type == PRIMITIVE_CURVE_RIBBON)) {
|
||||||
/* Filter out curve endcaps. */
|
/* Filter out curve endcaps. */
|
||||||
if (u == 0.0f || u == 1.0f) {
|
if (u == 0.0f || u == 1.0f) {
|
||||||
result.accept = false;
|
result.accept = false;
|
||||||
@@ -463,7 +464,12 @@ ccl_device_inline void metalrt_intersection_curve_shadow(
|
|||||||
const float ray_tmax,
|
const float ray_tmax,
|
||||||
thread BoundingBoxIntersectionResult &result)
|
thread BoundingBoxIntersectionResult &result)
|
||||||
{
|
{
|
||||||
|
# ifdef __VISIBILITY_FLAG__
|
||||||
const uint visibility = payload.visibility;
|
const uint visibility = payload.visibility;
|
||||||
|
if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
|
||||||
Intersection isect;
|
Intersection isect;
|
||||||
isect.t = ray_tmax;
|
isect.t = ray_tmax;
|
||||||
@@ -685,7 +691,12 @@ ccl_device_inline void metalrt_intersection_point_shadow(
|
|||||||
const float ray_tmax,
|
const float ray_tmax,
|
||||||
thread BoundingBoxIntersectionResult &result)
|
thread BoundingBoxIntersectionResult &result)
|
||||||
{
|
{
|
||||||
|
# ifdef __VISIBILITY_FLAG__
|
||||||
const uint visibility = payload.visibility;
|
const uint visibility = payload.visibility;
|
||||||
|
if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
|
||||||
Intersection isect;
|
Intersection isect;
|
||||||
isect.t = ray_tmax;
|
isect.t = ray_tmax;
|
||||||
|
@@ -28,6 +28,7 @@
|
|||||||
/* Qualifier wrappers for different names on different devices */
|
/* Qualifier wrappers for different names on different devices */
|
||||||
|
|
||||||
#define ccl_device
|
#define ccl_device
|
||||||
|
#define ccl_device_extern extern "C"
|
||||||
#define ccl_global
|
#define ccl_global
|
||||||
#define ccl_always_inline __attribute__((always_inline))
|
#define ccl_always_inline __attribute__((always_inline))
|
||||||
#define ccl_device_inline inline
|
#define ccl_device_inline inline
|
||||||
|
@@ -33,14 +33,16 @@ typedef unsigned long long uint64_t;
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define ccl_device \
|
#define ccl_device \
|
||||||
__device__ __forceinline__ // Function calls are bad for OptiX performance, so inline everything
|
static __device__ \
|
||||||
|
__forceinline__ // Function calls are bad for OptiX performance, so inline everything
|
||||||
|
#define ccl_device_extern extern "C" __device__
|
||||||
#define ccl_device_inline ccl_device
|
#define ccl_device_inline ccl_device
|
||||||
#define ccl_device_forceinline ccl_device
|
#define ccl_device_forceinline ccl_device
|
||||||
#define ccl_device_inline_method ccl_device
|
#define ccl_device_inline_method __device__ __forceinline__
|
||||||
#define ccl_device_noinline __device__ __noinline__
|
#define ccl_device_noinline static __device__ __noinline__
|
||||||
#define ccl_device_noinline_cpu ccl_device
|
#define ccl_device_noinline_cpu ccl_device
|
||||||
#define ccl_global
|
#define ccl_global
|
||||||
#define ccl_inline_constant __constant__
|
#define ccl_inline_constant static __constant__
|
||||||
#define ccl_device_constant __constant__ __device__
|
#define ccl_device_constant __constant__ __device__
|
||||||
#define ccl_constant const
|
#define ccl_constant const
|
||||||
#define ccl_gpu_shared __shared__
|
#define ccl_gpu_shared __shared__
|
||||||
@@ -57,23 +59,6 @@ typedef unsigned long long uint64_t;
|
|||||||
|
|
||||||
#define kernel_assert(cond)
|
#define kernel_assert(cond)
|
||||||
|
|
||||||
/* GPU thread, block, grid size and index */
|
|
||||||
|
|
||||||
#define ccl_gpu_thread_idx_x (threadIdx.x)
|
|
||||||
#define ccl_gpu_block_dim_x (blockDim.x)
|
|
||||||
#define ccl_gpu_block_idx_x (blockIdx.x)
|
|
||||||
#define ccl_gpu_grid_dim_x (gridDim.x)
|
|
||||||
#define ccl_gpu_warp_size (warpSize)
|
|
||||||
#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
|
|
||||||
|
|
||||||
#define ccl_gpu_global_id_x() (ccl_gpu_block_idx_x * ccl_gpu_block_dim_x + ccl_gpu_thread_idx_x)
|
|
||||||
#define ccl_gpu_global_size_x() (ccl_gpu_grid_dim_x * ccl_gpu_block_dim_x)
|
|
||||||
|
|
||||||
/* GPU warp synchronization. */
|
|
||||||
|
|
||||||
#define ccl_gpu_syncthreads() __syncthreads()
|
|
||||||
#define ccl_gpu_ballot(predicate) __ballot_sync(0xFFFFFFFF, predicate)
|
|
||||||
|
|
||||||
/* GPU texture objects */
|
/* GPU texture objects */
|
||||||
|
|
||||||
typedef unsigned long long CUtexObject;
|
typedef unsigned long long CUtexObject;
|
||||||
@@ -101,14 +86,14 @@ ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object_3D
|
|||||||
|
|
||||||
typedef unsigned short half;
|
typedef unsigned short half;
|
||||||
|
|
||||||
__device__ half __float2half(const float f)
|
ccl_device_forceinline half __float2half(const float f)
|
||||||
{
|
{
|
||||||
half val;
|
half val;
|
||||||
asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f));
|
asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f));
|
||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ float __half2float(const half h)
|
ccl_device_forceinline float __half2float(const half h)
|
||||||
{
|
{
|
||||||
float val;
|
float val;
|
||||||
asm("{ cvt.f32.f16 %0, %1;}\n" : "=f"(val) : "h"(h));
|
asm("{ cvt.f32.f16 %0, %1;}\n" : "=f"(val) : "h"(h));
|
||||||
|
@@ -25,6 +25,7 @@ struct KernelParamsOptiX {
|
|||||||
/* Kernel arguments */
|
/* Kernel arguments */
|
||||||
const int *path_index_array;
|
const int *path_index_array;
|
||||||
float *render_buffer;
|
float *render_buffer;
|
||||||
|
int offset;
|
||||||
|
|
||||||
/* Global scene data and textures */
|
/* Global scene data and textures */
|
||||||
KernelData data;
|
KernelData data;
|
||||||
@@ -36,7 +37,11 @@ struct KernelParamsOptiX {
|
|||||||
};
|
};
|
||||||
|
|
||||||
#ifdef __NVCC__
|
#ifdef __NVCC__
|
||||||
extern "C" static __constant__ KernelParamsOptiX kernel_params;
|
extern "C"
|
||||||
|
# ifndef __CUDACC_RDC__
|
||||||
|
static
|
||||||
|
# endif
|
||||||
|
__constant__ KernelParamsOptiX kernel_params;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Abstraction macros */
|
/* Abstraction macros */
|
||||||
|
83
intern/cycles/kernel/device/optix/kernel_osl.cu
Normal file
83
intern/cycles/kernel/device/optix/kernel_osl.cu
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
/* SPDX-License-Identifier: Apache-2.0
|
||||||
|
* Copyright 2011-2022 Blender Foundation */
|
||||||
|
|
||||||
|
#define WITH_OSL
|
||||||
|
|
||||||
|
/* Copy of the regular OptiX kernels with additional OSL support. */
|
||||||
|
|
||||||
|
#include "kernel/device/optix/kernel_shader_raytrace.cu"
|
||||||
|
|
||||||
|
#include "kernel/bake/bake.h"
|
||||||
|
#include "kernel/integrator/shade_background.h"
|
||||||
|
#include "kernel/integrator/shade_light.h"
|
||||||
|
#include "kernel/integrator/shade_shadow.h"
|
||||||
|
#include "kernel/integrator/shade_volume.h"
|
||||||
|
|
||||||
|
extern "C" __global__ void __raygen__kernel_optix_integrator_shade_background()
|
||||||
|
{
|
||||||
|
const int global_index = optixGetLaunchIndex().x;
|
||||||
|
const int path_index = (kernel_params.path_index_array) ?
|
||||||
|
kernel_params.path_index_array[global_index] :
|
||||||
|
global_index;
|
||||||
|
integrator_shade_background(nullptr, path_index, kernel_params.render_buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
extern "C" __global__ void __raygen__kernel_optix_integrator_shade_light()
|
||||||
|
{
|
||||||
|
const int global_index = optixGetLaunchIndex().x;
|
||||||
|
const int path_index = (kernel_params.path_index_array) ?
|
||||||
|
kernel_params.path_index_array[global_index] :
|
||||||
|
global_index;
|
||||||
|
integrator_shade_light(nullptr, path_index, kernel_params.render_buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface()
|
||||||
|
{
|
||||||
|
const int global_index = optixGetLaunchIndex().x;
|
||||||
|
const int path_index = (kernel_params.path_index_array) ?
|
||||||
|
kernel_params.path_index_array[global_index] :
|
||||||
|
global_index;
|
||||||
|
integrator_shade_surface(nullptr, path_index, kernel_params.render_buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
extern "C" __global__ void __raygen__kernel_optix_integrator_shade_volume()
|
||||||
|
{
|
||||||
|
const int global_index = optixGetLaunchIndex().x;
|
||||||
|
const int path_index = (kernel_params.path_index_array) ?
|
||||||
|
kernel_params.path_index_array[global_index] :
|
||||||
|
global_index;
|
||||||
|
integrator_shade_volume(nullptr, path_index, kernel_params.render_buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
extern "C" __global__ void __raygen__kernel_optix_integrator_shade_shadow()
|
||||||
|
{
|
||||||
|
const int global_index = optixGetLaunchIndex().x;
|
||||||
|
const int path_index = (kernel_params.path_index_array) ?
|
||||||
|
kernel_params.path_index_array[global_index] :
|
||||||
|
global_index;
|
||||||
|
integrator_shade_shadow(nullptr, path_index, kernel_params.render_buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
extern "C" __global__ void __raygen__kernel_optix_shader_eval_displace()
|
||||||
|
{
|
||||||
|
KernelShaderEvalInput *const input = (KernelShaderEvalInput *)kernel_params.path_index_array;
|
||||||
|
float *const output = kernel_params.render_buffer;
|
||||||
|
const int global_index = kernel_params.offset + optixGetLaunchIndex().x;
|
||||||
|
kernel_displace_evaluate(nullptr, input, output, global_index);
|
||||||
|
}
|
||||||
|
|
||||||
|
extern "C" __global__ void __raygen__kernel_optix_shader_eval_background()
|
||||||
|
{
|
||||||
|
KernelShaderEvalInput *const input = (KernelShaderEvalInput *)kernel_params.path_index_array;
|
||||||
|
float *const output = kernel_params.render_buffer;
|
||||||
|
const int global_index = kernel_params.offset + optixGetLaunchIndex().x;
|
||||||
|
kernel_background_evaluate(nullptr, input, output, global_index);
|
||||||
|
}
|
||||||
|
|
||||||
|
extern "C" __global__ void __raygen__kernel_optix_shader_eval_curve_shadow_transparency()
|
||||||
|
{
|
||||||
|
KernelShaderEvalInput *const input = (KernelShaderEvalInput *)kernel_params.path_index_array;
|
||||||
|
float *const output = kernel_params.render_buffer;
|
||||||
|
const int global_index = kernel_params.offset + optixGetLaunchIndex().x;
|
||||||
|
kernel_curve_shadow_transparency_evaluate(nullptr, input, output, global_index);
|
||||||
|
}
|
@@ -58,13 +58,29 @@ ccl_device bool film_adaptive_sampling_convergence_check(KernelGlobals kg,
|
|||||||
const float4 I = kernel_read_pass_float4(buffer + kernel_data.film.pass_combined);
|
const float4 I = kernel_read_pass_float4(buffer + kernel_data.film.pass_combined);
|
||||||
|
|
||||||
const float sample = __float_as_uint(buffer[kernel_data.film.pass_sample_count]);
|
const float sample = __float_as_uint(buffer[kernel_data.film.pass_sample_count]);
|
||||||
const float inv_sample = 1.0f / sample;
|
const float intensity_scale = kernel_data.film.exposure / sample;
|
||||||
|
|
||||||
/* The per pixel error as seen in section 2.1 of
|
/* The per pixel error as seen in section 2.1 of
|
||||||
* "A hierarchical automatic stopping condition for Monte Carlo global illumination" */
|
* "A hierarchical automatic stopping condition for Monte Carlo global illumination" */
|
||||||
const float error_difference = (fabsf(I.x - A.x) + fabsf(I.y - A.y) + fabsf(I.z - A.z)) *
|
const float error_difference = (fabsf(I.x - A.x) + fabsf(I.y - A.y) + fabsf(I.z - A.z)) *
|
||||||
inv_sample;
|
intensity_scale;
|
||||||
const float error_normalize = sqrtf((I.x + I.y + I.z) * inv_sample);
|
const float intensity = (I.x + I.y + I.z) * intensity_scale;
|
||||||
|
|
||||||
|
/* Anything with R+G+B > 1 is highly exposed - even in sRGB it's a range that
|
||||||
|
* some displays aren't even able to display without significant losses in
|
||||||
|
* detalization. Everything with R+G+B > 3 is overexposed and should receive
|
||||||
|
* even less samples. Filmic-like curves need maximum sampling rate at
|
||||||
|
* intensity near 0.1-0.2, so threshold of 1 for R+G+B leaves an additional
|
||||||
|
* fstop in case it is needed for compositing.
|
||||||
|
*/
|
||||||
|
float error_normalize;
|
||||||
|
if (intensity < 1.0f) {
|
||||||
|
error_normalize = sqrtf(intensity);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
error_normalize = intensity;
|
||||||
|
}
|
||||||
|
|
||||||
/* A small epsilon is added to the divisor to prevent division by zero. */
|
/* A small epsilon is added to the divisor to prevent division by zero. */
|
||||||
const float error = error_difference / (0.0001f + error_normalize);
|
const float error = error_difference / (0.0001f + error_normalize);
|
||||||
const bool did_converge = (error < threshold);
|
const bool did_converge = (error < threshold);
|
||||||
|
@@ -42,27 +42,27 @@ ccl_device_inline void film_write_data_passes(KernelGlobals kg,
|
|||||||
ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer);
|
ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer);
|
||||||
|
|
||||||
if (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) {
|
if (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) {
|
||||||
|
if (INTEGRATOR_STATE(state, path, sample) == 0) {
|
||||||
|
if (flag & PASSMASK(DEPTH)) {
|
||||||
|
const float depth = camera_z_depth(kg, sd->P);
|
||||||
|
film_overwrite_pass_float(buffer + kernel_data.film.pass_depth, depth);
|
||||||
|
}
|
||||||
|
if (flag & PASSMASK(OBJECT_ID)) {
|
||||||
|
const float id = object_pass_id(kg, sd->object);
|
||||||
|
film_overwrite_pass_float(buffer + kernel_data.film.pass_object_id, id);
|
||||||
|
}
|
||||||
|
if (flag & PASSMASK(MATERIAL_ID)) {
|
||||||
|
const float id = shader_pass_id(kg, sd);
|
||||||
|
film_overwrite_pass_float(buffer + kernel_data.film.pass_material_id, id);
|
||||||
|
}
|
||||||
|
if (flag & PASSMASK(POSITION)) {
|
||||||
|
const float3 position = sd->P;
|
||||||
|
film_overwrite_pass_float3(buffer + kernel_data.film.pass_position, position);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!(sd->flag & SD_TRANSPARENT) || kernel_data.film.pass_alpha_threshold == 0.0f ||
|
if (!(sd->flag & SD_TRANSPARENT) || kernel_data.film.pass_alpha_threshold == 0.0f ||
|
||||||
average(surface_shader_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) {
|
average(surface_shader_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) {
|
||||||
if (INTEGRATOR_STATE(state, path, sample) == 0) {
|
|
||||||
if (flag & PASSMASK(DEPTH)) {
|
|
||||||
const float depth = camera_z_depth(kg, sd->P);
|
|
||||||
film_write_pass_float(buffer + kernel_data.film.pass_depth, depth);
|
|
||||||
}
|
|
||||||
if (flag & PASSMASK(OBJECT_ID)) {
|
|
||||||
const float id = object_pass_id(kg, sd->object);
|
|
||||||
film_write_pass_float(buffer + kernel_data.film.pass_object_id, id);
|
|
||||||
}
|
|
||||||
if (flag & PASSMASK(MATERIAL_ID)) {
|
|
||||||
const float id = shader_pass_id(kg, sd);
|
|
||||||
film_write_pass_float(buffer + kernel_data.film.pass_material_id, id);
|
|
||||||
}
|
|
||||||
if (flag & PASSMASK(POSITION)) {
|
|
||||||
const float3 position = sd->P;
|
|
||||||
film_write_pass_float3(buffer + kernel_data.film.pass_position, position);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (flag & PASSMASK(NORMAL)) {
|
if (flag & PASSMASK(NORMAL)) {
|
||||||
const float3 normal = surface_shader_average_normal(kg, sd);
|
const float3 normal = surface_shader_average_normal(kg, sd);
|
||||||
film_write_pass_float3(buffer + kernel_data.film.pass_normal, normal);
|
film_write_pass_float3(buffer + kernel_data.film.pass_normal, normal);
|
||||||
|
@@ -12,6 +12,7 @@
|
|||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
/* Get pointer to pixel in render buffer. */
|
/* Get pointer to pixel in render buffer. */
|
||||||
|
|
||||||
ccl_device_forceinline ccl_global float *film_pass_pixel_render_buffer(
|
ccl_device_forceinline ccl_global float *film_pass_pixel_render_buffer(
|
||||||
KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer)
|
KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer)
|
||||||
{
|
{
|
||||||
@@ -21,7 +22,8 @@ ccl_device_forceinline ccl_global float *film_pass_pixel_render_buffer(
|
|||||||
return render_buffer + render_buffer_offset;
|
return render_buffer + render_buffer_offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Write to pixel. */
|
/* Accumulate in passes. */
|
||||||
|
|
||||||
ccl_device_inline void film_write_pass_float(ccl_global float *ccl_restrict buffer, float value)
|
ccl_device_inline void film_write_pass_float(ccl_global float *ccl_restrict buffer, float value)
|
||||||
{
|
{
|
||||||
#ifdef __ATOMIC_PASS_WRITE__
|
#ifdef __ATOMIC_PASS_WRITE__
|
||||||
@@ -74,6 +76,25 @@ ccl_device_inline void film_write_pass_float4(ccl_global float *ccl_restrict buf
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Overwrite for passes that only write on sample 0. This assumes only a single thread will write
|
||||||
|
* to this pixel and no atomics are needed. */
|
||||||
|
|
||||||
|
ccl_device_inline void film_overwrite_pass_float(ccl_global float *ccl_restrict buffer,
|
||||||
|
float value)
|
||||||
|
{
|
||||||
|
*buffer = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
ccl_device_inline void film_overwrite_pass_float3(ccl_global float *ccl_restrict buffer,
|
||||||
|
float3 value)
|
||||||
|
{
|
||||||
|
buffer[0] = value.x;
|
||||||
|
buffer[1] = value.y;
|
||||||
|
buffer[2] = value.z;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Read back from passes. */
|
||||||
|
|
||||||
ccl_device_inline float kernel_read_pass_float(ccl_global float *ccl_restrict buffer)
|
ccl_device_inline float kernel_read_pass_float(ccl_global float *ccl_restrict buffer)
|
||||||
{
|
{
|
||||||
return *buffer;
|
return *buffer;
|
||||||
|
@@ -24,8 +24,8 @@ ccl_device void displacement_shader_eval(KernelGlobals kg,
|
|||||||
|
|
||||||
/* this will modify sd->P */
|
/* this will modify sd->P */
|
||||||
#ifdef __OSL__
|
#ifdef __OSL__
|
||||||
if (kg->osl) {
|
if (kernel_data.kernel_features & KERNEL_FEATURE_OSL) {
|
||||||
OSLShader::eval_displacement(kg, state, sd);
|
osl_eval_nodes<SHADER_TYPE_DISPLACEMENT>(kg, state, sd, 0);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
|
@@ -156,6 +156,13 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
|
|||||||
u = v;
|
u = v;
|
||||||
v = 1.0f - tmp - v;
|
v = 1.0f - tmp - v;
|
||||||
|
|
||||||
|
const float tmpdx = dudx;
|
||||||
|
const float tmpdy = dudy;
|
||||||
|
dudx = dvdx;
|
||||||
|
dudy = dvdy;
|
||||||
|
dvdx = -tmpdx - dvdx;
|
||||||
|
dvdy = -tmpdy - dvdy;
|
||||||
|
|
||||||
/* Position and normal on triangle. */
|
/* Position and normal on triangle. */
|
||||||
const int object = kernel_data.bake.object_index;
|
const int object = kernel_data.bake.object_index;
|
||||||
float3 P, Ng;
|
float3 P, Ng;
|
||||||
|
@@ -34,6 +34,9 @@ typedef struct VolumeIntegrateResult {
|
|||||||
Spectrum direct_throughput;
|
Spectrum direct_throughput;
|
||||||
float direct_t;
|
float direct_t;
|
||||||
ShaderVolumePhases direct_phases;
|
ShaderVolumePhases direct_phases;
|
||||||
|
# ifdef __PATH_GUIDING__
|
||||||
|
VolumeSampleMethod direct_sample_method;
|
||||||
|
# endif
|
||||||
|
|
||||||
/* Throughput and offset for indirect light scattering. */
|
/* Throughput and offset for indirect light scattering. */
|
||||||
bool indirect_scatter;
|
bool indirect_scatter;
|
||||||
@@ -580,6 +583,9 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
|
|||||||
result.direct_t = volume_equiangular_sample(
|
result.direct_t = volume_equiangular_sample(
|
||||||
ray, equiangular_light_P, vstate.rscatter, &vstate.equiangular_pdf);
|
ray, equiangular_light_P, vstate.rscatter, &vstate.equiangular_pdf);
|
||||||
}
|
}
|
||||||
|
# ifdef __PATH_GUIDING__
|
||||||
|
result.direct_sample_method = vstate.direct_sample_method;
|
||||||
|
# endif
|
||||||
|
|
||||||
# ifdef __DENOISING_FEATURES__
|
# ifdef __DENOISING_FEATURES__
|
||||||
const bool write_denoising_features = (INTEGRATOR_STATE(state, path, flag) &
|
const bool write_denoising_features = (INTEGRATOR_STATE(state, path, flag) &
|
||||||
@@ -719,6 +725,9 @@ ccl_device_forceinline void integrate_volume_direct_light(
|
|||||||
ccl_private const RNGState *ccl_restrict rng_state,
|
ccl_private const RNGState *ccl_restrict rng_state,
|
||||||
const float3 P,
|
const float3 P,
|
||||||
ccl_private const ShaderVolumePhases *ccl_restrict phases,
|
ccl_private const ShaderVolumePhases *ccl_restrict phases,
|
||||||
|
# ifdef __PATH_GUIDING__
|
||||||
|
ccl_private const Spectrum unlit_throughput,
|
||||||
|
# endif
|
||||||
ccl_private const Spectrum throughput,
|
ccl_private const Spectrum throughput,
|
||||||
ccl_private LightSample *ccl_restrict ls)
|
ccl_private LightSample *ccl_restrict ls)
|
||||||
{
|
{
|
||||||
@@ -851,7 +860,7 @@ ccl_device_forceinline void integrate_volume_direct_light(
|
|||||||
kernel_data.background.lightgroup + 1;
|
kernel_data.background.lightgroup + 1;
|
||||||
|
|
||||||
# ifdef __PATH_GUIDING__
|
# ifdef __PATH_GUIDING__
|
||||||
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, unlit_throughput) = throughput;
|
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, unlit_throughput) = unlit_throughput;
|
||||||
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, path_segment) = INTEGRATOR_STATE(
|
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, path_segment) = INTEGRATOR_STATE(
|
||||||
state, guiding, path_segment);
|
state, guiding, path_segment);
|
||||||
# endif
|
# endif
|
||||||
@@ -990,7 +999,13 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
|
|||||||
const float step_size = volume_stack_step_size(kg, volume_read_lambda_pass);
|
const float step_size = volume_stack_step_size(kg, volume_read_lambda_pass);
|
||||||
|
|
||||||
# if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
|
# if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
|
||||||
|
/* The current path throughput which is used later to calculate per-segment throughput.*/
|
||||||
const float3 initial_throughput = INTEGRATOR_STATE(state, path, throughput);
|
const float3 initial_throughput = INTEGRATOR_STATE(state, path, throughput);
|
||||||
|
/* The path throughput used to calculate the throughput for direct light. */
|
||||||
|
float3 unlit_throughput = initial_throughput;
|
||||||
|
/* If a new path segment is generated at the direct scatter position.*/
|
||||||
|
bool guiding_generated_new_segment = false;
|
||||||
|
float rand_phase_guiding = 0.5f;
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
/* TODO: expensive to zero closures? */
|
/* TODO: expensive to zero closures? */
|
||||||
@@ -1018,41 +1033,48 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
|
|||||||
return VOLUME_PATH_MISSED;
|
return VOLUME_PATH_MISSED;
|
||||||
}
|
}
|
||||||
|
|
||||||
# if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
|
|
||||||
bool guiding_generated_new_segment = false;
|
|
||||||
if (kernel_data.integrator.use_guiding) {
|
|
||||||
/* Record transmittance using change in throughput. */
|
|
||||||
float3 transmittance_weight = spectrum_to_rgb(
|
|
||||||
safe_divide_color(result.indirect_throughput, initial_throughput));
|
|
||||||
guiding_record_volume_transmission(kg, state, transmittance_weight);
|
|
||||||
|
|
||||||
if (result.indirect_scatter) {
|
|
||||||
const float3 P = ray->P + result.indirect_t * ray->D;
|
|
||||||
|
|
||||||
/* Record volume segment up to direct scatter position.
|
|
||||||
* TODO: volume segment is wrong when direct_t and indirect_t. */
|
|
||||||
if (result.direct_scatter && (result.direct_t == result.indirect_t)) {
|
|
||||||
guiding_record_volume_segment(kg, state, P, sd.I);
|
|
||||||
guiding_generated_new_segment = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
# if PATH_GUIDING_LEVEL >= 4
|
|
||||||
/* TODO: this position will be wrong for direct light pdf computation,
|
|
||||||
* since the direct light position may be different? */
|
|
||||||
volume_shader_prepare_guiding(
|
|
||||||
kg, state, &sd, &rng_state, P, ray->D, &result.direct_phases, direct_sample_method);
|
|
||||||
# endif
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
/* No guiding if we don't scatter. */
|
|
||||||
state->guiding.use_volume_guiding = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
# endif
|
|
||||||
|
|
||||||
/* Direct light. */
|
/* Direct light. */
|
||||||
if (result.direct_scatter) {
|
if (result.direct_scatter) {
|
||||||
const float3 direct_P = ray->P + result.direct_t * ray->D;
|
const float3 direct_P = ray->P + result.direct_t * ray->D;
|
||||||
|
|
||||||
|
# ifdef __PATH_GUIDING__
|
||||||
|
if (kernel_data.integrator.use_guiding) {
|
||||||
|
# if PATH_GUIDING_LEVEL >= 1
|
||||||
|
if (result.direct_sample_method == VOLUME_SAMPLE_DISTANCE) {
|
||||||
|
/* If the direct scatter event is generated using VOLUME_SAMPLE_DISTANCE the direct event
|
||||||
|
* will happen at the same position as the indirect event and the direct light contribution
|
||||||
|
* will contribute to the position of the next path segment.*/
|
||||||
|
float3 transmittance_weight = spectrum_to_rgb(
|
||||||
|
safe_divide_color(result.indirect_throughput, initial_throughput));
|
||||||
|
guiding_record_volume_transmission(kg, state, transmittance_weight);
|
||||||
|
guiding_record_volume_segment(kg, state, direct_P, sd.I);
|
||||||
|
guiding_generated_new_segment = true;
|
||||||
|
unlit_throughput = result.indirect_throughput / continuation_probability;
|
||||||
|
rand_phase_guiding = path_state_rng_1D(kg, &rng_state, PRNG_VOLUME_PHASE_GUIDING_DISTANCE);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* If the direct scatter event is generated using VOLUME_SAMPLE_EQUIANGULAR the direct
|
||||||
|
* event will happen at a separate position as the indirect event and the direct light
|
||||||
|
* contribution will contribute to the position of the current/previous path segment. The
|
||||||
|
* unlit_throughput has to be adjusted to include the scattering at the previous segment.*/
|
||||||
|
float3 scatterEval = one_float3();
|
||||||
|
if (state->guiding.path_segment) {
|
||||||
|
pgl_vec3f scatteringWeight = state->guiding.path_segment->scatteringWeight;
|
||||||
|
scatterEval = make_float3(scatteringWeight.x, scatteringWeight.y, scatteringWeight.z);
|
||||||
|
}
|
||||||
|
unlit_throughput /= scatterEval;
|
||||||
|
unlit_throughput *= continuation_probability;
|
||||||
|
rand_phase_guiding = path_state_rng_1D(
|
||||||
|
kg, &rng_state, PRNG_VOLUME_PHASE_GUIDING_EQUIANGULAR);
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
# if PATH_GUIDING_LEVEL >= 4
|
||||||
|
volume_shader_prepare_guiding(
|
||||||
|
kg, state, &sd, rand_phase_guiding, direct_P, ray->D, &result.direct_phases);
|
||||||
|
# endif
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
|
||||||
result.direct_throughput /= continuation_probability;
|
result.direct_throughput /= continuation_probability;
|
||||||
integrate_volume_direct_light(kg,
|
integrate_volume_direct_light(kg,
|
||||||
state,
|
state,
|
||||||
@@ -1060,6 +1082,9 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
|
|||||||
&rng_state,
|
&rng_state,
|
||||||
direct_P,
|
direct_P,
|
||||||
&result.direct_phases,
|
&result.direct_phases,
|
||||||
|
# ifdef __PATH_GUIDING__
|
||||||
|
unlit_throughput,
|
||||||
|
# endif
|
||||||
result.direct_throughput,
|
result.direct_throughput,
|
||||||
&ls);
|
&ls);
|
||||||
}
|
}
|
||||||
@@ -1069,6 +1094,13 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
|
|||||||
* Only divide throughput by continuation_probability if we scatter. For the attenuation
|
* Only divide throughput by continuation_probability if we scatter. For the attenuation
|
||||||
* case the next surface will already do this division. */
|
* case the next surface will already do this division. */
|
||||||
if (result.indirect_scatter) {
|
if (result.indirect_scatter) {
|
||||||
|
# if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
|
||||||
|
if (!guiding_generated_new_segment) {
|
||||||
|
float3 transmittance_weight = spectrum_to_rgb(
|
||||||
|
safe_divide_color(result.indirect_throughput, initial_throughput));
|
||||||
|
guiding_record_volume_transmission(kg, state, transmittance_weight);
|
||||||
|
}
|
||||||
|
# endif
|
||||||
result.indirect_throughput /= continuation_probability;
|
result.indirect_throughput /= continuation_probability;
|
||||||
}
|
}
|
||||||
INTEGRATOR_STATE_WRITE(state, path, throughput) = result.indirect_throughput;
|
INTEGRATOR_STATE_WRITE(state, path, throughput) = result.indirect_throughput;
|
||||||
@@ -1076,10 +1108,21 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
|
|||||||
if (result.indirect_scatter) {
|
if (result.indirect_scatter) {
|
||||||
sd.P = ray->P + result.indirect_t * ray->D;
|
sd.P = ray->P + result.indirect_t * ray->D;
|
||||||
|
|
||||||
# if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
|
# if defined(__PATH_GUIDING__)
|
||||||
|
# if PATH_GUIDING_LEVEL >= 1
|
||||||
if (!guiding_generated_new_segment) {
|
if (!guiding_generated_new_segment) {
|
||||||
guiding_record_volume_segment(kg, state, sd.P, sd.I);
|
guiding_record_volume_segment(kg, state, sd.P, sd.I);
|
||||||
}
|
}
|
||||||
|
# endif
|
||||||
|
# if PATH_GUIDING_LEVEL >= 4
|
||||||
|
/* If the direct scatter event was generated using VOLUME_SAMPLE_EQUIANGULAR we need to
|
||||||
|
* initialize the guiding distribution at the indirect scatter position. */
|
||||||
|
if (result.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR) {
|
||||||
|
rand_phase_guiding = path_state_rng_1D(kg, &rng_state, PRNG_VOLUME_PHASE_GUIDING_DISTANCE);
|
||||||
|
volume_shader_prepare_guiding(
|
||||||
|
kg, state, &sd, rand_phase_guiding, sd.P, ray->D, &result.indirect_phases);
|
||||||
|
}
|
||||||
|
# endif
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
if (integrate_volume_phase_scatter(kg, state, &sd, &rng_state, &result.indirect_phases)) {
|
if (integrate_volume_phase_scatter(kg, state, &sd, &rng_state, &result.indirect_phases)) {
|
||||||
@@ -1090,6 +1133,10 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
# if defined(__PATH_GUIDING__)
|
||||||
|
/* No guiding if we don't scatter. */
|
||||||
|
state->guiding.use_volume_guiding = false;
|
||||||
|
# endif
|
||||||
return VOLUME_PATH_ATTENUATED;
|
return VOLUME_PATH_ATTENUATED;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -827,13 +827,8 @@ ccl_device void surface_shader_eval(KernelGlobals kg,
|
|||||||
sd->num_closure_left = max_closures;
|
sd->num_closure_left = max_closures;
|
||||||
|
|
||||||
#ifdef __OSL__
|
#ifdef __OSL__
|
||||||
if (kg->osl) {
|
if (kernel_data.kernel_features & KERNEL_FEATURE_OSL) {
|
||||||
if (sd->object == OBJECT_NONE && sd->lamp == LAMP_NONE) {
|
osl_eval_nodes<SHADER_TYPE_SURFACE>(kg, state, sd, path_flag);
|
||||||
OSLShader::eval_background(kg, state, sd, path_flag);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
OSLShader::eval_surface(kg, state, sd, path_flag);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
|
@@ -95,11 +95,10 @@ ccl_device_inline void volume_shader_copy_phases(ccl_private ShaderVolumePhases
|
|||||||
ccl_device_inline void volume_shader_prepare_guiding(KernelGlobals kg,
|
ccl_device_inline void volume_shader_prepare_guiding(KernelGlobals kg,
|
||||||
IntegratorState state,
|
IntegratorState state,
|
||||||
ccl_private ShaderData *sd,
|
ccl_private ShaderData *sd,
|
||||||
ccl_private const RNGState *rng_state,
|
float rand_phase_guiding,
|
||||||
const float3 P,
|
const float3 P,
|
||||||
const float3 D,
|
const float3 D,
|
||||||
ccl_private ShaderVolumePhases *phases,
|
ccl_private ShaderVolumePhases *phases)
|
||||||
const VolumeSampleMethod direct_sample_method)
|
|
||||||
{
|
{
|
||||||
/* Have any phase functions to guide? */
|
/* Have any phase functions to guide? */
|
||||||
const int num_phases = phases->num_closure;
|
const int num_phases = phases->num_closure;
|
||||||
@@ -109,7 +108,6 @@ ccl_device_inline void volume_shader_prepare_guiding(KernelGlobals kg,
|
|||||||
}
|
}
|
||||||
|
|
||||||
const float volume_guiding_probability = kernel_data.integrator.volume_guiding_probability;
|
const float volume_guiding_probability = kernel_data.integrator.volume_guiding_probability;
|
||||||
float rand_phase_guiding = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_PHASE_GUIDING);
|
|
||||||
|
|
||||||
/* If we have more than one phase function we select one random based on its
|
/* If we have more than one phase function we select one random based on its
|
||||||
* sample weight to calculate the product distribution for guiding. */
|
* sample weight to calculate the product distribution for guiding. */
|
||||||
@@ -493,8 +491,8 @@ ccl_device_inline void volume_shader_eval(KernelGlobals kg,
|
|||||||
|
|
||||||
/* evaluate shader */
|
/* evaluate shader */
|
||||||
# ifdef __OSL__
|
# ifdef __OSL__
|
||||||
if (kg->osl) {
|
if (kernel_data.kernel_features & KERNEL_FEATURE_OSL) {
|
||||||
OSLShader::eval_volume(kg, state, sd, path_flag);
|
osl_eval_nodes<SHADER_TYPE_VOLUME>(kg, state, sd, path_flag);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
# endif
|
# endif
|
||||||
|
@@ -25,13 +25,18 @@
|
|||||||
|
|
||||||
#include "kernel/osl/osl.h"
|
#include "kernel/osl/osl.h"
|
||||||
|
|
||||||
#include "kernel/osl/closures_setup.h"
|
|
||||||
|
|
||||||
#define TO_VEC3(v) OSL::Vec3(v.x, v.y, v.z)
|
#define TO_VEC3(v) OSL::Vec3(v.x, v.y, v.z)
|
||||||
#define TO_FLOAT3(v) make_float3(v[0], v[1], v[2])
|
#define TO_FLOAT3(v) make_float3(v[0], v[1], v[2])
|
||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
static_assert(sizeof(OSLClosure) == sizeof(OSL::ClosureColor) &&
|
||||||
|
sizeof(OSLClosureAdd) == sizeof(OSL::ClosureAdd) &&
|
||||||
|
sizeof(OSLClosureMul) == sizeof(OSL::ClosureMul) &&
|
||||||
|
sizeof(OSLClosureComponent) == sizeof(OSL::ClosureComponent));
|
||||||
|
static_assert(sizeof(ShaderGlobals) == sizeof(OSL::ShaderGlobals) &&
|
||||||
|
offsetof(ShaderGlobals, Ci) == offsetof(OSL::ShaderGlobals, Ci));
|
||||||
|
|
||||||
/* Registration */
|
/* Registration */
|
||||||
|
|
||||||
#define OSL_CLOSURE_STRUCT_BEGIN(Upper, lower) \
|
#define OSL_CLOSURE_STRUCT_BEGIN(Upper, lower) \
|
||||||
@@ -60,53 +65,18 @@ void OSLRenderServices::register_closures(OSL::ShadingSystem *ss)
|
|||||||
#include "closures_template.h"
|
#include "closures_template.h"
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Globals */
|
/* Surface & Background */
|
||||||
|
|
||||||
static void shaderdata_to_shaderglobals(const KernelGlobalsCPU *kg,
|
template<>
|
||||||
ShaderData *sd,
|
void osl_eval_nodes<SHADER_TYPE_SURFACE>(const KernelGlobalsCPU *kg,
|
||||||
const void *state,
|
const void *state,
|
||||||
uint32_t path_flag,
|
ShaderData *sd,
|
||||||
OSLThreadData *tdata)
|
uint32_t path_flag)
|
||||||
{
|
{
|
||||||
OSL::ShaderGlobals *globals = &tdata->globals;
|
/* setup shader globals from shader data */
|
||||||
|
OSLThreadData *tdata = kg->osl_tdata;
|
||||||
const differential3 dP = differential_from_compact(sd->Ng, sd->dP);
|
shaderdata_to_shaderglobals(
|
||||||
const differential3 dI = differential_from_compact(sd->I, sd->dI);
|
kg, sd, path_flag, reinterpret_cast<ShaderGlobals *>(&tdata->globals));
|
||||||
|
|
||||||
/* copy from shader data to shader globals */
|
|
||||||
globals->P = TO_VEC3(sd->P);
|
|
||||||
globals->dPdx = TO_VEC3(dP.dx);
|
|
||||||
globals->dPdy = TO_VEC3(dP.dy);
|
|
||||||
globals->I = TO_VEC3(sd->I);
|
|
||||||
globals->dIdx = TO_VEC3(dI.dx);
|
|
||||||
globals->dIdy = TO_VEC3(dI.dy);
|
|
||||||
globals->N = TO_VEC3(sd->N);
|
|
||||||
globals->Ng = TO_VEC3(sd->Ng);
|
|
||||||
globals->u = sd->u;
|
|
||||||
globals->dudx = sd->du.dx;
|
|
||||||
globals->dudy = sd->du.dy;
|
|
||||||
globals->v = sd->v;
|
|
||||||
globals->dvdx = sd->dv.dx;
|
|
||||||
globals->dvdy = sd->dv.dy;
|
|
||||||
globals->dPdu = TO_VEC3(sd->dPdu);
|
|
||||||
globals->dPdv = TO_VEC3(sd->dPdv);
|
|
||||||
globals->surfacearea = 1.0f;
|
|
||||||
globals->time = sd->time;
|
|
||||||
|
|
||||||
/* booleans */
|
|
||||||
globals->raytype = path_flag;
|
|
||||||
globals->flipHandedness = 0;
|
|
||||||
globals->backfacing = (sd->flag & SD_BACKFACING);
|
|
||||||
|
|
||||||
/* shader data to be used in services callbacks */
|
|
||||||
globals->renderstate = sd;
|
|
||||||
|
|
||||||
/* hacky, we leave it to services to fetch actual object matrix */
|
|
||||||
globals->shader2common = sd;
|
|
||||||
globals->object2common = sd;
|
|
||||||
|
|
||||||
/* must be set to NULL before execute */
|
|
||||||
globals->Ci = NULL;
|
|
||||||
|
|
||||||
/* clear trace data */
|
/* clear trace data */
|
||||||
tdata->tracedata.init = false;
|
tdata->tracedata.init = false;
|
||||||
@@ -121,53 +91,6 @@ static void shaderdata_to_shaderglobals(const KernelGlobalsCPU *kg,
|
|||||||
sd->osl_path_state = (const IntegratorStateCPU *)state;
|
sd->osl_path_state = (const IntegratorStateCPU *)state;
|
||||||
sd->osl_shadow_path_state = nullptr;
|
sd->osl_shadow_path_state = nullptr;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
static void flatten_closure_tree(const KernelGlobalsCPU *kg,
|
|
||||||
ShaderData *sd,
|
|
||||||
uint32_t path_flag,
|
|
||||||
const OSL::ClosureColor *closure,
|
|
||||||
float3 weight = make_float3(1.0f, 1.0f, 1.0f))
|
|
||||||
{
|
|
||||||
/* OSL gives us a closure tree, we flatten it into arrays per
|
|
||||||
* closure type, for evaluation, sampling, etc later on. */
|
|
||||||
|
|
||||||
switch (closure->id) {
|
|
||||||
case OSL::ClosureColor::MUL: {
|
|
||||||
OSL::ClosureMul *mul = (OSL::ClosureMul *)closure;
|
|
||||||
flatten_closure_tree(kg, sd, path_flag, mul->closure, TO_FLOAT3(mul->weight) * weight);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case OSL::ClosureColor::ADD: {
|
|
||||||
OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure;
|
|
||||||
flatten_closure_tree(kg, sd, path_flag, add->closureA, weight);
|
|
||||||
flatten_closure_tree(kg, sd, path_flag, add->closureB, weight);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
#define OSL_CLOSURE_STRUCT_BEGIN(Upper, lower) \
|
|
||||||
case OSL_CLOSURE_##Upper##_ID: { \
|
|
||||||
const OSL::ClosureComponent *comp = reinterpret_cast<const OSL::ClosureComponent *>(closure); \
|
|
||||||
weight *= TO_FLOAT3(comp->w); \
|
|
||||||
osl_closure_##lower##_setup( \
|
|
||||||
kg, sd, path_flag, weight, reinterpret_cast<const Upper##Closure *>(comp + 1)); \
|
|
||||||
break; \
|
|
||||||
}
|
|
||||||
#include "closures_template.h"
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Surface */
|
|
||||||
|
|
||||||
void OSLShader::eval_surface(const KernelGlobalsCPU *kg,
|
|
||||||
const void *state,
|
|
||||||
ShaderData *sd,
|
|
||||||
uint32_t path_flag)
|
|
||||||
{
|
|
||||||
/* setup shader globals from shader data */
|
|
||||||
OSLThreadData *tdata = kg->osl_tdata;
|
|
||||||
shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata);
|
|
||||||
|
|
||||||
/* execute shader for this point */
|
/* execute shader for this point */
|
||||||
OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
|
OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
|
||||||
@@ -175,101 +98,99 @@ void OSLShader::eval_surface(const KernelGlobalsCPU *kg,
|
|||||||
OSL::ShadingContext *octx = tdata->context;
|
OSL::ShadingContext *octx = tdata->context;
|
||||||
int shader = sd->shader & SHADER_MASK;
|
int shader = sd->shader & SHADER_MASK;
|
||||||
|
|
||||||
/* automatic bump shader */
|
if (sd->object == OBJECT_NONE && sd->lamp == LAMP_NONE) {
|
||||||
if (kg->osl->bump_state[shader]) {
|
/* background */
|
||||||
/* save state */
|
if (kg->osl->background_state) {
|
||||||
const float3 P = sd->P;
|
ss->execute(octx, *(kg->osl->background_state), *globals);
|
||||||
const float dP = sd->dP;
|
}
|
||||||
const OSL::Vec3 dPdx = globals->dPdx;
|
}
|
||||||
const OSL::Vec3 dPdy = globals->dPdy;
|
else {
|
||||||
|
/* automatic bump shader */
|
||||||
|
if (kg->osl->bump_state[shader]) {
|
||||||
|
/* save state */
|
||||||
|
const float3 P = sd->P;
|
||||||
|
const float dP = sd->dP;
|
||||||
|
const OSL::Vec3 dPdx = globals->dPdx;
|
||||||
|
const OSL::Vec3 dPdy = globals->dPdy;
|
||||||
|
|
||||||
/* set state as if undisplaced */
|
/* set state as if undisplaced */
|
||||||
if (sd->flag & SD_HAS_DISPLACEMENT) {
|
if (sd->flag & SD_HAS_DISPLACEMENT) {
|
||||||
float data[9];
|
float data[9];
|
||||||
bool found = kg->osl->services->get_attribute(sd,
|
bool found = kg->osl->services->get_attribute(sd,
|
||||||
true,
|
true,
|
||||||
OSLRenderServices::u_empty,
|
OSLRenderServices::u_empty,
|
||||||
TypeDesc::TypeVector,
|
TypeDesc::TypeVector,
|
||||||
OSLRenderServices::u_geom_undisplaced,
|
OSLRenderServices::u_geom_undisplaced,
|
||||||
data);
|
data);
|
||||||
(void)found;
|
(void)found;
|
||||||
assert(found);
|
assert(found);
|
||||||
|
|
||||||
differential3 tmp_dP;
|
differential3 tmp_dP;
|
||||||
memcpy(&sd->P, data, sizeof(float) * 3);
|
memcpy(&sd->P, data, sizeof(float) * 3);
|
||||||
memcpy(&tmp_dP.dx, data + 3, sizeof(float) * 3);
|
memcpy(&tmp_dP.dx, data + 3, sizeof(float) * 3);
|
||||||
memcpy(&tmp_dP.dy, data + 6, sizeof(float) * 3);
|
memcpy(&tmp_dP.dy, data + 6, sizeof(float) * 3);
|
||||||
|
|
||||||
object_position_transform(kg, sd, &sd->P);
|
object_position_transform(kg, sd, &sd->P);
|
||||||
object_dir_transform(kg, sd, &tmp_dP.dx);
|
object_dir_transform(kg, sd, &tmp_dP.dx);
|
||||||
object_dir_transform(kg, sd, &tmp_dP.dy);
|
object_dir_transform(kg, sd, &tmp_dP.dy);
|
||||||
|
|
||||||
sd->dP = differential_make_compact(tmp_dP);
|
sd->dP = differential_make_compact(tmp_dP);
|
||||||
|
|
||||||
globals->P = TO_VEC3(sd->P);
|
globals->P = TO_VEC3(sd->P);
|
||||||
globals->dPdx = TO_VEC3(tmp_dP.dx);
|
globals->dPdx = TO_VEC3(tmp_dP.dx);
|
||||||
globals->dPdy = TO_VEC3(tmp_dP.dy);
|
globals->dPdy = TO_VEC3(tmp_dP.dy);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* execute bump shader */
|
||||||
|
ss->execute(octx, *(kg->osl->bump_state[shader]), *globals);
|
||||||
|
|
||||||
|
/* reset state */
|
||||||
|
sd->P = P;
|
||||||
|
sd->dP = dP;
|
||||||
|
|
||||||
|
globals->P = TO_VEC3(P);
|
||||||
|
globals->dPdx = TO_VEC3(dPdx);
|
||||||
|
globals->dPdy = TO_VEC3(dPdy);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* execute bump shader */
|
/* surface shader */
|
||||||
ss->execute(octx, *(kg->osl->bump_state[shader]), *globals);
|
if (kg->osl->surface_state[shader]) {
|
||||||
|
ss->execute(octx, *(kg->osl->surface_state[shader]), *globals);
|
||||||
/* reset state */
|
}
|
||||||
sd->P = P;
|
|
||||||
sd->dP = dP;
|
|
||||||
|
|
||||||
globals->P = TO_VEC3(P);
|
|
||||||
globals->dPdx = TO_VEC3(dPdx);
|
|
||||||
globals->dPdy = TO_VEC3(dPdy);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* surface shader */
|
|
||||||
if (kg->osl->surface_state[shader]) {
|
|
||||||
ss->execute(octx, *(kg->osl->surface_state[shader]), *globals);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* flatten closure tree */
|
/* flatten closure tree */
|
||||||
if (globals->Ci) {
|
if (globals->Ci) {
|
||||||
flatten_closure_tree(kg, sd, path_flag, globals->Ci);
|
flatten_closure_tree(kg, sd, path_flag, reinterpret_cast<OSLClosure *>(globals->Ci));
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Background */
|
|
||||||
|
|
||||||
void OSLShader::eval_background(const KernelGlobalsCPU *kg,
|
|
||||||
const void *state,
|
|
||||||
ShaderData *sd,
|
|
||||||
uint32_t path_flag)
|
|
||||||
{
|
|
||||||
/* setup shader globals from shader data */
|
|
||||||
OSLThreadData *tdata = kg->osl_tdata;
|
|
||||||
shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata);
|
|
||||||
|
|
||||||
/* execute shader for this point */
|
|
||||||
OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
|
|
||||||
OSL::ShaderGlobals *globals = &tdata->globals;
|
|
||||||
OSL::ShadingContext *octx = tdata->context;
|
|
||||||
|
|
||||||
if (kg->osl->background_state) {
|
|
||||||
ss->execute(octx, *(kg->osl->background_state), *globals);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* return background color immediately */
|
|
||||||
if (globals->Ci) {
|
|
||||||
flatten_closure_tree(kg, sd, path_flag, globals->Ci);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Volume */
|
/* Volume */
|
||||||
|
|
||||||
void OSLShader::eval_volume(const KernelGlobalsCPU *kg,
|
template<>
|
||||||
const void *state,
|
void osl_eval_nodes<SHADER_TYPE_VOLUME>(const KernelGlobalsCPU *kg,
|
||||||
ShaderData *sd,
|
const void *state,
|
||||||
uint32_t path_flag)
|
ShaderData *sd,
|
||||||
|
uint32_t path_flag)
|
||||||
{
|
{
|
||||||
/* setup shader globals from shader data */
|
/* setup shader globals from shader data */
|
||||||
OSLThreadData *tdata = kg->osl_tdata;
|
OSLThreadData *tdata = kg->osl_tdata;
|
||||||
shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata);
|
shaderdata_to_shaderglobals(
|
||||||
|
kg, sd, path_flag, reinterpret_cast<ShaderGlobals *>(&tdata->globals));
|
||||||
|
|
||||||
|
/* clear trace data */
|
||||||
|
tdata->tracedata.init = false;
|
||||||
|
|
||||||
|
/* Used by render-services. */
|
||||||
|
sd->osl_globals = kg;
|
||||||
|
if (path_flag & PATH_RAY_SHADOW) {
|
||||||
|
sd->osl_path_state = nullptr;
|
||||||
|
sd->osl_shadow_path_state = (const IntegratorShadowStateCPU *)state;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
sd->osl_path_state = (const IntegratorStateCPU *)state;
|
||||||
|
sd->osl_shadow_path_state = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
/* execute shader */
|
/* execute shader */
|
||||||
OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
|
OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
|
||||||
@@ -283,17 +204,30 @@ void OSLShader::eval_volume(const KernelGlobalsCPU *kg,
|
|||||||
|
|
||||||
/* flatten closure tree */
|
/* flatten closure tree */
|
||||||
if (globals->Ci) {
|
if (globals->Ci) {
|
||||||
flatten_closure_tree(kg, sd, path_flag, globals->Ci);
|
flatten_closure_tree(kg, sd, path_flag, reinterpret_cast<OSLClosure *>(globals->Ci));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Displacement */
|
/* Displacement */
|
||||||
|
|
||||||
void OSLShader::eval_displacement(const KernelGlobalsCPU *kg, const void *state, ShaderData *sd)
|
template<>
|
||||||
|
void osl_eval_nodes<SHADER_TYPE_DISPLACEMENT>(const KernelGlobalsCPU *kg,
|
||||||
|
const void *state,
|
||||||
|
ShaderData *sd,
|
||||||
|
uint32_t path_flag)
|
||||||
{
|
{
|
||||||
/* setup shader globals from shader data */
|
/* setup shader globals from shader data */
|
||||||
OSLThreadData *tdata = kg->osl_tdata;
|
OSLThreadData *tdata = kg->osl_tdata;
|
||||||
shaderdata_to_shaderglobals(kg, sd, state, 0, tdata);
|
shaderdata_to_shaderglobals(
|
||||||
|
kg, sd, path_flag, reinterpret_cast<ShaderGlobals *>(&tdata->globals));
|
||||||
|
|
||||||
|
/* clear trace data */
|
||||||
|
tdata->tracedata.init = false;
|
||||||
|
|
||||||
|
/* Used by render-services. */
|
||||||
|
sd->osl_globals = kg;
|
||||||
|
sd->osl_path_state = (const IntegratorStateCPU *)state;
|
||||||
|
sd->osl_shadow_path_state = nullptr;
|
||||||
|
|
||||||
/* execute shader */
|
/* execute shader */
|
||||||
OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
|
OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
|
||||||
|
@@ -40,12 +40,7 @@ CCL_NAMESPACE_BEGIN
|
|||||||
const char *label;
|
const char *label;
|
||||||
#define OSL_CLOSURE_STRUCT_END(Upper, lower) \
|
#define OSL_CLOSURE_STRUCT_END(Upper, lower) \
|
||||||
} \
|
} \
|
||||||
; \
|
;
|
||||||
ccl_device void osl_closure_##lower##_setup(KernelGlobals kg, \
|
|
||||||
ccl_private ShaderData *sd, \
|
|
||||||
uint32_t path_flag, \
|
|
||||||
float3 weight, \
|
|
||||||
ccl_private Upper##Closure *closure);
|
|
||||||
#define OSL_CLOSURE_STRUCT_MEMBER(Upper, TYPE, type, name, key) type name;
|
#define OSL_CLOSURE_STRUCT_MEMBER(Upper, TYPE, type, name, key) type name;
|
||||||
#define OSL_CLOSURE_STRUCT_ARRAY_MEMBER(Upper, TYPE, type, name, key, size) type name[size];
|
#define OSL_CLOSURE_STRUCT_ARRAY_MEMBER(Upper, TYPE, type, name, key, size) type name[size];
|
||||||
|
|
||||||
@@ -210,11 +205,9 @@ ccl_device void osl_closure_microfacet_setup(KernelGlobals kg,
|
|||||||
bsdf->ior = closure->ior;
|
bsdf->ior = closure->ior;
|
||||||
bsdf->T = closure->T;
|
bsdf->T = closure->T;
|
||||||
|
|
||||||
static OSL::ustring u_ggx("ggx");
|
|
||||||
static OSL::ustring u_default("default");
|
|
||||||
|
|
||||||
/* GGX */
|
/* GGX */
|
||||||
if (closure->distribution == u_ggx || closure->distribution == u_default) {
|
if (closure->distribution == make_string("ggx", 11253504724482777663ull) ||
|
||||||
|
closure->distribution == make_string("default", 4430693559278735917ull)) {
|
||||||
if (!closure->refract) {
|
if (!closure->refract) {
|
||||||
if (closure->alpha_x == closure->alpha_y) {
|
if (closure->alpha_x == closure->alpha_y) {
|
||||||
/* Isotropic */
|
/* Isotropic */
|
||||||
@@ -1000,18 +993,14 @@ ccl_device void osl_closure_bssrdf_setup(KernelGlobals kg,
|
|||||||
float3 weight,
|
float3 weight,
|
||||||
ccl_private const BSSRDFClosure *closure)
|
ccl_private const BSSRDFClosure *closure)
|
||||||
{
|
{
|
||||||
static ustring u_burley("burley");
|
|
||||||
static ustring u_random_walk_fixed_radius("random_walk_fixed_radius");
|
|
||||||
static ustring u_random_walk("random_walk");
|
|
||||||
|
|
||||||
ClosureType type;
|
ClosureType type;
|
||||||
if (closure->method == u_burley) {
|
if (closure->method == make_string("burley", 186330084368958868ull)) {
|
||||||
type = CLOSURE_BSSRDF_BURLEY_ID;
|
type = CLOSURE_BSSRDF_BURLEY_ID;
|
||||||
}
|
}
|
||||||
else if (closure->method == u_random_walk_fixed_radius) {
|
else if (closure->method == make_string("random_walk_fixed_radius", 5695810351010063150ull)) {
|
||||||
type = CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID;
|
type = CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID;
|
||||||
}
|
}
|
||||||
else if (closure->method == u_random_walk) {
|
else if (closure->method == make_string("random_walk", 11360609267673527222ull)) {
|
||||||
type = CLOSURE_BSSRDF_RANDOM_WALK_ID;
|
type = CLOSURE_BSSRDF_RANDOM_WALK_ID;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@@ -40,7 +40,7 @@ OSL_CLOSURE_STRUCT_BEGIN(Transparent, transparent)
|
|||||||
OSL_CLOSURE_STRUCT_END(Transparent, transparent)
|
OSL_CLOSURE_STRUCT_END(Transparent, transparent)
|
||||||
|
|
||||||
OSL_CLOSURE_STRUCT_BEGIN(Microfacet, microfacet)
|
OSL_CLOSURE_STRUCT_BEGIN(Microfacet, microfacet)
|
||||||
OSL_CLOSURE_STRUCT_MEMBER(Microfacet, STRING, ustring, distribution, NULL)
|
OSL_CLOSURE_STRUCT_MEMBER(Microfacet, STRING, DeviceString, distribution, NULL)
|
||||||
OSL_CLOSURE_STRUCT_MEMBER(Microfacet, VECTOR, packed_float3, N, NULL)
|
OSL_CLOSURE_STRUCT_MEMBER(Microfacet, VECTOR, packed_float3, N, NULL)
|
||||||
OSL_CLOSURE_STRUCT_MEMBER(Microfacet, VECTOR, packed_float3, T, NULL)
|
OSL_CLOSURE_STRUCT_MEMBER(Microfacet, VECTOR, packed_float3, T, NULL)
|
||||||
OSL_CLOSURE_STRUCT_MEMBER(Microfacet, FLOAT, float, alpha_x, NULL)
|
OSL_CLOSURE_STRUCT_MEMBER(Microfacet, FLOAT, float, alpha_x, NULL)
|
||||||
@@ -210,7 +210,7 @@ OSL_CLOSURE_STRUCT_BEGIN(PhongRamp, phong_ramp)
|
|||||||
OSL_CLOSURE_STRUCT_END(PhongRamp, phong_ramp)
|
OSL_CLOSURE_STRUCT_END(PhongRamp, phong_ramp)
|
||||||
|
|
||||||
OSL_CLOSURE_STRUCT_BEGIN(BSSRDF, bssrdf)
|
OSL_CLOSURE_STRUCT_BEGIN(BSSRDF, bssrdf)
|
||||||
OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, STRING, ustring, method, NULL)
|
OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, STRING, DeviceString, method, NULL)
|
||||||
OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, VECTOR, packed_float3, N, NULL)
|
OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, VECTOR, packed_float3, N, NULL)
|
||||||
OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, VECTOR, packed_float3, radius, NULL)
|
OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, VECTOR, packed_float3, radius, NULL)
|
||||||
OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, VECTOR, packed_float3, albedo, NULL)
|
OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, VECTOR, packed_float3, albedo, NULL)
|
||||||
|
@@ -1,38 +1,171 @@
|
|||||||
/* SPDX-License-Identifier: Apache-2.0
|
/* SPDX-License-Identifier: BSD-3-Clause
|
||||||
* Copyright 2011-2022 Blender Foundation */
|
*
|
||||||
|
* Adapted from Open Shading Language
|
||||||
|
* Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al.
|
||||||
|
* All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Modifications Copyright 2011-2022 Blender Foundation. */
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
/* OSL Shader Engine
|
/* OSL Shader Engine
|
||||||
*
|
*
|
||||||
* Holds all variables to execute and use OSL shaders from the kernel. These
|
* Holds all variables to execute and use OSL shaders from the kernel.
|
||||||
* are initialized externally by OSLShaderManager before rendering starts.
|
|
||||||
*
|
|
||||||
* Before/after a thread starts rendering, thread_init/thread_free must be
|
|
||||||
* called, which will store any per thread OSL state in thread local storage.
|
|
||||||
* This means no thread state must be passed along in the kernel itself.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "kernel/osl/types.h"
|
#include "kernel/osl/types.h"
|
||||||
|
|
||||||
|
#include "kernel/osl/closures_setup.h"
|
||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
class OSLShader {
|
ccl_device_inline void shaderdata_to_shaderglobals(KernelGlobals kg,
|
||||||
public:
|
ccl_private ShaderData *sd,
|
||||||
/* eval */
|
uint32_t path_flag,
|
||||||
static void eval_surface(const KernelGlobalsCPU *kg,
|
ccl_private ShaderGlobals *globals)
|
||||||
const void *state,
|
{
|
||||||
ShaderData *sd,
|
const differential3 dP = differential_from_compact(sd->Ng, sd->dP);
|
||||||
uint32_t path_flag);
|
const differential3 dI = differential_from_compact(sd->I, sd->dI);
|
||||||
static void eval_background(const KernelGlobalsCPU *kg,
|
|
||||||
const void *state,
|
/* copy from shader data to shader globals */
|
||||||
ShaderData *sd,
|
globals->P = sd->P;
|
||||||
uint32_t path_flag);
|
globals->dPdx = dP.dx;
|
||||||
static void eval_volume(const KernelGlobalsCPU *kg,
|
globals->dPdy = dP.dy;
|
||||||
const void *state,
|
globals->I = sd->I;
|
||||||
ShaderData *sd,
|
globals->dIdx = dI.dx;
|
||||||
uint32_t path_flag);
|
globals->dIdy = dI.dy;
|
||||||
static void eval_displacement(const KernelGlobalsCPU *kg, const void *state, ShaderData *sd);
|
globals->N = sd->N;
|
||||||
};
|
globals->Ng = sd->Ng;
|
||||||
|
globals->u = sd->u;
|
||||||
|
globals->dudx = sd->du.dx;
|
||||||
|
globals->dudy = sd->du.dy;
|
||||||
|
globals->v = sd->v;
|
||||||
|
globals->dvdx = sd->dv.dx;
|
||||||
|
globals->dvdy = sd->dv.dy;
|
||||||
|
globals->dPdu = sd->dPdu;
|
||||||
|
globals->dPdv = sd->dPdv;
|
||||||
|
globals->time = sd->time;
|
||||||
|
globals->dtime = 1.0f;
|
||||||
|
globals->surfacearea = 1.0f;
|
||||||
|
globals->raytype = path_flag;
|
||||||
|
globals->flipHandedness = 0;
|
||||||
|
globals->backfacing = (sd->flag & SD_BACKFACING);
|
||||||
|
|
||||||
|
/* shader data to be used in services callbacks */
|
||||||
|
globals->renderstate = sd;
|
||||||
|
|
||||||
|
/* hacky, we leave it to services to fetch actual object matrix */
|
||||||
|
globals->shader2common = sd;
|
||||||
|
globals->object2common = sd;
|
||||||
|
|
||||||
|
/* must be set to NULL before execute */
|
||||||
|
globals->Ci = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
ccl_device void flatten_closure_tree(KernelGlobals kg,
|
||||||
|
ccl_private ShaderData *sd,
|
||||||
|
uint32_t path_flag,
|
||||||
|
ccl_private const OSLClosure *closure)
|
||||||
|
{
|
||||||
|
int stack_size = 0;
|
||||||
|
float3 weight = one_float3();
|
||||||
|
float3 weight_stack[16];
|
||||||
|
ccl_private const OSLClosure *closure_stack[16];
|
||||||
|
|
||||||
|
while (closure) {
|
||||||
|
switch (closure->id) {
|
||||||
|
case OSL_CLOSURE_MUL_ID: {
|
||||||
|
ccl_private const OSLClosureMul *mul = static_cast<ccl_private const OSLClosureMul *>(
|
||||||
|
closure);
|
||||||
|
weight *= mul->weight;
|
||||||
|
closure = mul->closure;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
case OSL_CLOSURE_ADD_ID: {
|
||||||
|
if (stack_size >= 16) {
|
||||||
|
kernel_assert(!"Exhausted OSL closure stack");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
ccl_private const OSLClosureAdd *add = static_cast<ccl_private const OSLClosureAdd *>(
|
||||||
|
closure);
|
||||||
|
closure = add->closureA;
|
||||||
|
weight_stack[stack_size] = weight;
|
||||||
|
closure_stack[stack_size++] = add->closureB;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
#define OSL_CLOSURE_STRUCT_BEGIN(Upper, lower) \
|
||||||
|
case OSL_CLOSURE_##Upper##_ID: { \
|
||||||
|
ccl_private const OSLClosureComponent *comp = \
|
||||||
|
static_cast<ccl_private const OSLClosureComponent *>(closure); \
|
||||||
|
osl_closure_##lower##_setup(kg, \
|
||||||
|
sd, \
|
||||||
|
path_flag, \
|
||||||
|
weight * comp->weight, \
|
||||||
|
reinterpret_cast<ccl_private const Upper##Closure *>(comp + 1)); \
|
||||||
|
break; \
|
||||||
|
}
|
||||||
|
#include "closures_template.h"
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (stack_size > 0) {
|
||||||
|
weight = weight_stack[--stack_size];
|
||||||
|
closure = closure_stack[stack_size];
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
closure = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifndef __KERNEL_GPU__
|
||||||
|
|
||||||
|
template<ShaderType type>
|
||||||
|
void osl_eval_nodes(const KernelGlobalsCPU *kg,
|
||||||
|
const void *state,
|
||||||
|
ShaderData *sd,
|
||||||
|
uint32_t path_flag);
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
template<ShaderType type, typename ConstIntegratorGenericState>
|
||||||
|
ccl_device_inline void osl_eval_nodes(KernelGlobals kg,
|
||||||
|
ConstIntegratorGenericState state,
|
||||||
|
ccl_private ShaderData *sd,
|
||||||
|
uint32_t path_flag)
|
||||||
|
{
|
||||||
|
ShaderGlobals globals;
|
||||||
|
shaderdata_to_shaderglobals(kg, sd, path_flag, &globals);
|
||||||
|
|
||||||
|
const int shader = sd->shader & SHADER_MASK;
|
||||||
|
|
||||||
|
# ifdef __KERNEL_OPTIX__
|
||||||
|
uint8_t group_data[2048];
|
||||||
|
uint8_t closure_pool[1024];
|
||||||
|
sd->osl_closure_pool = closure_pool;
|
||||||
|
|
||||||
|
unsigned int optix_dc_index = 2 /* NUM_CALLABLE_PROGRAM_GROUPS */ +
|
||||||
|
(shader + type * kernel_data.max_shaders) * 2;
|
||||||
|
optixDirectCall<void>(optix_dc_index + 0,
|
||||||
|
/* shaderglobals_ptr = */ &globals,
|
||||||
|
/* groupdata_ptr = */ (void *)group_data,
|
||||||
|
/* userdata_base_ptr = */ (void *)nullptr,
|
||||||
|
/* output_base_ptr = */ (void *)nullptr,
|
||||||
|
/* shadeindex = */ 0);
|
||||||
|
optixDirectCall<void>(optix_dc_index + 1,
|
||||||
|
/* shaderglobals_ptr = */ &globals,
|
||||||
|
/* groupdata_ptr = */ (void *)group_data,
|
||||||
|
/* userdata_base_ptr = */ (void *)nullptr,
|
||||||
|
/* output_base_ptr = */ (void *)nullptr,
|
||||||
|
/* shadeindex = */ 0);
|
||||||
|
# endif
|
||||||
|
|
||||||
|
if (globals.Ci) {
|
||||||
|
flatten_closure_tree(kg, sd, path_flag, globals.Ci);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
CCL_NAMESPACE_END
|
||||||
|
@@ -119,8 +119,8 @@ ustring OSLRenderServices::u_u("u");
|
|||||||
ustring OSLRenderServices::u_v("v");
|
ustring OSLRenderServices::u_v("v");
|
||||||
ustring OSLRenderServices::u_empty;
|
ustring OSLRenderServices::u_empty;
|
||||||
|
|
||||||
OSLRenderServices::OSLRenderServices(OSL::TextureSystem *texture_system)
|
OSLRenderServices::OSLRenderServices(OSL::TextureSystem *texture_system, int device_type)
|
||||||
: OSL::RendererServices(texture_system)
|
: OSL::RendererServices(texture_system), device_type_(device_type)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -131,6 +131,17 @@ OSLRenderServices::~OSLRenderServices()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int OSLRenderServices::supports(string_view feature) const
|
||||||
|
{
|
||||||
|
#ifdef WITH_OPTIX
|
||||||
|
if (feature == "OptiX") {
|
||||||
|
return device_type_ == DEVICE_OPTIX;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg,
|
bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg,
|
||||||
OSL::Matrix44 &result,
|
OSL::Matrix44 &result,
|
||||||
OSL::TransformationPtr xform,
|
OSL::TransformationPtr xform,
|
||||||
@@ -1139,29 +1150,40 @@ TextureSystem::TextureHandle *OSLRenderServices::get_texture_handle(ustring file
|
|||||||
{
|
{
|
||||||
OSLTextureHandleMap::iterator it = textures.find(filename);
|
OSLTextureHandleMap::iterator it = textures.find(filename);
|
||||||
|
|
||||||
/* For non-OIIO textures, just return a pointer to our own OSLTextureHandle. */
|
if (device_type_ == DEVICE_CPU) {
|
||||||
if (it != textures.end()) {
|
/* For non-OIIO textures, just return a pointer to our own OSLTextureHandle. */
|
||||||
if (it->second->type != OSLTextureHandle::OIIO) {
|
if (it != textures.end()) {
|
||||||
return (TextureSystem::TextureHandle *)it->second.get();
|
if (it->second->type != OSLTextureHandle::OIIO) {
|
||||||
|
return (TextureSystem::TextureHandle *)it->second.get();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get handle from OpenImageIO. */
|
||||||
|
OSL::TextureSystem *ts = m_texturesys;
|
||||||
|
TextureSystem::TextureHandle *handle = ts->get_texture_handle(filename);
|
||||||
|
if (handle == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Insert new OSLTextureHandle if needed. */
|
||||||
|
if (it == textures.end()) {
|
||||||
|
textures.insert(filename, new OSLTextureHandle(OSLTextureHandle::OIIO));
|
||||||
|
it = textures.find(filename);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Assign OIIO texture handle and return. */
|
||||||
|
it->second->oiio_handle = handle;
|
||||||
|
return (TextureSystem::TextureHandle *)it->second.get();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (it != textures.end() && it->second->type == OSLTextureHandle::SVM &&
|
||||||
|
it->second->svm_slots[0].w == -1) {
|
||||||
|
return reinterpret_cast<TextureSystem::TextureHandle *>(
|
||||||
|
static_cast<uintptr_t>(it->second->svm_slots[0].y + 1));
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/* Get handle from OpenImageIO. */
|
|
||||||
OSL::TextureSystem *ts = m_texturesys;
|
|
||||||
TextureSystem::TextureHandle *handle = ts->get_texture_handle(filename);
|
|
||||||
if (handle == NULL) {
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Insert new OSLTextureHandle if needed. */
|
|
||||||
if (it == textures.end()) {
|
|
||||||
textures.insert(filename, new OSLTextureHandle(OSLTextureHandle::OIIO));
|
|
||||||
it = textures.find(filename);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Assign OIIO texture handle and return. */
|
|
||||||
it->second->oiio_handle = handle;
|
|
||||||
return (TextureSystem::TextureHandle *)it->second.get();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool OSLRenderServices::good(TextureSystem::TextureHandle *texture_handle)
|
bool OSLRenderServices::good(TextureSystem::TextureHandle *texture_handle)
|
||||||
|
@@ -22,11 +22,8 @@ class PtexCache;
|
|||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
class Object;
|
|
||||||
class Scene;
|
class Scene;
|
||||||
class Shader;
|
|
||||||
struct ShaderData;
|
struct ShaderData;
|
||||||
struct float3;
|
|
||||||
struct KernelGlobalsCPU;
|
struct KernelGlobalsCPU;
|
||||||
|
|
||||||
/* OSL Texture Handle
|
/* OSL Texture Handle
|
||||||
@@ -73,11 +70,13 @@ typedef OIIO::unordered_map_concurrent<ustring, OSLTextureHandleRef, ustringHash
|
|||||||
|
|
||||||
class OSLRenderServices : public OSL::RendererServices {
|
class OSLRenderServices : public OSL::RendererServices {
|
||||||
public:
|
public:
|
||||||
OSLRenderServices(OSL::TextureSystem *texture_system);
|
OSLRenderServices(OSL::TextureSystem *texture_system, int device_type);
|
||||||
~OSLRenderServices();
|
~OSLRenderServices();
|
||||||
|
|
||||||
static void register_closures(OSL::ShadingSystem *ss);
|
static void register_closures(OSL::ShadingSystem *ss);
|
||||||
|
|
||||||
|
int supports(string_view feature) const override;
|
||||||
|
|
||||||
bool get_matrix(OSL::ShaderGlobals *sg,
|
bool get_matrix(OSL::ShaderGlobals *sg,
|
||||||
OSL::Matrix44 &result,
|
OSL::Matrix44 &result,
|
||||||
OSL::TransformationPtr xform,
|
OSL::TransformationPtr xform,
|
||||||
@@ -324,6 +323,9 @@ class OSLRenderServices : public OSL::RendererServices {
|
|||||||
* and is required because texture handles are cached as part of the shared
|
* and is required because texture handles are cached as part of the shared
|
||||||
* shading system. */
|
* shading system. */
|
||||||
OSLTextureHandleMap textures;
|
OSLTextureHandleMap textures;
|
||||||
|
|
||||||
|
private:
|
||||||
|
int device_type_;
|
||||||
};
|
};
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
CCL_NAMESPACE_END
|
||||||
|
2176
intern/cycles/kernel/osl/services_gpu.h
Normal file
2176
intern/cycles/kernel/osl/services_gpu.h
Normal file
File diff suppressed because it is too large
Load Diff
17
intern/cycles/kernel/osl/services_optix.cu
Normal file
17
intern/cycles/kernel/osl/services_optix.cu
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
/* SPDX-License-Identifier: Apache-2.0
|
||||||
|
* Copyright 2011-2022 Blender Foundation */
|
||||||
|
|
||||||
|
#define WITH_OSL
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
|
#include "kernel/device/optix/compat.h"
|
||||||
|
#include "kernel/device/optix/globals.h"
|
||||||
|
|
||||||
|
#include "kernel/device/gpu/image.h" /* Texture lookup uses normal CUDA intrinsics. */
|
||||||
|
|
||||||
|
#include "kernel/osl/services_gpu.h"
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
extern "C" __device__ void __direct_callable__dummy_services()
|
||||||
|
{
|
||||||
|
}
|
@@ -3,8 +3,7 @@
|
|||||||
|
|
||||||
#include "stdcycles.h"
|
#include "stdcycles.h"
|
||||||
|
|
||||||
shader node_geometry(normal NormalIn = N,
|
shader node_geometry(string bump_offset = "center",
|
||||||
string bump_offset = "center",
|
|
||||||
|
|
||||||
output point Position = point(0.0, 0.0, 0.0),
|
output point Position = point(0.0, 0.0, 0.0),
|
||||||
output normal Normal = normal(0.0, 0.0, 0.0),
|
output normal Normal = normal(0.0, 0.0, 0.0),
|
||||||
@@ -17,7 +16,7 @@ shader node_geometry(normal NormalIn = N,
|
|||||||
output float RandomPerIsland = 0.0)
|
output float RandomPerIsland = 0.0)
|
||||||
{
|
{
|
||||||
Position = P;
|
Position = P;
|
||||||
Normal = NormalIn;
|
Normal = N;
|
||||||
TrueNormal = Ng;
|
TrueNormal = Ng;
|
||||||
Incoming = I;
|
Incoming = I;
|
||||||
Parametric = point(1.0 - u - v, u, 0.0);
|
Parametric = point(1.0 - u - v, u, 0.0);
|
||||||
|
@@ -3,13 +3,12 @@
|
|||||||
|
|
||||||
#include "stdcycles.h"
|
#include "stdcycles.h"
|
||||||
|
|
||||||
shader node_normal_map(normal NormalIn = N,
|
shader node_normal_map(float Strength = 1.0,
|
||||||
float Strength = 1.0,
|
|
||||||
color Color = color(0.5, 0.5, 1.0),
|
color Color = color(0.5, 0.5, 1.0),
|
||||||
string space = "tangent",
|
string space = "tangent",
|
||||||
string attr_name = "geom:tangent",
|
string attr_name = "geom:tangent",
|
||||||
string attr_sign_name = "geom:tangent_sign",
|
string attr_sign_name = "geom:tangent_sign",
|
||||||
output normal Normal = NormalIn)
|
output normal Normal = N)
|
||||||
{
|
{
|
||||||
color mcolor = 2.0 * color(Color[0] - 0.5, Color[1] - 0.5, Color[2] - 0.5);
|
color mcolor = 2.0 * color(Color[0] - 0.5, Color[1] - 0.5, Color[2] - 0.5);
|
||||||
int is_backfacing = backfacing();
|
int is_backfacing = backfacing();
|
||||||
@@ -71,5 +70,5 @@ shader node_normal_map(normal NormalIn = N,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (Strength != 1.0)
|
if (Strength != 1.0)
|
||||||
Normal = normalize(NormalIn + (Normal - NormalIn) * max(Strength, 0.0));
|
Normal = normalize(N + (Normal - N) * max(Strength, 0.0));
|
||||||
}
|
}
|
||||||
|
@@ -3,8 +3,7 @@
|
|||||||
|
|
||||||
#include "stdcycles.h"
|
#include "stdcycles.h"
|
||||||
|
|
||||||
shader node_tangent(normal NormalIn = N,
|
shader node_tangent(string attr_name = "geom:tangent",
|
||||||
string attr_name = "geom:tangent",
|
|
||||||
string direction_type = "radial",
|
string direction_type = "radial",
|
||||||
string axis = "z",
|
string axis = "z",
|
||||||
output normal Tangent = normalize(dPdu))
|
output normal Tangent = normalize(dPdu))
|
||||||
@@ -29,5 +28,5 @@ shader node_tangent(normal NormalIn = N,
|
|||||||
}
|
}
|
||||||
|
|
||||||
T = transform("object", "world", T);
|
T = transform("object", "world", T);
|
||||||
Tangent = cross(NormalIn, normalize(cross(T, NormalIn)));
|
Tangent = cross(N, normalize(cross(T, N)));
|
||||||
}
|
}
|
||||||
|
@@ -4,7 +4,6 @@
|
|||||||
#include "stdcycles.h"
|
#include "stdcycles.h"
|
||||||
|
|
||||||
shader node_texture_coordinate(
|
shader node_texture_coordinate(
|
||||||
normal NormalIn = N,
|
|
||||||
int is_background = 0,
|
int is_background = 0,
|
||||||
int is_volume = 0,
|
int is_volume = 0,
|
||||||
int from_dupli = 0,
|
int from_dupli = 0,
|
||||||
@@ -27,7 +26,7 @@ shader node_texture_coordinate(
|
|||||||
point Pcam = transform("camera", "world", point(0, 0, 0));
|
point Pcam = transform("camera", "world", point(0, 0, 0));
|
||||||
Camera = transform("camera", P + Pcam);
|
Camera = transform("camera", P + Pcam);
|
||||||
getattribute("NDC", Window);
|
getattribute("NDC", Window);
|
||||||
Normal = NormalIn;
|
Normal = N;
|
||||||
Reflection = I;
|
Reflection = I;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@@ -59,8 +58,8 @@ shader node_texture_coordinate(
|
|||||||
}
|
}
|
||||||
Camera = transform("camera", P);
|
Camera = transform("camera", P);
|
||||||
Window = transform("NDC", P);
|
Window = transform("NDC", P);
|
||||||
Normal = transform("world", "object", NormalIn);
|
Normal = transform("world", "object", N);
|
||||||
Reflection = -reflect(I, NormalIn);
|
Reflection = -reflect(I, N);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bump_offset == "dx") {
|
if (bump_offset == "dx") {
|
||||||
|
@@ -5,9 +5,53 @@
|
|||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
struct DeviceString {
|
||||||
|
#if defined(__KERNEL_GPU__)
|
||||||
|
/* Strings are represented by their hashes in CUDA and OptiX. */
|
||||||
|
size_t str_;
|
||||||
|
|
||||||
|
ccl_device_inline_method uint64_t hash() const
|
||||||
|
{
|
||||||
|
return str_;
|
||||||
|
}
|
||||||
|
#elif defined(OPENIMAGEIO_USTRING_H)
|
||||||
|
ustring str_;
|
||||||
|
|
||||||
|
ccl_device_inline_method uint64_t hash() const
|
||||||
|
{
|
||||||
|
return str_.hash();
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
const char *str_;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ccl_device_inline_method bool operator==(DeviceString b) const
|
||||||
|
{
|
||||||
|
return str_ == b.str_;
|
||||||
|
}
|
||||||
|
ccl_device_inline_method bool operator!=(DeviceString b) const
|
||||||
|
{
|
||||||
|
return str_ != b.str_;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
ccl_device_inline DeviceString make_string(const char *str, size_t hash)
|
||||||
|
{
|
||||||
|
#if defined(__KERNEL_GPU__)
|
||||||
|
(void)str;
|
||||||
|
return {hash};
|
||||||
|
#elif defined(OPENIMAGEIO_USTRING_H)
|
||||||
|
(void)hash;
|
||||||
|
return {ustring(str)};
|
||||||
|
#else
|
||||||
|
(void)hash;
|
||||||
|
return {str};
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
/* Closure */
|
/* Closure */
|
||||||
|
|
||||||
enum ClosureTypeOSL {
|
enum OSLClosureType {
|
||||||
OSL_CLOSURE_MUL_ID = -1,
|
OSL_CLOSURE_MUL_ID = -1,
|
||||||
OSL_CLOSURE_ADD_ID = -2,
|
OSL_CLOSURE_ADD_ID = -2,
|
||||||
|
|
||||||
@@ -17,4 +61,60 @@ enum ClosureTypeOSL {
|
|||||||
#include "closures_template.h"
|
#include "closures_template.h"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct OSLClosure {
|
||||||
|
OSLClosureType id;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ccl_align(8) OSLClosureMul : public OSLClosure
|
||||||
|
{
|
||||||
|
packed_float3 weight;
|
||||||
|
ccl_private const OSLClosure *closure;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ccl_align(8) OSLClosureAdd : public OSLClosure
|
||||||
|
{
|
||||||
|
ccl_private const OSLClosure *closureA;
|
||||||
|
ccl_private const OSLClosure *closureB;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ccl_align(8) OSLClosureComponent : public OSLClosure
|
||||||
|
{
|
||||||
|
packed_float3 weight;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Globals */
|
||||||
|
|
||||||
|
struct ShaderGlobals {
|
||||||
|
packed_float3 P, dPdx, dPdy;
|
||||||
|
packed_float3 dPdz;
|
||||||
|
packed_float3 I, dIdx, dIdy;
|
||||||
|
packed_float3 N;
|
||||||
|
packed_float3 Ng;
|
||||||
|
float u, dudx, dudy;
|
||||||
|
float v, dvdx, dvdy;
|
||||||
|
packed_float3 dPdu, dPdv;
|
||||||
|
float time;
|
||||||
|
float dtime;
|
||||||
|
packed_float3 dPdtime;
|
||||||
|
packed_float3 Ps, dPsdx, dPsdy;
|
||||||
|
ccl_private void *renderstate;
|
||||||
|
ccl_private void *tracedata;
|
||||||
|
ccl_private void *objdata;
|
||||||
|
void *context;
|
||||||
|
void *renderer;
|
||||||
|
ccl_private void *object2common;
|
||||||
|
ccl_private void *shader2common;
|
||||||
|
ccl_private OSLClosure *Ci;
|
||||||
|
float surfacearea;
|
||||||
|
int raytype;
|
||||||
|
int flipHandedness;
|
||||||
|
int backfacing;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct OSLNoiseOptions {
|
||||||
|
};
|
||||||
|
|
||||||
|
struct OSLTextureOptions {
|
||||||
|
};
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
CCL_NAMESPACE_END
|
||||||
|
@@ -7,6 +7,25 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
ccl_device uint pmj_shuffled_sample_index(KernelGlobals kg, uint sample, uint dimension, uint seed)
|
||||||
|
{
|
||||||
|
const uint sample_count = kernel_data.integrator.pmj_sequence_size;
|
||||||
|
|
||||||
|
/* Shuffle the pattern order and sample index to better decorrelate
|
||||||
|
* dimensions and make the most of the finite patterns we have.
|
||||||
|
* The funky sample mask stuff is to ensure that we only shuffle
|
||||||
|
* *within* the current sample pattern, which is necessary to avoid
|
||||||
|
* early repeat pattern use. */
|
||||||
|
const uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed);
|
||||||
|
/* sample_count should always be a power of two, so this results in a mask. */
|
||||||
|
const uint sample_mask = sample_count - 1;
|
||||||
|
const uint sample_shuffled = nested_uniform_scramble(sample,
|
||||||
|
hash_wang_seeded_uint(dimension, seed));
|
||||||
|
sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask);
|
||||||
|
|
||||||
|
return ((pattern_i * sample_count) + sample) % (sample_count * NUM_PMJ_PATTERNS);
|
||||||
|
}
|
||||||
|
|
||||||
ccl_device float pmj_sample_1D(KernelGlobals kg,
|
ccl_device float pmj_sample_1D(KernelGlobals kg,
|
||||||
uint sample,
|
uint sample,
|
||||||
const uint rng_hash,
|
const uint rng_hash,
|
||||||
@@ -20,22 +39,9 @@ ccl_device float pmj_sample_1D(KernelGlobals kg,
|
|||||||
seed = kernel_data.integrator.seed;
|
seed = kernel_data.integrator.seed;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Shuffle the pattern order and sample index to better decorrelate
|
|
||||||
* dimensions and make the most of the finite patterns we have.
|
|
||||||
* The funky sample mask stuff is to ensure that we only shuffle
|
|
||||||
* *within* the current sample pattern, which is necessary to avoid
|
|
||||||
* early repeat pattern use. */
|
|
||||||
const uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed);
|
|
||||||
/* NUM_PMJ_SAMPLES should be a power of two, so this results in a mask. */
|
|
||||||
const uint sample_mask = NUM_PMJ_SAMPLES - 1;
|
|
||||||
const uint sample_shuffled = nested_uniform_scramble(sample,
|
|
||||||
hash_wang_seeded_uint(dimension, seed));
|
|
||||||
sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask);
|
|
||||||
|
|
||||||
/* Fetch the sample. */
|
/* Fetch the sample. */
|
||||||
const uint index = ((pattern_i * NUM_PMJ_SAMPLES) + sample) %
|
const uint index = pmj_shuffled_sample_index(kg, sample, dimension, seed);
|
||||||
(NUM_PMJ_SAMPLES * NUM_PMJ_PATTERNS);
|
float x = kernel_data_fetch(sample_pattern_lut, index * NUM_PMJ_DIMENSIONS);
|
||||||
float x = kernel_data_fetch(sample_pattern_lut, index * 2);
|
|
||||||
|
|
||||||
/* Do limited Cranley-Patterson rotation when using scrambling distance. */
|
/* Do limited Cranley-Patterson rotation when using scrambling distance. */
|
||||||
if (kernel_data.integrator.scrambling_distance < 1.0f) {
|
if (kernel_data.integrator.scrambling_distance < 1.0f) {
|
||||||
@@ -61,23 +67,10 @@ ccl_device float2 pmj_sample_2D(KernelGlobals kg,
|
|||||||
seed = kernel_data.integrator.seed;
|
seed = kernel_data.integrator.seed;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Shuffle the pattern order and sample index to better decorrelate
|
|
||||||
* dimensions and make the most of the finite patterns we have.
|
|
||||||
* The funky sample mask stuff is to ensure that we only shuffle
|
|
||||||
* *within* the current sample pattern, which is necessary to avoid
|
|
||||||
* early repeat pattern use. */
|
|
||||||
const uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed);
|
|
||||||
/* NUM_PMJ_SAMPLES should be a power of two, so this results in a mask. */
|
|
||||||
const uint sample_mask = NUM_PMJ_SAMPLES - 1;
|
|
||||||
const uint sample_shuffled = nested_uniform_scramble(sample,
|
|
||||||
hash_wang_seeded_uint(dimension, seed));
|
|
||||||
sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask);
|
|
||||||
|
|
||||||
/* Fetch the sample. */
|
/* Fetch the sample. */
|
||||||
const uint index = ((pattern_i * NUM_PMJ_SAMPLES) + sample) %
|
const uint index = pmj_shuffled_sample_index(kg, sample, dimension, seed);
|
||||||
(NUM_PMJ_SAMPLES * NUM_PMJ_PATTERNS);
|
float x = kernel_data_fetch(sample_pattern_lut, index * NUM_PMJ_DIMENSIONS);
|
||||||
float x = kernel_data_fetch(sample_pattern_lut, index * 2);
|
float y = kernel_data_fetch(sample_pattern_lut, index * NUM_PMJ_DIMENSIONS + 1);
|
||||||
float y = kernel_data_fetch(sample_pattern_lut, index * 2 + 1);
|
|
||||||
|
|
||||||
/* Do limited Cranley-Patterson rotation when using scrambling distance. */
|
/* Do limited Cranley-Patterson rotation when using scrambling distance. */
|
||||||
if (kernel_data.integrator.scrambling_distance < 1.0f) {
|
if (kernel_data.integrator.scrambling_distance < 1.0f) {
|
||||||
|
@@ -75,10 +75,14 @@ CCL_NAMESPACE_BEGIN
|
|||||||
#define __VOLUME__
|
#define __VOLUME__
|
||||||
|
|
||||||
/* Device specific features */
|
/* Device specific features */
|
||||||
#ifndef __KERNEL_GPU__
|
#ifdef WITH_OSL
|
||||||
# ifdef WITH_OSL
|
# define __OSL__
|
||||||
# define __OSL__
|
# ifdef __KERNEL_OPTIX__
|
||||||
|
/* Kernels with OSL support are built separately in OptiX and don't need SVM. */
|
||||||
|
# undef __SVM__
|
||||||
# endif
|
# endif
|
||||||
|
#endif
|
||||||
|
#ifndef __KERNEL_GPU__
|
||||||
# ifdef WITH_PATH_GUIDING
|
# ifdef WITH_PATH_GUIDING
|
||||||
# define __PATH_GUIDING__
|
# define __PATH_GUIDING__
|
||||||
# endif
|
# endif
|
||||||
@@ -156,7 +160,8 @@ enum PathTraceDimension {
|
|||||||
PRNG_VOLUME_SCATTER_DISTANCE = 5,
|
PRNG_VOLUME_SCATTER_DISTANCE = 5,
|
||||||
PRNG_VOLUME_OFFSET = 6,
|
PRNG_VOLUME_OFFSET = 6,
|
||||||
PRNG_VOLUME_SHADE_OFFSET = 7,
|
PRNG_VOLUME_SHADE_OFFSET = 7,
|
||||||
PRNG_VOLUME_PHASE_GUIDING = 8,
|
PRNG_VOLUME_PHASE_GUIDING_DISTANCE = 8,
|
||||||
|
PRNG_VOLUME_PHASE_GUIDING_EQUIANGULAR = 9,
|
||||||
|
|
||||||
/* Subsurface random walk bounces */
|
/* Subsurface random walk bounces */
|
||||||
PRNG_SUBSURFACE_BSDF = 0,
|
PRNG_SUBSURFACE_BSDF = 0,
|
||||||
@@ -917,9 +922,13 @@ typedef struct ccl_align(16) ShaderData
|
|||||||
float ray_dP;
|
float ray_dP;
|
||||||
|
|
||||||
#ifdef __OSL__
|
#ifdef __OSL__
|
||||||
|
# ifdef __KERNEL_GPU__
|
||||||
|
ccl_private uint8_t *osl_closure_pool;
|
||||||
|
# else
|
||||||
const struct KernelGlobalsCPU *osl_globals;
|
const struct KernelGlobalsCPU *osl_globals;
|
||||||
const struct IntegratorStateCPU *osl_path_state;
|
const struct IntegratorStateCPU *osl_path_state;
|
||||||
const struct IntegratorShadowStateCPU *osl_shadow_path_state;
|
const struct IntegratorShadowStateCPU *osl_shadow_path_state;
|
||||||
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* LCG state for closures that require additional random numbers. */
|
/* LCG state for closures that require additional random numbers. */
|
||||||
@@ -1382,12 +1391,13 @@ static_assert_align(KernelShaderEvalInput, 16);
|
|||||||
|
|
||||||
/* Pre-computed sample table sizes for PMJ02 sampler.
|
/* Pre-computed sample table sizes for PMJ02 sampler.
|
||||||
*
|
*
|
||||||
* NOTE: divisions *must* be a power of two, and patterns
|
* NOTE: min and max samples *must* be a power of two, and patterns
|
||||||
* ideally should be as well.
|
* ideally should be as well.
|
||||||
*/
|
*/
|
||||||
#define NUM_PMJ_DIVISIONS 32
|
#define MIN_PMJ_SAMPLES 256
|
||||||
#define NUM_PMJ_SAMPLES ((NUM_PMJ_DIVISIONS) * (NUM_PMJ_DIVISIONS))
|
#define MAX_PMJ_SAMPLES 8192
|
||||||
#define NUM_PMJ_PATTERNS 64
|
#define NUM_PMJ_DIMENSIONS 2
|
||||||
|
#define NUM_PMJ_PATTERNS 256
|
||||||
|
|
||||||
/* Device kernels.
|
/* Device kernels.
|
||||||
*
|
*
|
||||||
@@ -1529,6 +1539,9 @@ enum KernelFeatureFlag : uint32_t {
|
|||||||
|
|
||||||
/* Path guiding. */
|
/* Path guiding. */
|
||||||
KERNEL_FEATURE_PATH_GUIDING = (1U << 26U),
|
KERNEL_FEATURE_PATH_GUIDING = (1U << 26U),
|
||||||
|
|
||||||
|
/* OSL. */
|
||||||
|
KERNEL_FEATURE_OSL = (1U << 27U),
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Shader node feature mask, to specialize shader evaluation for kernels. */
|
/* Shader node feature mask, to specialize shader evaluation for kernels. */
|
||||||
|
@@ -257,12 +257,18 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
|
|||||||
kintegrator->light_inv_rr_threshold = 0.0f;
|
kintegrator->light_inv_rr_threshold = 0.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
constexpr int num_sequences = NUM_PMJ_PATTERNS;
|
||||||
|
int sequence_size = clamp(next_power_of_two(aa_samples - 1), MIN_PMJ_SAMPLES, MAX_PMJ_SAMPLES);
|
||||||
if (kintegrator->sampling_pattern == SAMPLING_PATTERN_PMJ &&
|
if (kintegrator->sampling_pattern == SAMPLING_PATTERN_PMJ &&
|
||||||
dscene->sample_pattern_lut.size() == 0) {
|
dscene->sample_pattern_lut.size() !=
|
||||||
constexpr int sequence_size = NUM_PMJ_SAMPLES;
|
(sequence_size * NUM_PMJ_DIMENSIONS * NUM_PMJ_PATTERNS)) {
|
||||||
constexpr int num_sequences = NUM_PMJ_PATTERNS;
|
kintegrator->pmj_sequence_size = sequence_size;
|
||||||
|
|
||||||
|
if (dscene->sample_pattern_lut.size() != 0) {
|
||||||
|
dscene->sample_pattern_lut.free();
|
||||||
|
}
|
||||||
float2 *directions = (float2 *)dscene->sample_pattern_lut.alloc(sequence_size * num_sequences *
|
float2 *directions = (float2 *)dscene->sample_pattern_lut.alloc(sequence_size * num_sequences *
|
||||||
2);
|
NUM_PMJ_DIMENSIONS);
|
||||||
TaskPool pool;
|
TaskPool pool;
|
||||||
for (int j = 0; j < num_sequences; ++j) {
|
for (int j = 0; j < num_sequences; ++j) {
|
||||||
float2 *sequence = directions + j * sequence_size;
|
float2 *sequence = directions + j * sequence_size;
|
||||||
|
@@ -57,7 +57,8 @@ struct UpdateObjectTransformState {
|
|||||||
/* Flags which will be synchronized to Integrator. */
|
/* Flags which will be synchronized to Integrator. */
|
||||||
bool have_motion;
|
bool have_motion;
|
||||||
bool have_curves;
|
bool have_curves;
|
||||||
// bool have_points;
|
bool have_points;
|
||||||
|
bool have_volumes;
|
||||||
|
|
||||||
/* ** Scheduling queue. ** */
|
/* ** Scheduling queue. ** */
|
||||||
Scene *scene;
|
Scene *scene;
|
||||||
@@ -545,6 +546,12 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
|
|||||||
if (geom->geometry_type == Geometry::HAIR) {
|
if (geom->geometry_type == Geometry::HAIR) {
|
||||||
state->have_curves = true;
|
state->have_curves = true;
|
||||||
}
|
}
|
||||||
|
if (geom->geometry_type == Geometry::POINTCLOUD) {
|
||||||
|
state->have_points = true;
|
||||||
|
}
|
||||||
|
if (geom->geometry_type == Geometry::VOLUME) {
|
||||||
|
state->have_volumes = true;
|
||||||
|
}
|
||||||
|
|
||||||
/* Light group. */
|
/* Light group. */
|
||||||
auto it = scene->lightgroups.find(ob->lightgroup);
|
auto it = scene->lightgroups.find(ob->lightgroup);
|
||||||
@@ -591,6 +598,8 @@ void ObjectManager::device_update_transforms(DeviceScene *dscene, Scene *scene,
|
|||||||
state.need_motion = scene->need_motion();
|
state.need_motion = scene->need_motion();
|
||||||
state.have_motion = false;
|
state.have_motion = false;
|
||||||
state.have_curves = false;
|
state.have_curves = false;
|
||||||
|
state.have_points = false;
|
||||||
|
state.have_volumes = false;
|
||||||
state.scene = scene;
|
state.scene = scene;
|
||||||
state.queue_start_object = 0;
|
state.queue_start_object = 0;
|
||||||
|
|
||||||
@@ -658,6 +667,8 @@ void ObjectManager::device_update_transforms(DeviceScene *dscene, Scene *scene,
|
|||||||
|
|
||||||
dscene->data.bvh.have_motion = state.have_motion;
|
dscene->data.bvh.have_motion = state.have_motion;
|
||||||
dscene->data.bvh.have_curves = state.have_curves;
|
dscene->data.bvh.have_curves = state.have_curves;
|
||||||
|
dscene->data.bvh.have_points = state.have_points;
|
||||||
|
dscene->data.bvh.have_volumes = state.have_volumes;
|
||||||
|
|
||||||
dscene->objects.clear_modified();
|
dscene->objects.clear_modified();
|
||||||
dscene->object_motion_pass.clear_modified();
|
dscene->object_motion_pass.clear_modified();
|
||||||
|
@@ -38,16 +38,17 @@ OSL::TextureSystem *OSLShaderManager::ts_shared = NULL;
|
|||||||
int OSLShaderManager::ts_shared_users = 0;
|
int OSLShaderManager::ts_shared_users = 0;
|
||||||
thread_mutex OSLShaderManager::ts_shared_mutex;
|
thread_mutex OSLShaderManager::ts_shared_mutex;
|
||||||
|
|
||||||
OSL::ShadingSystem *OSLShaderManager::ss_shared = NULL;
|
OSL::ErrorHandler OSLShaderManager::errhandler;
|
||||||
OSLRenderServices *OSLShaderManager::services_shared = NULL;
|
map<int, OSL::ShadingSystem *> OSLShaderManager::ss_shared;
|
||||||
int OSLShaderManager::ss_shared_users = 0;
|
int OSLShaderManager::ss_shared_users = 0;
|
||||||
thread_mutex OSLShaderManager::ss_shared_mutex;
|
thread_mutex OSLShaderManager::ss_shared_mutex;
|
||||||
thread_mutex OSLShaderManager::ss_mutex;
|
thread_mutex OSLShaderManager::ss_mutex;
|
||||||
|
|
||||||
int OSLCompiler::texture_shared_unique_id = 0;
|
int OSLCompiler::texture_shared_unique_id = 0;
|
||||||
|
|
||||||
/* Shader Manager */
|
/* Shader Manager */
|
||||||
|
|
||||||
OSLShaderManager::OSLShaderManager()
|
OSLShaderManager::OSLShaderManager(Device *device) : device_(device)
|
||||||
{
|
{
|
||||||
texture_system_init();
|
texture_system_init();
|
||||||
shading_system_init();
|
shading_system_init();
|
||||||
@@ -107,11 +108,12 @@ void OSLShaderManager::device_update_specific(Device *device,
|
|||||||
|
|
||||||
device_free(device, dscene, scene);
|
device_free(device, dscene, scene);
|
||||||
|
|
||||||
/* set texture system */
|
/* set texture system (only on CPU devices, since GPU devices cannot use OIIO) */
|
||||||
scene->image_manager->set_osl_texture_system((void *)ts);
|
if (device->info.type == DEVICE_CPU) {
|
||||||
|
scene->image_manager->set_osl_texture_system((void *)ts_shared);
|
||||||
|
}
|
||||||
|
|
||||||
/* create shaders */
|
/* create shaders */
|
||||||
OSLGlobals *og = (OSLGlobals *)device->get_cpu_osl_memory();
|
|
||||||
Shader *background_shader = scene->background->get_shader(scene);
|
Shader *background_shader = scene->background->get_shader(scene);
|
||||||
|
|
||||||
foreach (Shader *shader, scene->shaders) {
|
foreach (Shader *shader, scene->shaders) {
|
||||||
@@ -125,22 +127,34 @@ void OSLShaderManager::device_update_specific(Device *device,
|
|||||||
* compile shaders alternating */
|
* compile shaders alternating */
|
||||||
thread_scoped_lock lock(ss_mutex);
|
thread_scoped_lock lock(ss_mutex);
|
||||||
|
|
||||||
OSLCompiler compiler(this, services, ss, scene);
|
device->foreach_device(
|
||||||
compiler.background = (shader == background_shader);
|
[this, scene, shader, background = (shader == background_shader)](Device *sub_device) {
|
||||||
compiler.compile(og, shader);
|
OSLGlobals *og = (OSLGlobals *)sub_device->get_cpu_osl_memory();
|
||||||
|
OSL::ShadingSystem *ss = ss_shared[sub_device->info.type];
|
||||||
|
|
||||||
|
OSLCompiler compiler(this, ss, scene);
|
||||||
|
compiler.background = background;
|
||||||
|
compiler.compile(og, shader);
|
||||||
|
});
|
||||||
|
|
||||||
if (shader->get_use_mis() && shader->has_surface_emission)
|
if (shader->get_use_mis() && shader->has_surface_emission)
|
||||||
scene->light_manager->tag_update(scene, LightManager::SHADER_COMPILED);
|
scene->light_manager->tag_update(scene, LightManager::SHADER_COMPILED);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* setup shader engine */
|
/* setup shader engine */
|
||||||
og->ss = ss;
|
|
||||||
og->ts = ts;
|
|
||||||
og->services = services;
|
|
||||||
|
|
||||||
int background_id = scene->shader_manager->get_shader_id(background_shader);
|
int background_id = scene->shader_manager->get_shader_id(background_shader);
|
||||||
og->background_state = og->surface_state[background_id & SHADER_MASK];
|
|
||||||
og->use = true;
|
device->foreach_device([background_id](Device *sub_device) {
|
||||||
|
OSLGlobals *og = (OSLGlobals *)sub_device->get_cpu_osl_memory();
|
||||||
|
OSL::ShadingSystem *ss = ss_shared[sub_device->info.type];
|
||||||
|
|
||||||
|
og->ss = ss;
|
||||||
|
og->ts = ts_shared;
|
||||||
|
og->services = static_cast<OSLRenderServices *>(ss->renderer());
|
||||||
|
|
||||||
|
og->background_state = og->surface_state[background_id & SHADER_MASK];
|
||||||
|
og->use = true;
|
||||||
|
});
|
||||||
|
|
||||||
foreach (Shader *shader, scene->shaders)
|
foreach (Shader *shader, scene->shaders)
|
||||||
shader->clear_modified();
|
shader->clear_modified();
|
||||||
@@ -148,8 +162,12 @@ void OSLShaderManager::device_update_specific(Device *device,
|
|||||||
update_flags = UPDATE_NONE;
|
update_flags = UPDATE_NONE;
|
||||||
|
|
||||||
/* add special builtin texture types */
|
/* add special builtin texture types */
|
||||||
services->textures.insert(ustring("@ao"), new OSLTextureHandle(OSLTextureHandle::AO));
|
for (const auto &[device_type, ss] : ss_shared) {
|
||||||
services->textures.insert(ustring("@bevel"), new OSLTextureHandle(OSLTextureHandle::BEVEL));
|
OSLRenderServices *services = static_cast<OSLRenderServices *>(ss->renderer());
|
||||||
|
|
||||||
|
services->textures.insert(ustring("@ao"), new OSLTextureHandle(OSLTextureHandle::AO));
|
||||||
|
services->textures.insert(ustring("@bevel"), new OSLTextureHandle(OSLTextureHandle::BEVEL));
|
||||||
|
}
|
||||||
|
|
||||||
device_update_common(device, dscene, scene, progress);
|
device_update_common(device, dscene, scene, progress);
|
||||||
|
|
||||||
@@ -166,26 +184,35 @@ void OSLShaderManager::device_update_specific(Device *device,
|
|||||||
* is being freed after the Session is freed.
|
* is being freed after the Session is freed.
|
||||||
*/
|
*/
|
||||||
thread_scoped_lock lock(ss_shared_mutex);
|
thread_scoped_lock lock(ss_shared_mutex);
|
||||||
ss->optimize_all_groups();
|
for (const auto &[device_type, ss] : ss_shared) {
|
||||||
|
ss->optimize_all_groups();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* load kernels */
|
||||||
|
if (!device->load_osl_kernels()) {
|
||||||
|
progress.set_error(device->error_message());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void OSLShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *scene)
|
void OSLShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *scene)
|
||||||
{
|
{
|
||||||
OSLGlobals *og = (OSLGlobals *)device->get_cpu_osl_memory();
|
|
||||||
|
|
||||||
device_free_common(device, dscene, scene);
|
device_free_common(device, dscene, scene);
|
||||||
|
|
||||||
/* clear shader engine */
|
/* clear shader engine */
|
||||||
og->use = false;
|
device->foreach_device([](Device *sub_device) {
|
||||||
og->ss = NULL;
|
OSLGlobals *og = (OSLGlobals *)sub_device->get_cpu_osl_memory();
|
||||||
og->ts = NULL;
|
|
||||||
|
|
||||||
og->surface_state.clear();
|
og->use = false;
|
||||||
og->volume_state.clear();
|
og->ss = NULL;
|
||||||
og->displacement_state.clear();
|
og->ts = NULL;
|
||||||
og->bump_state.clear();
|
|
||||||
og->background_state.reset();
|
og->surface_state.clear();
|
||||||
|
og->volume_state.clear();
|
||||||
|
og->displacement_state.clear();
|
||||||
|
og->bump_state.clear();
|
||||||
|
og->background_state.reset();
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void OSLShaderManager::texture_system_init()
|
void OSLShaderManager::texture_system_init()
|
||||||
@@ -193,7 +220,7 @@ void OSLShaderManager::texture_system_init()
|
|||||||
/* create texture system, shared between different renders to reduce memory usage */
|
/* create texture system, shared between different renders to reduce memory usage */
|
||||||
thread_scoped_lock lock(ts_shared_mutex);
|
thread_scoped_lock lock(ts_shared_mutex);
|
||||||
|
|
||||||
if (ts_shared_users == 0) {
|
if (ts_shared_users++ == 0) {
|
||||||
ts_shared = TextureSystem::create(true);
|
ts_shared = TextureSystem::create(true);
|
||||||
|
|
||||||
ts_shared->attribute("automip", 1);
|
ts_shared->attribute("automip", 1);
|
||||||
@@ -203,24 +230,18 @@ void OSLShaderManager::texture_system_init()
|
|||||||
/* effectively unlimited for now, until we support proper mipmap lookups */
|
/* effectively unlimited for now, until we support proper mipmap lookups */
|
||||||
ts_shared->attribute("max_memory_MB", 16384);
|
ts_shared->attribute("max_memory_MB", 16384);
|
||||||
}
|
}
|
||||||
|
|
||||||
ts = ts_shared;
|
|
||||||
ts_shared_users++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void OSLShaderManager::texture_system_free()
|
void OSLShaderManager::texture_system_free()
|
||||||
{
|
{
|
||||||
/* shared texture system decrease users and destroy if no longer used */
|
/* shared texture system decrease users and destroy if no longer used */
|
||||||
thread_scoped_lock lock(ts_shared_mutex);
|
thread_scoped_lock lock(ts_shared_mutex);
|
||||||
ts_shared_users--;
|
|
||||||
|
|
||||||
if (ts_shared_users == 0) {
|
if (--ts_shared_users == 0) {
|
||||||
ts_shared->invalidate_all(true);
|
ts_shared->invalidate_all(true);
|
||||||
OSL::TextureSystem::destroy(ts_shared);
|
OSL::TextureSystem::destroy(ts_shared);
|
||||||
ts_shared = NULL;
|
ts_shared = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
ts = NULL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void OSLShaderManager::shading_system_init()
|
void OSLShaderManager::shading_system_init()
|
||||||
@@ -228,101 +249,105 @@ void OSLShaderManager::shading_system_init()
|
|||||||
/* create shading system, shared between different renders to reduce memory usage */
|
/* create shading system, shared between different renders to reduce memory usage */
|
||||||
thread_scoped_lock lock(ss_shared_mutex);
|
thread_scoped_lock lock(ss_shared_mutex);
|
||||||
|
|
||||||
if (ss_shared_users == 0) {
|
device_->foreach_device([](Device *sub_device) {
|
||||||
/* Must use aligned new due to concurrent hash map. */
|
const DeviceType device_type = sub_device->info.type;
|
||||||
services_shared = util_aligned_new<OSLRenderServices>(ts_shared);
|
|
||||||
|
|
||||||
string shader_path = path_get("shader");
|
if (ss_shared_users++ == 0 || ss_shared.find(device_type) == ss_shared.end()) {
|
||||||
|
/* Must use aligned new due to concurrent hash map. */
|
||||||
|
OSLRenderServices *services = util_aligned_new<OSLRenderServices>(ts_shared, device_type);
|
||||||
|
|
||||||
|
string shader_path = path_get("shader");
|
||||||
# ifdef _WIN32
|
# ifdef _WIN32
|
||||||
/* Annoying thing, Cycles stores paths in UTF-8 codepage, so it can
|
/* Annoying thing, Cycles stores paths in UTF-8 codepage, so it can
|
||||||
* operate with file paths with any character. This requires to use wide
|
* operate with file paths with any character. This requires to use wide
|
||||||
* char functions, but OSL uses old fashioned ANSI functions which means:
|
* char functions, but OSL uses old fashioned ANSI functions which means:
|
||||||
*
|
*
|
||||||
* - We have to convert our paths to ANSI before passing to OSL
|
* - We have to convert our paths to ANSI before passing to OSL
|
||||||
* - OSL can't be used when there's a multi-byte character in the path
|
* - OSL can't be used when there's a multi-byte character in the path
|
||||||
* to the shaders folder.
|
* to the shaders folder.
|
||||||
*/
|
*/
|
||||||
shader_path = string_to_ansi(shader_path);
|
shader_path = string_to_ansi(shader_path);
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
ss_shared = new OSL::ShadingSystem(services_shared, ts_shared, &errhandler);
|
OSL::ShadingSystem *ss = new OSL::ShadingSystem(services, ts_shared, &errhandler);
|
||||||
ss_shared->attribute("lockgeom", 1);
|
ss->attribute("lockgeom", 1);
|
||||||
ss_shared->attribute("commonspace", "world");
|
ss->attribute("commonspace", "world");
|
||||||
ss_shared->attribute("searchpath:shader", shader_path);
|
ss->attribute("searchpath:shader", shader_path);
|
||||||
ss_shared->attribute("greedyjit", 1);
|
ss->attribute("greedyjit", 1);
|
||||||
|
|
||||||
VLOG_INFO << "Using shader search path: " << shader_path;
|
VLOG_INFO << "Using shader search path: " << shader_path;
|
||||||
|
|
||||||
/* our own ray types */
|
/* our own ray types */
|
||||||
static const char *raytypes[] = {
|
static const char *raytypes[] = {
|
||||||
"camera", /* PATH_RAY_CAMERA */
|
"camera", /* PATH_RAY_CAMERA */
|
||||||
"reflection", /* PATH_RAY_REFLECT */
|
"reflection", /* PATH_RAY_REFLECT */
|
||||||
"refraction", /* PATH_RAY_TRANSMIT */
|
"refraction", /* PATH_RAY_TRANSMIT */
|
||||||
"diffuse", /* PATH_RAY_DIFFUSE */
|
"diffuse", /* PATH_RAY_DIFFUSE */
|
||||||
"glossy", /* PATH_RAY_GLOSSY */
|
"glossy", /* PATH_RAY_GLOSSY */
|
||||||
"singular", /* PATH_RAY_SINGULAR */
|
"singular", /* PATH_RAY_SINGULAR */
|
||||||
"transparent", /* PATH_RAY_TRANSPARENT */
|
"transparent", /* PATH_RAY_TRANSPARENT */
|
||||||
"volume_scatter", /* PATH_RAY_VOLUME_SCATTER */
|
"volume_scatter", /* PATH_RAY_VOLUME_SCATTER */
|
||||||
|
|
||||||
"shadow", /* PATH_RAY_SHADOW_OPAQUE */
|
"shadow", /* PATH_RAY_SHADOW_OPAQUE */
|
||||||
"shadow", /* PATH_RAY_SHADOW_TRANSPARENT */
|
"shadow", /* PATH_RAY_SHADOW_TRANSPARENT */
|
||||||
|
|
||||||
"__unused__", /* PATH_RAY_NODE_UNALIGNED */
|
"__unused__", /* PATH_RAY_NODE_UNALIGNED */
|
||||||
"__unused__", /* PATH_RAY_MIS_SKIP */
|
"__unused__", /* PATH_RAY_MIS_SKIP */
|
||||||
|
|
||||||
"diffuse_ancestor", /* PATH_RAY_DIFFUSE_ANCESTOR */
|
"diffuse_ancestor", /* PATH_RAY_DIFFUSE_ANCESTOR */
|
||||||
|
|
||||||
/* Remaining irrelevant bits up to 32. */
|
/* Remaining irrelevant bits up to 32. */
|
||||||
"__unused__",
|
"__unused__",
|
||||||
"__unused__",
|
"__unused__",
|
||||||
"__unused__",
|
"__unused__",
|
||||||
"__unused__",
|
"__unused__",
|
||||||
"__unused__",
|
"__unused__",
|
||||||
"__unused__",
|
"__unused__",
|
||||||
"__unused__",
|
"__unused__",
|
||||||
"__unused__",
|
"__unused__",
|
||||||
"__unused__",
|
"__unused__",
|
||||||
"__unused__",
|
"__unused__",
|
||||||
"__unused__",
|
"__unused__",
|
||||||
"__unused__",
|
"__unused__",
|
||||||
"__unused__",
|
"__unused__",
|
||||||
"__unused__",
|
"__unused__",
|
||||||
"__unused__",
|
"__unused__",
|
||||||
"__unused__",
|
"__unused__",
|
||||||
"__unused__",
|
"__unused__",
|
||||||
"__unused__",
|
"__unused__",
|
||||||
"__unused__",
|
"__unused__",
|
||||||
};
|
};
|
||||||
|
|
||||||
const int nraytypes = sizeof(raytypes) / sizeof(raytypes[0]);
|
const int nraytypes = sizeof(raytypes) / sizeof(raytypes[0]);
|
||||||
ss_shared->attribute("raytypes", TypeDesc(TypeDesc::STRING, nraytypes), raytypes);
|
ss->attribute("raytypes", TypeDesc(TypeDesc::STRING, nraytypes), raytypes);
|
||||||
|
|
||||||
OSLRenderServices::register_closures(ss_shared);
|
OSLRenderServices::register_closures(ss);
|
||||||
|
|
||||||
loaded_shaders.clear();
|
ss_shared[device_type] = ss;
|
||||||
}
|
}
|
||||||
|
});
|
||||||
|
|
||||||
ss = ss_shared;
|
loaded_shaders.clear();
|
||||||
services = services_shared;
|
|
||||||
ss_shared_users++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void OSLShaderManager::shading_system_free()
|
void OSLShaderManager::shading_system_free()
|
||||||
{
|
{
|
||||||
/* shared shading system decrease users and destroy if no longer used */
|
/* shared shading system decrease users and destroy if no longer used */
|
||||||
thread_scoped_lock lock(ss_shared_mutex);
|
thread_scoped_lock lock(ss_shared_mutex);
|
||||||
ss_shared_users--;
|
|
||||||
|
|
||||||
if (ss_shared_users == 0) {
|
device_->foreach_device([](Device * /*sub_device*/) {
|
||||||
delete ss_shared;
|
if (--ss_shared_users == 0) {
|
||||||
ss_shared = NULL;
|
for (const auto &[device_type, ss] : ss_shared) {
|
||||||
|
OSLRenderServices *services = static_cast<OSLRenderServices *>(ss->renderer());
|
||||||
|
|
||||||
util_aligned_delete(services_shared);
|
delete ss;
|
||||||
services_shared = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
ss = NULL;
|
util_aligned_delete(services);
|
||||||
services = NULL;
|
}
|
||||||
|
|
||||||
|
ss_shared.clear();
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
bool OSLShaderManager::osl_compile(const string &inputfile, const string &outputfile)
|
bool OSLShaderManager::osl_compile(const string &inputfile, const string &outputfile)
|
||||||
@@ -447,7 +472,9 @@ const char *OSLShaderManager::shader_load_filepath(string filepath)
|
|||||||
|
|
||||||
const char *OSLShaderManager::shader_load_bytecode(const string &hash, const string &bytecode)
|
const char *OSLShaderManager::shader_load_bytecode(const string &hash, const string &bytecode)
|
||||||
{
|
{
|
||||||
ss->LoadMemoryCompiledShader(hash.c_str(), bytecode.c_str());
|
for (const auto &[device_type, ss] : ss_shared) {
|
||||||
|
ss->LoadMemoryCompiledShader(hash.c_str(), bytecode.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
OSLShaderInfo info;
|
OSLShaderInfo info;
|
||||||
|
|
||||||
@@ -525,6 +552,7 @@ OSLNode *OSLShaderManager::osl_node(ShaderGraph *graph,
|
|||||||
|
|
||||||
SocketType::Type socket_type;
|
SocketType::Type socket_type;
|
||||||
|
|
||||||
|
/* Read type and default value. */
|
||||||
if (param->isclosure) {
|
if (param->isclosure) {
|
||||||
socket_type = SocketType::CLOSURE;
|
socket_type = SocketType::CLOSURE;
|
||||||
}
|
}
|
||||||
@@ -579,7 +607,21 @@ OSLNode *OSLShaderManager::osl_node(ShaderGraph *graph,
|
|||||||
node->add_output(param->name, socket_type);
|
node->add_output(param->name, socket_type);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
node->add_input(param->name, socket_type);
|
/* Detect if we should leave parameter initialization to OSL, either though
|
||||||
|
* not constant default or widget metadata. */
|
||||||
|
int socket_flags = 0;
|
||||||
|
if (!param->validdefault) {
|
||||||
|
socket_flags |= SocketType::LINK_OSL_INITIALIZER;
|
||||||
|
}
|
||||||
|
for (const OSL::OSLQuery::Parameter &metadata : param->metadata) {
|
||||||
|
if (metadata.type == TypeDesc::STRING) {
|
||||||
|
if (metadata.name == "widget" && metadata.sdefault[0] == "null") {
|
||||||
|
socket_flags |= SocketType::LINK_OSL_INITIALIZER;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
node->add_input(param->name, socket_type, socket_flags);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -599,11 +641,11 @@ OSLNode *OSLShaderManager::osl_node(ShaderGraph *graph,
|
|||||||
|
|
||||||
/* Graph Compiler */
|
/* Graph Compiler */
|
||||||
|
|
||||||
OSLCompiler::OSLCompiler(OSLShaderManager *manager,
|
OSLCompiler::OSLCompiler(OSLShaderManager *manager, OSL::ShadingSystem *ss, Scene *scene)
|
||||||
OSLRenderServices *services,
|
: scene(scene),
|
||||||
OSL::ShadingSystem *ss,
|
manager(manager),
|
||||||
Scene *scene)
|
services(static_cast<OSLRenderServices *>(ss->renderer())),
|
||||||
: scene(scene), manager(manager), services(services), ss(ss)
|
ss(ss)
|
||||||
{
|
{
|
||||||
current_type = SHADER_TYPE_SURFACE;
|
current_type = SHADER_TYPE_SURFACE;
|
||||||
current_shader = NULL;
|
current_shader = NULL;
|
||||||
@@ -614,6 +656,8 @@ string OSLCompiler::id(ShaderNode *node)
|
|||||||
{
|
{
|
||||||
/* assign layer unique name based on pointer address + bump mode */
|
/* assign layer unique name based on pointer address + bump mode */
|
||||||
stringstream stream;
|
stringstream stream;
|
||||||
|
stream.imbue(std::locale("C")); /* Ensure that no grouping characters (e.g. commas with en_US
|
||||||
|
locale) are added to the pointer string */
|
||||||
stream << "node_" << node->type->name << "_" << node;
|
stream << "node_" << node->type->name << "_" << node;
|
||||||
|
|
||||||
return stream.str();
|
return stream.str();
|
||||||
@@ -702,8 +746,12 @@ void OSLCompiler::add(ShaderNode *node, const char *name, bool isfilepath)
|
|||||||
foreach (ShaderInput *input, node->inputs) {
|
foreach (ShaderInput *input, node->inputs) {
|
||||||
if (!input->link) {
|
if (!input->link) {
|
||||||
/* checks to untangle graphs */
|
/* checks to untangle graphs */
|
||||||
if (node_skip_input(node, input))
|
if (node_skip_input(node, input)) {
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
|
if ((input->flags() & SocketType::LINK_OSL_INITIALIZER) && !(input->constant_folded_in)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
string param_name = compatible_name(node, input);
|
string param_name = compatible_name(node, input);
|
||||||
const SocketType &socket = input->socket_type;
|
const SocketType &socket = input->socket_type;
|
||||||
@@ -1105,7 +1153,12 @@ OSL::ShaderGroupRef OSLCompiler::compile_type(Shader *shader, ShaderGraph *graph
|
|||||||
{
|
{
|
||||||
current_type = type;
|
current_type = type;
|
||||||
|
|
||||||
OSL::ShaderGroupRef group = ss->ShaderGroupBegin(shader->name.c_str());
|
/* Use name hash to identify shader group to avoid issues with non-alphanumeric characters */
|
||||||
|
stringstream name;
|
||||||
|
name.imbue(std::locale("C"));
|
||||||
|
name << "shader_" << shader->name.hash();
|
||||||
|
|
||||||
|
OSL::ShaderGroupRef group = ss->ShaderGroupBegin(name.str());
|
||||||
|
|
||||||
ShaderNode *output = graph->output();
|
ShaderNode *output = graph->output();
|
||||||
ShaderNodeSet dependencies;
|
ShaderNodeSet dependencies;
|
||||||
|
@@ -54,7 +54,7 @@ struct OSLShaderInfo {
|
|||||||
|
|
||||||
class OSLShaderManager : public ShaderManager {
|
class OSLShaderManager : public ShaderManager {
|
||||||
public:
|
public:
|
||||||
OSLShaderManager();
|
OSLShaderManager(Device *device);
|
||||||
~OSLShaderManager();
|
~OSLShaderManager();
|
||||||
|
|
||||||
static void free_memory();
|
static void free_memory();
|
||||||
@@ -92,25 +92,22 @@ class OSLShaderManager : public ShaderManager {
|
|||||||
const std::string &bytecode_hash = "",
|
const std::string &bytecode_hash = "",
|
||||||
const std::string &bytecode = "");
|
const std::string &bytecode = "");
|
||||||
|
|
||||||
protected:
|
private:
|
||||||
void texture_system_init();
|
void texture_system_init();
|
||||||
void texture_system_free();
|
void texture_system_free();
|
||||||
|
|
||||||
void shading_system_init();
|
void shading_system_init();
|
||||||
void shading_system_free();
|
void shading_system_free();
|
||||||
|
|
||||||
OSL::ShadingSystem *ss;
|
Device *device_;
|
||||||
OSL::TextureSystem *ts;
|
|
||||||
OSLRenderServices *services;
|
|
||||||
OSL::ErrorHandler errhandler;
|
|
||||||
map<string, OSLShaderInfo> loaded_shaders;
|
map<string, OSLShaderInfo> loaded_shaders;
|
||||||
|
|
||||||
static OSL::TextureSystem *ts_shared;
|
static OSL::TextureSystem *ts_shared;
|
||||||
static thread_mutex ts_shared_mutex;
|
static thread_mutex ts_shared_mutex;
|
||||||
static int ts_shared_users;
|
static int ts_shared_users;
|
||||||
|
|
||||||
static OSL::ShadingSystem *ss_shared;
|
static OSL::ErrorHandler errhandler;
|
||||||
static OSLRenderServices *services_shared;
|
static map<int, OSL::ShadingSystem *> ss_shared;
|
||||||
static thread_mutex ss_shared_mutex;
|
static thread_mutex ss_shared_mutex;
|
||||||
static thread_mutex ss_mutex;
|
static thread_mutex ss_mutex;
|
||||||
static int ss_shared_users;
|
static int ss_shared_users;
|
||||||
@@ -123,10 +120,7 @@ class OSLShaderManager : public ShaderManager {
|
|||||||
class OSLCompiler {
|
class OSLCompiler {
|
||||||
public:
|
public:
|
||||||
#ifdef WITH_OSL
|
#ifdef WITH_OSL
|
||||||
OSLCompiler(OSLShaderManager *manager,
|
OSLCompiler(OSLShaderManager *manager, OSL::ShadingSystem *shadingsys, Scene *scene);
|
||||||
OSLRenderServices *services,
|
|
||||||
OSL::ShadingSystem *shadingsys,
|
|
||||||
Scene *scene);
|
|
||||||
#endif
|
#endif
|
||||||
void compile(OSLGlobals *og, Shader *shader);
|
void compile(OSLGlobals *og, Shader *shader);
|
||||||
|
|
||||||
|
@@ -99,11 +99,8 @@ Scene::Scene(const SceneParams ¶ms_, Device *device)
|
|||||||
{
|
{
|
||||||
memset((void *)&dscene.data, 0, sizeof(dscene.data));
|
memset((void *)&dscene.data, 0, sizeof(dscene.data));
|
||||||
|
|
||||||
/* OSL only works on the CPU */
|
shader_manager = ShaderManager::create(
|
||||||
if (device->info.has_osl)
|
device->info.has_osl ? params.shadingsystem : SHADINGSYSTEM_SVM, device);
|
||||||
shader_manager = ShaderManager::create(params.shadingsystem);
|
|
||||||
else
|
|
||||||
shader_manager = ShaderManager::create(SHADINGSYSTEM_SVM);
|
|
||||||
|
|
||||||
light_manager = new LightManager();
|
light_manager = new LightManager();
|
||||||
geometry_manager = new GeometryManager();
|
geometry_manager = new GeometryManager();
|
||||||
@@ -488,6 +485,8 @@ void Scene::update_kernel_features()
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
thread_scoped_lock scene_lock(mutex);
|
||||||
|
|
||||||
/* These features are not being tweaked as often as shaders,
|
/* These features are not being tweaked as often as shaders,
|
||||||
* so could be done selective magic for the viewport as well. */
|
* so could be done selective magic for the viewport as well. */
|
||||||
uint kernel_features = shader_manager->get_kernel_features(this);
|
uint kernel_features = shader_manager->get_kernel_features(this);
|
||||||
@@ -574,9 +573,6 @@ bool Scene::update(Progress &progress)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Load render kernels, before device update where we upload data to the GPU. */
|
|
||||||
load_kernels(progress, false);
|
|
||||||
|
|
||||||
/* Upload scene data to the GPU. */
|
/* Upload scene data to the GPU. */
|
||||||
progress.set_status("Updating Scene");
|
progress.set_status("Updating Scene");
|
||||||
MEM_GUARDED_CALL(&progress, device_update, device, progress);
|
MEM_GUARDED_CALL(&progress, device_update, device, progress);
|
||||||
@@ -616,13 +612,8 @@ static void log_kernel_features(const uint features)
|
|||||||
<< "\n";
|
<< "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Scene::load_kernels(Progress &progress, bool lock_scene)
|
bool Scene::load_kernels(Progress &progress)
|
||||||
{
|
{
|
||||||
thread_scoped_lock scene_lock;
|
|
||||||
if (lock_scene) {
|
|
||||||
scene_lock = thread_scoped_lock(mutex);
|
|
||||||
}
|
|
||||||
|
|
||||||
update_kernel_features();
|
update_kernel_features();
|
||||||
|
|
||||||
const uint kernel_features = dscene.data.kernel_features;
|
const uint kernel_features = dscene.data.kernel_features;
|
||||||
|
@@ -270,6 +270,7 @@ class Scene : public NodeOwner {
|
|||||||
|
|
||||||
void enable_update_stats();
|
void enable_update_stats();
|
||||||
|
|
||||||
|
bool load_kernels(Progress &progress);
|
||||||
bool update(Progress &progress);
|
bool update(Progress &progress);
|
||||||
|
|
||||||
bool has_shadow_catcher();
|
bool has_shadow_catcher();
|
||||||
@@ -333,7 +334,6 @@ class Scene : public NodeOwner {
|
|||||||
uint loaded_kernel_features;
|
uint loaded_kernel_features;
|
||||||
|
|
||||||
void update_kernel_features();
|
void update_kernel_features();
|
||||||
bool load_kernels(Progress &progress, bool lock_scene = true);
|
|
||||||
|
|
||||||
bool has_shadow_catcher_ = false;
|
bool has_shadow_catcher_ = false;
|
||||||
bool shadow_catcher_modified_ = true;
|
bool shadow_catcher_modified_ = true;
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user