Compare commits
720 Commits
gpu-shader
...
temp-llvm-
Author | SHA1 | Date | |
---|---|---|---|
dbc8b52752 | |||
ab6a116334 | |||
077debe17f | |||
33d6b09d3d | |||
f92a1e20bc | |||
c1e014f2a1 | |||
6a69a32c6d | |||
cc32f73a29 | |||
d4367fa8e0 | |||
bb0da7dbbd | |||
5e8b42bf86 | |||
a94d80716e | |||
a836ded990 | |||
279085e18e | |||
d5b77fd522 | |||
![]() |
1c7d7c9150 | ||
d786b48aab | |||
![]() |
465bd66519 | ||
![]() |
b7f6377e38 | ||
bdcc258305 | |||
4bf74afacc | |||
dc0bf9b702 | |||
ba38b06a97 | |||
b92ef379b7 | |||
53ed7ec7f2 | |||
c34ea3323a | |||
7006d4f0fb | |||
1464eff375 | |||
c32ce881e8 | |||
![]() |
1e9175e1d7 | ||
2668f9181c | |||
715e0faabc | |||
955748ab1e | |||
4cbcfd22f5 | |||
d7c556de32 | |||
2e6ae11326 | |||
1a721c5dbe | |||
0e38002dd5 | |||
bd9d09ca82 | |||
9bacd54312 | |||
23ac79f2c2 | |||
44db9f192e | |||
a7dca135dc | |||
b29e33caa2 | |||
e28222966b | |||
d25fa3250a | |||
f6699bfccf | |||
0a68fa8e14 | |||
8c4edd1b37 | |||
d5b72fb06c | |||
336f6f4bbd | |||
d6dd2f51bb | |||
5814de65f9 | |||
11ac276caa | |||
1c9d8fcb47 | |||
644e6c7a3e | |||
eed45d2a23 | |||
31e120ef49 | |||
51a131ddbc | |||
594438ef0d | |||
7cf5f4cc63 | |||
52585b39a1 | |||
20b438d523 | |||
5cf993f951 | |||
28a8d434d5 | |||
dd3a72f275 | |||
ceed8f7c06 | |||
85abac7e87 | |||
![]() |
f7ddb1ed8a | ||
![]() |
fbd01624e3 | ||
![]() |
c5862da5ad | ||
![]() |
9085b4a731 | ||
e505957b47 | |||
28df0107d4 | |||
f1e04116f0 | |||
67525b88d2 | |||
95c7e8aa13 | |||
fc45b00720 | |||
6e0cf86e73 | |||
79012c6784 | |||
26c7be71d7 | |||
291d2a2222 | |||
d48fc7d156 | |||
dd01ce2cd0 | |||
ba4b7b4319 | |||
c0db8a9a3b | |||
81b3933abb | |||
35bd6fe993 | |||
2df912466c | |||
582f6032fc | |||
05f900e346 | |||
43f5e761a6 | |||
![]() |
7a71a95f32 | ||
d71009d980 | |||
025c921416 | |||
00965c98cb | |||
710e279b19 | |||
41f3164e57 | |||
1931387799 | |||
8f89196be2 | |||
60c59d7d61 | |||
c593db5a2f | |||
6b662ebdb9 | |||
9033d270d5 | |||
dca5be9b94 | |||
14621e7720 | |||
![]() |
c6e7fc9744 | ||
0fd72a98ac | |||
b4f978e901 | |||
586e2face6 | |||
e2a9e7e803 | |||
6a71b2af66 | |||
3579a9e0fc | |||
921708fc76 | |||
f577abc5cd | |||
8d3e57f338 | |||
902318f0fd | |||
978a930d9c | |||
aba91a745a | |||
dbbf0e7f66 | |||
fdc4a1a590 | |||
d6224db8f1 | |||
2ce2bffc4d | |||
d2bf60cc17 | |||
![]() |
6db0919724 | ||
aa7105f759 | |||
bdbd0cffda | |||
8cf1994455 | |||
d66a6525c3 | |||
7e712b2d6a | |||
1abf2f3c7c | |||
d13970de86 | |||
fac42e3fa1 | |||
68f1b2c671 | |||
bb4de77b82 | |||
c0f06ba614 | |||
e4de5b4657 | |||
5457b66301 | |||
7acd3ad7d8 | |||
59221476b0 | |||
399f84d479 | |||
d93dd85951 | |||
![]() |
e9092110ff | ||
bdb5852e28 | |||
![]() |
70de992afe | ||
76cb11e332 | |||
82fa2bdf3f | |||
cb96435047 | |||
1d25ba175e | |||
6c33a0f6d6 | |||
edb3ab0617 | |||
e2e7f7ea52 | |||
![]() |
5adc06d2d8 | ||
deb3d566a5 | |||
ffd1a7d8c8 | |||
fdb2167b4a | |||
5c63c0a58c | |||
528fc35fed | |||
0b69793487 | |||
34fac7a670 | |||
cc367908cd | |||
0966eab8e9 | |||
c27d0cb7b9 | |||
eac6aff741 | |||
6878897a6d | |||
8a91673562 | |||
491b9cd2b9 | |||
b9861055ad | |||
d15d512a68 | |||
4c3f57ffa7 | |||
3471b0016c | |||
214a56ce8c | |||
![]() |
8e31e53aa0 | ||
76fd2ff9fa | |||
4b21067aea | |||
2648d920d8 | |||
![]() |
0aabaa4583 | ||
552dce0de7 | |||
77760194fe | |||
a3ad5abf2f | |||
![]() |
c0d96ca9a5 | ||
![]() |
3b965ba10b | ||
b386f960f6 | |||
![]() |
67734d1853 | ||
f3c1d0e3a3 | |||
7c9e409985 | |||
9690b7c91b | |||
367b484841 | |||
b9d6271916 | |||
7788293954 | |||
0e1bb232e6 | |||
c536eb410c | |||
6b3c3454d3 | |||
![]() |
1f50c2876e | ||
b7e151d876 | |||
da2c564bb0 | |||
35b1e9fc3a | |||
2229179faa | |||
bd12855110 | |||
197b3502b0 | |||
9765ddf4eb | |||
0624fad0f3 | |||
b1f865f9d3 | |||
0f589f8d3c | |||
d6902668e3 | |||
6da23db5e0 | |||
3e04d37529 | |||
59774d64f0 | |||
21c7689b77 | |||
1818110459 | |||
8dbd406ea0 | |||
15c3617009 | |||
![]() |
4e98d974b5 | ||
5de109cc2d | |||
4b12f521e3 | |||
807efa8538 | |||
366ec5f0f8 | |||
b265b447b6 | |||
36a830b4d3 | |||
49311a73b8 | |||
aa55cb2996 | |||
43875e8dd1 | |||
b32f5a922f | |||
11be151d58 | |||
474adc6f88 | |||
d79868c4e6 | |||
![]() |
5f52684a0f | ||
1a833dbdb9 | |||
e85d7d5a28 | |||
c101ded463 | |||
a9d8ff6e21 | |||
883e4c089a | |||
6a885e5d89 | |||
3a856f7967 | |||
6051b80dd2 | |||
723fb16343 | |||
67b657f07c | |||
644eb68524 | |||
40aee0b2e9 | |||
4cfa21f09b | |||
7afd84df40 | |||
7e8912eb96 | |||
![]() |
5b61737a8f | ||
44232a2ce6 | |||
d56bbfea7b | |||
fdd41ac49e | |||
f5ce243a56 | |||
8e2c9f2dd3 | |||
b44a500988 | |||
b5c18288f5 | |||
5cce6894d2 | |||
381cef1773 | |||
c9e5897bb0 | |||
f6fd3a84c2 | |||
a207c1cdaf | |||
c097c7b855 | |||
c1f5d8d023 | |||
b647509913 | |||
2a0a6a0541 | |||
a603bb3459 | |||
e688c927eb | |||
![]() |
b8952ecec9 | ||
0606adceb3 | |||
![]() |
a90c356467 | ||
459af75d1e | |||
49802af7cd | |||
![]() |
3f96555123 | ||
8ba6302696 | |||
8709cbb73e | |||
1686979747 | |||
e549d6c1bd | |||
bea5a9997d | |||
36c6b2e893 | |||
8ad2642c47 | |||
27231afce5 | |||
80f1527762 | |||
d9d4b9899e | |||
f5679838bc | |||
bc1e517bb3 | |||
03015a9b22 | |||
92237f11eb | |||
b444e1be0f | |||
![]() |
8c55481e33 | ||
9df13fba69 | |||
23be5fd449 | |||
96387a2eff | |||
7b88a4a3ba | |||
4c705ab8fe | |||
8c7d970e2c | |||
1a02c0d7dd | |||
a8b730e04c | |||
7c2fb00e66 | |||
5a3d5f751f | |||
5ca38fd612 | |||
![]() |
f886f29355 | ||
943aed0de3 | |||
60a9703de8 | |||
05df6366a4 | |||
f76e04bf4d | |||
37e799e299 | |||
bd2b48e98d | |||
63f8d18c0f | |||
3060217d39 | |||
715f57371b | |||
b87b33adbf | |||
fcf8fc3eaa | |||
![]() |
57f46b9d5f | ||
566a458950 | |||
dffd032bc9 | |||
18412744c8 | |||
74a566d211 | |||
![]() |
b8f41825e8 | ||
![]() |
dbd64a5592 | ||
![]() |
20987b0f29 | ||
bd3bd776c8 | |||
ad44f22397 | |||
4b00a779ec | |||
56fa6f58a0 | |||
e427e4dbb1 | |||
fc14d02bc5 | |||
5ce1c63e1b | |||
74fa4ee92b | |||
d812e46f40 | |||
74e57efb2d | |||
69f55b1b62 | |||
9183f9f860 | |||
50f378e5c8 | |||
973dac9b5f | |||
7f4878ac7f | |||
![]() |
b8bad3549d | ||
65de17ece4 | |||
bc01003673 | |||
cd4a7be5b2 | |||
3647a1e621 | |||
8ef8f3a60a | |||
15a428eab0 | |||
8aed5dbcf8 | |||
7c76bdca1b | |||
9f546d6908 | |||
9e365069af | |||
![]() |
d8b4275162 | ||
894269ad12 | |||
30cebf5747 | |||
cf6be711e2 | |||
3753a0b72b | |||
be2213472f | |||
f32b63ec58 | |||
6ebc581b52 | |||
069d63561a | |||
72b39d3f92 | |||
e23b54a59f | |||
![]() |
5e9dba822d | ||
98bb8e6955 | |||
5b06759473 | |||
61776befc3 | |||
8f1997975d | |||
32b1a13fa1 | |||
e89d42ddff | |||
a46ff1dd38 | |||
2c0ccb0159 | |||
07726ef1b6 | |||
ca0c9757f2 | |||
a7b64a714d | |||
4f48b2992b | |||
93ba5e2375 | |||
7e92717439 | |||
db795a4727 | |||
da67a19ed9 | |||
2545119112 | |||
d6c3ea9e7a | |||
00f3957b8e | |||
c1279768a7 | |||
cbcd74de22 | |||
b71e29b3da | |||
47b36ddcce | |||
2964c4e1d0 | |||
333dc7b5c4 | |||
0d8c479225 | |||
1552c92fb1 | |||
5568455d63 | |||
204ae33d75 | |||
b815088416 | |||
![]() |
24e0165463 | ||
205254150a | |||
763cd2e0be | |||
5bd41b2e25 | |||
63bd356faf | |||
b9641cfc37 | |||
7fbb767259 | |||
![]() |
5e37f70307 | ||
![]() |
a92805bf24 | ||
484714992c | |||
13af88b23f | |||
e14f8c2dd7 | |||
a37dac0a88 | |||
fee4b58627 | |||
c4cee2e221 | |||
9558fa5196 | |||
565b33c0ad | |||
a8e0fe6a54 | |||
2309fa20af | |||
![]() |
6a9775ec6f | ||
0f48b37aae | |||
ae5a89e80a | |||
4312cb8545 | |||
cd494087c1 | |||
![]() |
b069218a55 | ||
![]() |
e2f0b4a0cb | ||
1de3636624 | |||
512a560cde | |||
a55d318d71 | |||
c6a200c693 | |||
1e7ef83e46 | |||
a37a6fb445 | |||
6483dee141 | |||
ffc4c126f5 | |||
![]() |
f159d49f56 | ||
![]() |
92dae5775f | ||
50fb0fd636 | |||
5cad004d71 | |||
97e3a2d935 | |||
![]() |
b1696702cd | ||
![]() |
ae6f3056fc | ||
78ae587649 | |||
aa23e870ec | |||
01779970c2 | |||
b91ac86cfc | |||
5705db5bb3 | |||
0ed254574f | |||
![]() |
f60b95b532 | ||
c3c69fee09 | |||
86d520f268 | |||
![]() |
9792994311 | ||
2d4c7fa896 | |||
2814740f5b | |||
ee4ed99866 | |||
![]() |
0bd3cad04e | ||
![]() |
f72cc47d8e | ||
0c703b856b | |||
86992a96b8 | |||
c2292b2cd6 | |||
7d1a10a9bb | |||
a5e3899853 | |||
![]() |
477631d9ec | ||
b9c6ef4e8f | |||
0e52af097f | |||
989d510e3e | |||
9a69c456bd | |||
3d8dea9ff9 | |||
![]() |
3f7014ecc9 | ||
0a8a22fc04 | |||
9a312ba192 | |||
338c1060d5 | |||
0578921063 | |||
c20098e6ec | |||
![]() |
d5efda72f5 | ||
b32f9bf801 | |||
d19443074a | |||
![]() |
5ef5a9fc24 | ||
![]() |
d5920744f4 | ||
56ed4c14d3 | |||
2d8606b360 | |||
ca0dbf8c26 | |||
d1f118d228 | |||
be3f3812dc | |||
cb0fbe1fde | |||
ab927f5ca7 | |||
c4e041da23 | |||
![]() |
7c4fc5b58d | ||
de5d36560f | |||
56ff954030 | |||
c0122cc888 | |||
a159f67ccc | |||
f1cca30557 | |||
0988711575 | |||
fca6a9fe3f | |||
e38532773b | |||
cef8f5ff50 | |||
1e98a0cee5 | |||
431255e5e8 | |||
27b70428c1 | |||
e6a732daad | |||
7e60d8a713 | |||
2fd657db5b | |||
aec56e562a | |||
a1f0f2eacb | |||
23ffcb242d | |||
61e92eeb3e | |||
1620dcd208 | |||
c5ec3738d8 | |||
e130903060 | |||
0f89d05848 | |||
42a6b2fd06 | |||
1766549418 | |||
7da979c070 | |||
67c490daaf | |||
68e3755209 | |||
9f290467ca | |||
4fe8c62b56 | |||
198e571e87 | |||
f1b0b0ffb8 | |||
8f69c91408 | |||
0de1d2e84e | |||
![]() |
bb3d03973a | ||
![]() |
26d2caee3b | ||
218360a892 | |||
ca9cdba2df | |||
b869da0c10 | |||
70a7685d04 | |||
2b6c01d98c | |||
70a0d45b69 | |||
594656e7a3 | |||
fed4fc9c42 | |||
8ca8380699 | |||
![]() |
cb334428b0 | ||
9afd6e7b70 | |||
![]() |
7336af3259 | ||
128ebdb062 | |||
bc48da3235 | |||
be6e56b36b | |||
506d672524 | |||
![]() |
d54a08c8af | ||
1757840843 | |||
f8dd03d3dd | |||
![]() |
fd8418385c | ||
1ef8ef4941 | |||
d5d91b4ae4 | |||
02ab4ad991 | |||
fd22404837 | |||
51791004ea | |||
9cec9b4d6e | |||
24b84e4688 | |||
5eeaf4cce6 | |||
386b112f76 | |||
![]() |
d723e331f1 | ||
1cb99f5808 | |||
b3d101ac29 | |||
88e9e97ee9 | |||
247f37f765 | |||
221b7b27fc | |||
88b37b639e | |||
1b4734c57d | |||
3f08488244 | |||
b9f54dd48a | |||
581fb2da10 | |||
c822e03e2a | |||
2fbb52dd86 | |||
3788003cda | |||
74039388cd | |||
7863e03e89 | |||
![]() |
de7f1e8e07 | ||
![]() |
251c017534 | ||
0704570721 | |||
8cf0d15b60 | |||
1cd9fcd98d | |||
2f7bec04e8 | |||
3692c0521c | |||
b67dca9b76 | |||
4b971bb87c | |||
![]() |
2e53f8b4b1 | ||
e1cb2a226c | |||
d8edc2c634 | |||
6f460b76fe | |||
bc1e3238c4 | |||
c12d8a72ce | |||
d7f0de0e3a | |||
![]() |
7168a4fa5c | ||
500ec993f5 | |||
![]() |
da279927b1 | ||
ac447ba1a3 | |||
e2473d3baf | |||
76471dbd5e | |||
4e45265dc6 | |||
262ef26ea3 | |||
35124acd19 | |||
c2001ec275 | |||
e7ae9f493a | |||
4fac3be146 | |||
2e6f914e37 | |||
0adb356f2e | |||
aa7051c8f2 | |||
aff6227412 | |||
dae9917915 | |||
2206b6b9a0 | |||
f613c4c095 | |||
98a5c924fc | |||
![]() |
03c9563582 | ||
f9add2d63e | |||
![]() |
e5e8db73df | ||
6ae34bb071 | |||
7c703b4699 | |||
f1118ee51e | |||
ab2a7aa0da | |||
cebe5f5bf4 | |||
0cbcddd91e | |||
b31250feba | |||
444971aa8e | |||
f7f558e293 | |||
e121b5b66c | |||
![]() |
d2e6087335 | ||
aa6c922d99 | |||
d2f4fb68f5 | |||
7b4867d1ba | |||
468bba3d2b | |||
2531358297 | |||
08264aaf82 | |||
61bd5882a2 | |||
a0bb6bb4d6 | |||
97465046c6 | |||
92daff6ac2 | |||
2efc2221cc | |||
0789f61373 | |||
602ecbdf9a | |||
eb7827e797 | |||
12a83db83c | |||
f86331a033 | |||
ef88047a97 | |||
a773cd3850 | |||
35c3644e78 | |||
63342861e7 | |||
b066d58216 | |||
236be8e9f1 | |||
dcc500e5a2 | |||
![]() |
c6eaa9c552 | ||
658fd8df0b | |||
![]() |
004172de38 | ||
6b5e1cfaca | |||
2cda65a35a | |||
8015433f81 | |||
e3cf7ebdb1 | |||
a073e1e401 | |||
7b5a6f452a | |||
466b50dbc9 | |||
2fb8c6805a | |||
e3d3296327 | |||
e6a41e1c80 | |||
b2bb3e4b72 | |||
4a3f99ad5a | |||
5a11c6e558 | |||
5514ca58a4 | |||
94e8db1e86 | |||
3652f5f758 | |||
c91d196159 | |||
9812a08848 | |||
e216660382 | |||
e253fb2143 | |||
3bf10e5d0a | |||
2378f057a0 | |||
bc4c20d414 | |||
ffddf9e5c9 | |||
447378753d | |||
845716e600 | |||
d6646f7a8a | |||
1e2376f41f | |||
82808e18e6 | |||
5ffb9b6dc4 | |||
![]() |
a0acb9bd0c | ||
e6cd4761e7 | |||
726bc3a46b | |||
610e68590b | |||
ce5561b815 | |||
f12a6ff5cb | |||
40d28b40df | |||
![]() |
c49d2cbe92 | ||
![]() |
b41c72b710 | ||
827c5b399e | |||
8f2db94627 | |||
c26011efcb | |||
6b1b3383c6 | |||
![]() |
a9642f8d61 | ||
c155a5f9d7 | |||
752c6d668b | |||
bbadee6fc1 | |||
7a7ae4df43 | |||
338408772a | |||
71c39a9e2e | |||
cae3b581b0 | |||
499c24ce75 | |||
cbe9a87d28 | |||
01ab36ebc1 | |||
e206a0ae96 | |||
4930cd5db6 | |||
a07089dcb1 | |||
7a97e925fd | |||
56b068a664 | |||
64d9291d26 | |||
![]() |
2cc56495f3 | ||
72acce43bc | |||
5d59b38605 | |||
057cb7e5e7 | |||
65f547c3fc | |||
9fc5a9c78f | |||
7ea4342e73 | |||
833eb90820 | |||
![]() |
25478bdc9a | ||
5a50b46376 | |||
f5e5a0987a | |||
050b205a97 | |||
3850fdd5b9 | |||
98a5658239 | |||
cd818fd081 | |||
75b53542f2 | |||
17770192fb | |||
e0763760e4 | |||
f8dea3fe64 | |||
2ddbf81c47 | |||
f9db7675e0 | |||
785503a7e4 | |||
1a887b0088 | |||
4d46e8a5e0 | |||
4b259edb0a | |||
de35a90f9f | |||
ebe5a5eca8 | |||
![]() |
dbeab82a89 | ||
![]() |
16fc0da0e7 | ||
21f22759ea | |||
60c0b79256 | |||
![]() |
6b4405d757 | ||
d144983f8c | |||
56f66602c7 |
@@ -269,9 +269,5 @@ StatementMacros:
|
|||||||
- PyObject_HEAD
|
- PyObject_HEAD
|
||||||
- PyObject_VAR_HEAD
|
- PyObject_VAR_HEAD
|
||||||
|
|
||||||
StatementMacros:
|
|
||||||
- GPU_STAGE_INTERFACE_CREATE
|
|
||||||
- GPU_SHADER_DESCRIPTOR
|
|
||||||
|
|
||||||
MacroBlockBegin: "^BSDF_CLOSURE_CLASS_BEGIN$"
|
MacroBlockBegin: "^BSDF_CLOSURE_CLASS_BEGIN$"
|
||||||
MacroBlockEnd: "^BSDF_CLOSURE_CLASS_END$"
|
MacroBlockEnd: "^BSDF_CLOSURE_CLASS_END$"
|
||||||
|
@@ -187,6 +187,13 @@ mark_as_advanced(CPACK_OVERRIDE_PACKAGENAME)
|
|||||||
mark_as_advanced(BUILDINFO_OVERRIDE_DATE)
|
mark_as_advanced(BUILDINFO_OVERRIDE_DATE)
|
||||||
mark_as_advanced(BUILDINFO_OVERRIDE_TIME)
|
mark_as_advanced(BUILDINFO_OVERRIDE_TIME)
|
||||||
|
|
||||||
|
if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.16")
|
||||||
|
option(WITH_UNITY_BUILD "Enable unity build for modules that support it to improve compile times" ON)
|
||||||
|
mark_as_advanced(WITH_UNITY_BUILD)
|
||||||
|
else()
|
||||||
|
set(WITH_UNITY_BUILD OFF)
|
||||||
|
endif()
|
||||||
|
|
||||||
option(WITH_IK_ITASC "Enable ITASC IK solver (only disable for development & for incompatible C++ compilers)" ON)
|
option(WITH_IK_ITASC "Enable ITASC IK solver (only disable for development & for incompatible C++ compilers)" ON)
|
||||||
option(WITH_IK_SOLVER "Enable Legacy IK solver (only disable for development)" ON)
|
option(WITH_IK_SOLVER "Enable Legacy IK solver (only disable for development)" ON)
|
||||||
option(WITH_FFTW3 "Enable FFTW3 support (Used for smoke, ocean sim, and audio effects)" ON)
|
option(WITH_FFTW3 "Enable FFTW3 support (Used for smoke, ocean sim, and audio effects)" ON)
|
||||||
@@ -426,30 +433,40 @@ mark_as_advanced(WITH_CYCLES_DEBUG_NAN)
|
|||||||
mark_as_advanced(WITH_CYCLES_NATIVE_ONLY)
|
mark_as_advanced(WITH_CYCLES_NATIVE_ONLY)
|
||||||
|
|
||||||
# NVIDIA CUDA & OptiX
|
# NVIDIA CUDA & OptiX
|
||||||
option(WITH_CYCLES_DEVICE_CUDA "Enable Cycles NVIDIA CUDA compute support" ON)
|
if(NOT APPLE)
|
||||||
option(WITH_CYCLES_DEVICE_OPTIX "Enable Cycles NVIDIA OptiX support" ON)
|
option(WITH_CYCLES_DEVICE_CUDA "Enable Cycles NVIDIA CUDA compute support" ON)
|
||||||
mark_as_advanced(WITH_CYCLES_DEVICE_CUDA)
|
option(WITH_CYCLES_DEVICE_OPTIX "Enable Cycles NVIDIA OptiX support" ON)
|
||||||
|
mark_as_advanced(WITH_CYCLES_DEVICE_CUDA)
|
||||||
|
|
||||||
option(WITH_CYCLES_CUDA_BINARIES "Build Cycles NVIDIA CUDA binaries" OFF)
|
option(WITH_CYCLES_CUDA_BINARIES "Build Cycles NVIDIA CUDA binaries" OFF)
|
||||||
set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 compute_75 CACHE STRING "CUDA architectures to build binaries for")
|
set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 compute_75 CACHE STRING "CUDA architectures to build binaries for")
|
||||||
option(WITH_CYCLES_CUBIN_COMPILER "Build cubins with nvrtc based compiler instead of nvcc" OFF)
|
option(WITH_CYCLES_CUBIN_COMPILER "Build cubins with nvrtc based compiler instead of nvcc" OFF)
|
||||||
option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful on machines with limited RAM)" OFF)
|
option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful on machines with limited RAM)" OFF)
|
||||||
option(WITH_CUDA_DYNLOAD "Dynamically load CUDA libraries at runtime (for developers, makes cuda-gdb work)" ON)
|
option(WITH_CUDA_DYNLOAD "Dynamically load CUDA libraries at runtime (for developers, makes cuda-gdb work)" ON)
|
||||||
mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
|
mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
|
||||||
mark_as_advanced(WITH_CYCLES_CUBIN_COMPILER)
|
mark_as_advanced(WITH_CYCLES_CUBIN_COMPILER)
|
||||||
mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
|
mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
|
||||||
mark_as_advanced(WITH_CUDA_DYNLOAD)
|
mark_as_advanced(WITH_CUDA_DYNLOAD)
|
||||||
|
endif()
|
||||||
|
|
||||||
# AMD HIP
|
# AMD HIP
|
||||||
if(WIN32)
|
if(NOT APPLE)
|
||||||
|
if(WIN32)
|
||||||
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
|
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
|
||||||
else()
|
else()
|
||||||
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" OFF)
|
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" OFF)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
|
||||||
|
set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 CACHE STRING "AMD HIP architectures to build binaries for")
|
||||||
|
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
|
||||||
|
mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# Apple Metal
|
||||||
|
if(APPLE)
|
||||||
|
option(WITH_CYCLES_DEVICE_METAL "Enable Cycles Apple Metal compute support" ON)
|
||||||
endif()
|
endif()
|
||||||
option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
|
|
||||||
set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 CACHE STRING "AMD HIP architectures to build binaries for")
|
|
||||||
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
|
|
||||||
mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
|
|
||||||
|
|
||||||
# Draw Manager
|
# Draw Manager
|
||||||
option(WITH_DRAW_DEBUG "Add extra debug capabilities to Draw Manager" OFF)
|
option(WITH_DRAW_DEBUG "Add extra debug capabilities to Draw Manager" OFF)
|
||||||
@@ -494,11 +511,10 @@ if(WIN32)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
# This should be turned off when Blender enter beta/rc/release
|
# This should be turned off when Blender enter beta/rc/release
|
||||||
if("${BLENDER_VERSION_CYCLE}" STREQUAL "release" OR
|
if("${BLENDER_VERSION_CYCLE}" STREQUAL "alpha")
|
||||||
"${BLENDER_VERSION_CYCLE}" STREQUAL "rc")
|
|
||||||
set(WITH_EXPERIMENTAL_FEATURES OFF)
|
|
||||||
else()
|
|
||||||
set(WITH_EXPERIMENTAL_FEATURES ON)
|
set(WITH_EXPERIMENTAL_FEATURES ON)
|
||||||
|
else()
|
||||||
|
set(WITH_EXPERIMENTAL_FEATURES OFF)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# Unit testsing
|
# Unit testsing
|
||||||
@@ -840,7 +856,7 @@ if(WITH_AUDASPACE)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
# Auto-enable CUDA dynload if toolkit is not found.
|
# Auto-enable CUDA dynload if toolkit is not found.
|
||||||
if(NOT WITH_CUDA_DYNLOAD)
|
if(WITH_CYCLES AND WITH_CYCLES_DEVICE_CUDA AND NOT WITH_CUDA_DYNLOAD)
|
||||||
find_package(CUDA)
|
find_package(CUDA)
|
||||||
if(NOT CUDA_FOUND)
|
if(NOT CUDA_FOUND)
|
||||||
message(STATUS "CUDA toolkit not found, using dynamic runtime loading of libraries (WITH_CUDA_DYNLOAD) instead")
|
message(STATUS "CUDA toolkit not found, using dynamic runtime loading of libraries (WITH_CUDA_DYNLOAD) instead")
|
||||||
|
@@ -2083,9 +2083,9 @@ compile_OIIO() {
|
|||||||
cmake_d="$cmake_d -D OPENEXR_VERSION=$OPENEXR_VERSION"
|
cmake_d="$cmake_d -D OPENEXR_VERSION=$OPENEXR_VERSION"
|
||||||
|
|
||||||
if [ "$_with_built_openexr" = true ]; then
|
if [ "$_with_built_openexr" = true ]; then
|
||||||
cmake_d="$cmake_d -D ILMBASE_HOME=$INST/openexr"
|
cmake_d="$cmake_d -D ILMBASE_ROOT=$INST/openexr"
|
||||||
cmake_d="$cmake_d -D OPENEXR_HOME=$INST/openexr"
|
cmake_d="$cmake_d -D OPENEXR_ROOT=$INST/openexr"
|
||||||
INFO "ILMBASE_HOME=$INST/openexr"
|
INFO "Ilmbase_ROOT=$INST/openexr"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# ptex is only needed when nicholas bishop is ready
|
# ptex is only needed when nicholas bishop is ready
|
||||||
@@ -2374,9 +2374,9 @@ compile_OSL() {
|
|||||||
#~ cmake_d="$cmake_d -D ILMBASE_VERSION=$ILMBASE_VERSION"
|
#~ cmake_d="$cmake_d -D ILMBASE_VERSION=$ILMBASE_VERSION"
|
||||||
|
|
||||||
if [ "$_with_built_openexr" = true ]; then
|
if [ "$_with_built_openexr" = true ]; then
|
||||||
INFO "ILMBASE_HOME=$INST/openexr"
|
cmake_d="$cmake_d -D ILMBASE_ROOT=$INST/openexr"
|
||||||
cmake_d="$cmake_d -D OPENEXR_ROOT_DIR=$INST/openexr"
|
cmake_d="$cmake_d -D OPENEXR_ROOT=$INST/openexr"
|
||||||
cmake_d="$cmake_d -D ILMBASE_ROOT_DIR=$INST/openexr"
|
INFO "Ilmbase_ROOT=$INST/openexr"
|
||||||
# XXX Temp workaround... sigh, ILMBase really messed the things up by defining their custom names ON by default :(
|
# XXX Temp workaround... sigh, ILMBase really messed the things up by defining their custom names ON by default :(
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@@ -197,3 +197,38 @@ index 67ec0d15f..6dc3e85a0 100644
|
|||||||
#else
|
#else
|
||||||
#error Unknown architecture.
|
#error Unknown architecture.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
diff --git a/pxr/base/arch/demangle.cpp b/pxr/base/arch/demangle.cpp
|
||||||
|
index 67ec0d15f..6dc3e85a0 100644
|
||||||
|
--- a/pxr/base/arch/demangle.cpp
|
||||||
|
+++ b/pxr/base/arch/demangle.cpp
|
||||||
|
@@ -36,6 +36,7 @@
|
||||||
|
#if (ARCH_COMPILER_GCC_MAJOR == 3 && ARCH_COMPILER_GCC_MINOR >= 1) || \
|
||||||
|
ARCH_COMPILER_GCC_MAJOR > 3 || defined(ARCH_COMPILER_CLANG)
|
||||||
|
#define _AT_LEAST_GCC_THREE_ONE_OR_CLANG
|
||||||
|
+#include <cxxabi.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
PXR_NAMESPACE_OPEN_SCOPE
|
||||||
|
@@ -138,7 +139,6 @@
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(_AT_LEAST_GCC_THREE_ONE_OR_CLANG)
|
||||||
|
-#include <cxxabi.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This routine doesn't work when you get to gcc3.4.
|
||||||
|
|
||||||
|
diff --git a/pxr/base/work/singularTask.h b/pxr/base/work/singularTask.h
|
||||||
|
index 67ec0d15f..6dc3e85a0 100644
|
||||||
|
--- a/pxr/base/work/singularTask.h
|
||||||
|
+++ b/pxr/base/work/singularTask.h
|
||||||
|
@@ -120,7 +120,7 @@
|
||||||
|
// case we go again to ensure the task can do whatever it
|
||||||
|
// was awakened to do. Once we successfully take the count
|
||||||
|
// to zero, we stop.
|
||||||
|
- size_t old = count;
|
||||||
|
+ std::size_t old = count;
|
||||||
|
do { _fn(); } while (
|
||||||
|
!count.compare_exchange_strong(old, 0));
|
||||||
|
});
|
||||||
|
@@ -19,9 +19,6 @@ set(WITH_CODEC_SNDFILE OFF CACHE BOOL "" FORCE)
|
|||||||
set(WITH_COMPOSITOR OFF CACHE BOOL "" FORCE)
|
set(WITH_COMPOSITOR OFF CACHE BOOL "" FORCE)
|
||||||
set(WITH_COREAUDIO OFF CACHE BOOL "" FORCE)
|
set(WITH_COREAUDIO OFF CACHE BOOL "" FORCE)
|
||||||
set(WITH_CYCLES OFF CACHE BOOL "" FORCE)
|
set(WITH_CYCLES OFF CACHE BOOL "" FORCE)
|
||||||
set(WITH_CYCLES_DEVICE_OPTIX OFF CACHE BOOL "" FORCE)
|
|
||||||
set(WITH_CYCLES_EMBREE OFF CACHE BOOL "" FORCE)
|
|
||||||
set(WITH_CYCLES_OSL OFF CACHE BOOL "" FORCE)
|
|
||||||
set(WITH_DRACO OFF CACHE BOOL "" FORCE)
|
set(WITH_DRACO OFF CACHE BOOL "" FORCE)
|
||||||
set(WITH_FFTW3 OFF CACHE BOOL "" FORCE)
|
set(WITH_FFTW3 OFF CACHE BOOL "" FORCE)
|
||||||
set(WITH_FREESTYLE OFF CACHE BOOL "" FORCE)
|
set(WITH_FREESTYLE OFF CACHE BOOL "" FORCE)
|
||||||
|
@@ -61,6 +61,7 @@ set(WITH_MEM_JEMALLOC ON CACHE BOOL "" FORCE)
|
|||||||
# platform dependent options
|
# platform dependent options
|
||||||
if(APPLE)
|
if(APPLE)
|
||||||
set(WITH_COREAUDIO ON CACHE BOOL "" FORCE)
|
set(WITH_COREAUDIO ON CACHE BOOL "" FORCE)
|
||||||
|
set(WITH_CYCLES_DEVICE_METAL ON CACHE BOOL "" FORCE)
|
||||||
endif()
|
endif()
|
||||||
if(NOT WIN32)
|
if(NOT WIN32)
|
||||||
set(WITH_JACK ON CACHE BOOL "" FORCE)
|
set(WITH_JACK ON CACHE BOOL "" FORCE)
|
||||||
|
@@ -257,9 +257,6 @@ if(WITH_BOOST)
|
|||||||
if(WITH_INTERNATIONAL)
|
if(WITH_INTERNATIONAL)
|
||||||
list(APPEND _boost_FIND_COMPONENTS locale)
|
list(APPEND _boost_FIND_COMPONENTS locale)
|
||||||
endif()
|
endif()
|
||||||
if(WITH_CYCLES_NETWORK)
|
|
||||||
list(APPEND _boost_FIND_COMPONENTS serialization)
|
|
||||||
endif()
|
|
||||||
if(WITH_OPENVDB)
|
if(WITH_OPENVDB)
|
||||||
list(APPEND _boost_FIND_COMPONENTS iostreams)
|
list(APPEND _boost_FIND_COMPONENTS iostreams)
|
||||||
endif()
|
endif()
|
||||||
@@ -339,7 +336,7 @@ if(WITH_LLVM)
|
|||||||
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(WITH_CYCLES_OSL)
|
if(WITH_CYCLES AND WITH_CYCLES_OSL)
|
||||||
set(CYCLES_OSL ${LIBDIR}/osl)
|
set(CYCLES_OSL ${LIBDIR}/osl)
|
||||||
|
|
||||||
find_library(OSL_LIB_EXEC NAMES oslexec PATHS ${CYCLES_OSL}/lib)
|
find_library(OSL_LIB_EXEC NAMES oslexec PATHS ${CYCLES_OSL}/lib)
|
||||||
@@ -359,7 +356,7 @@ if(WITH_CYCLES_OSL)
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(WITH_CYCLES_EMBREE)
|
if(WITH_CYCLES AND WITH_CYCLES_EMBREE)
|
||||||
find_package(Embree 3.8.0 REQUIRED)
|
find_package(Embree 3.8.0 REQUIRED)
|
||||||
# Increase stack size for Embree, only works for executables.
|
# Increase stack size for Embree, only works for executables.
|
||||||
if(NOT WITH_PYTHON_MODULE)
|
if(NOT WITH_PYTHON_MODULE)
|
||||||
|
@@ -241,7 +241,7 @@ if(WITH_INPUT_NDOF)
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(WITH_CYCLES_OSL)
|
if(WITH_CYCLES AND WITH_CYCLES_OSL)
|
||||||
set(CYCLES_OSL ${LIBDIR}/osl CACHE PATH "Path to OpenShadingLanguage installation")
|
set(CYCLES_OSL ${LIBDIR}/osl CACHE PATH "Path to OpenShadingLanguage installation")
|
||||||
if(EXISTS ${CYCLES_OSL} AND NOT OSL_ROOT)
|
if(EXISTS ${CYCLES_OSL} AND NOT OSL_ROOT)
|
||||||
set(OSL_ROOT ${CYCLES_OSL})
|
set(OSL_ROOT ${CYCLES_OSL})
|
||||||
@@ -314,7 +314,7 @@ if(WITH_BOOST)
|
|||||||
endif()
|
endif()
|
||||||
set(Boost_USE_MULTITHREADED ON)
|
set(Boost_USE_MULTITHREADED ON)
|
||||||
set(__boost_packages filesystem regex thread date_time)
|
set(__boost_packages filesystem regex thread date_time)
|
||||||
if(WITH_CYCLES_OSL)
|
if(WITH_CYCLES AND WITH_CYCLES_OSL)
|
||||||
if(NOT (${OSL_LIBRARY_VERSION_MAJOR} EQUAL "1" AND ${OSL_LIBRARY_VERSION_MINOR} LESS "6"))
|
if(NOT (${OSL_LIBRARY_VERSION_MAJOR} EQUAL "1" AND ${OSL_LIBRARY_VERSION_MINOR} LESS "6"))
|
||||||
list(APPEND __boost_packages wave)
|
list(APPEND __boost_packages wave)
|
||||||
else()
|
else()
|
||||||
@@ -323,9 +323,6 @@ if(WITH_BOOST)
|
|||||||
if(WITH_INTERNATIONAL)
|
if(WITH_INTERNATIONAL)
|
||||||
list(APPEND __boost_packages locale)
|
list(APPEND __boost_packages locale)
|
||||||
endif()
|
endif()
|
||||||
if(WITH_CYCLES_NETWORK)
|
|
||||||
list(APPEND __boost_packages serialization)
|
|
||||||
endif()
|
|
||||||
if(WITH_OPENVDB)
|
if(WITH_OPENVDB)
|
||||||
list(APPEND __boost_packages iostreams)
|
list(APPEND __boost_packages iostreams)
|
||||||
endif()
|
endif()
|
||||||
@@ -403,7 +400,7 @@ if(WITH_OPENCOLORIO)
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(WITH_CYCLES_EMBREE)
|
if(WITH_CYCLES AND WITH_CYCLES_EMBREE)
|
||||||
find_package(Embree 3.8.0 REQUIRED)
|
find_package(Embree 3.8.0 REQUIRED)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
@@ -477,7 +477,7 @@ if(WITH_PYTHON)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(WITH_BOOST)
|
if(WITH_BOOST)
|
||||||
if(WITH_CYCLES_OSL)
|
if(WITH_CYCLES AND WITH_CYCLES_OSL)
|
||||||
set(boost_extra_libs wave)
|
set(boost_extra_libs wave)
|
||||||
endif()
|
endif()
|
||||||
if(WITH_INTERNATIONAL)
|
if(WITH_INTERNATIONAL)
|
||||||
@@ -520,7 +520,7 @@ if(WITH_BOOST)
|
|||||||
debug ${BOOST_LIBPATH}/libboost_thread-${BOOST_DEBUG_POSTFIX}
|
debug ${BOOST_LIBPATH}/libboost_thread-${BOOST_DEBUG_POSTFIX}
|
||||||
debug ${BOOST_LIBPATH}/libboost_chrono-${BOOST_DEBUG_POSTFIX}
|
debug ${BOOST_LIBPATH}/libboost_chrono-${BOOST_DEBUG_POSTFIX}
|
||||||
)
|
)
|
||||||
if(WITH_CYCLES_OSL)
|
if(WITH_CYCLES AND WITH_CYCLES_OSL)
|
||||||
set(BOOST_LIBRARIES ${BOOST_LIBRARIES}
|
set(BOOST_LIBRARIES ${BOOST_LIBRARIES}
|
||||||
optimized ${BOOST_LIBPATH}/libboost_wave-${BOOST_POSTFIX}
|
optimized ${BOOST_LIBPATH}/libboost_wave-${BOOST_POSTFIX}
|
||||||
debug ${BOOST_LIBPATH}/libboost_wave-${BOOST_DEBUG_POSTFIX})
|
debug ${BOOST_LIBPATH}/libboost_wave-${BOOST_DEBUG_POSTFIX})
|
||||||
@@ -708,7 +708,7 @@ if(WITH_CODEC_SNDFILE)
|
|||||||
set(LIBSNDFILE_LIBRARIES ${LIBSNDFILE_LIBPATH}/libsndfile-1.lib)
|
set(LIBSNDFILE_LIBRARIES ${LIBSNDFILE_LIBPATH}/libsndfile-1.lib)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(WITH_CYCLES_OSL)
|
if(WITH_CYCLES AND WITH_CYCLES_OSL)
|
||||||
set(CYCLES_OSL ${LIBDIR}/osl CACHE PATH "Path to OpenShadingLanguage installation")
|
set(CYCLES_OSL ${LIBDIR}/osl CACHE PATH "Path to OpenShadingLanguage installation")
|
||||||
set(OSL_SHADER_DIR ${CYCLES_OSL}/shaders)
|
set(OSL_SHADER_DIR ${CYCLES_OSL}/shaders)
|
||||||
# Shaders have moved around a bit between OSL versions, check multiple locations
|
# Shaders have moved around a bit between OSL versions, check multiple locations
|
||||||
@@ -741,7 +741,7 @@ if(WITH_CYCLES_OSL)
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(WITH_CYCLES_EMBREE)
|
if(WITH_CYCLES AND WITH_CYCLES_EMBREE)
|
||||||
windows_find_package(Embree)
|
windows_find_package(Embree)
|
||||||
if(NOT EMBREE_FOUND)
|
if(NOT EMBREE_FOUND)
|
||||||
set(EMBREE_INCLUDE_DIRS ${LIBDIR}/embree/include)
|
set(EMBREE_INCLUDE_DIRS ${LIBDIR}/embree/include)
|
||||||
|
@@ -6,91 +6,90 @@
|
|||||||
* as part of the normal development process.
|
* as part of the normal development process.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** \defgroup MEM Guarded memory (de)allocation
|
/* TODO: other modules.
|
||||||
* \ingroup intern
|
* - `libmv`
|
||||||
|
* - `cycles`
|
||||||
|
* - `opencolorio`
|
||||||
|
* - `opensubdiv`
|
||||||
|
* - `openvdb`
|
||||||
|
* - `quadriflow`
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** \defgroup clog C-Logging (CLOG)
|
/** \defgroup intern_atomic Atomic Operations
|
||||||
* \ingroup intern
|
* \ingroup intern */
|
||||||
*/
|
|
||||||
|
|
||||||
/** \defgroup ctr container
|
/** \defgroup intern_clog C-Logging (CLOG)
|
||||||
* \ingroup intern
|
* \ingroup intern */
|
||||||
*/
|
|
||||||
|
|
||||||
/** \defgroup iksolver iksolver
|
/** \defgroup intern_eigen Eigen
|
||||||
* \ingroup intern
|
* \ingroup intern */
|
||||||
*/
|
|
||||||
|
|
||||||
/** \defgroup itasc itasc
|
/** \defgroup intern_glew-mx GLEW with Multiple Rendering Contexts
|
||||||
* \ingroup intern
|
* \ingroup intern */
|
||||||
*/
|
|
||||||
|
|
||||||
/** \defgroup memutil memutil
|
/** \defgroup intern_iksolver Inverse Kinematics (Solver)
|
||||||
* \ingroup intern
|
* \ingroup intern */
|
||||||
*/
|
|
||||||
|
|
||||||
/** \defgroup mikktspace mikktspace
|
/** \defgroup intern_itasc Inverse Kinematics (ITASC)
|
||||||
* \ingroup intern
|
* \ingroup intern */
|
||||||
*/
|
|
||||||
|
|
||||||
/** \defgroup moto moto
|
/** \defgroup intern_libc_compat libc Compatibility For Linux
|
||||||
* \ingroup intern
|
* \ingroup intern */
|
||||||
*/
|
|
||||||
|
|
||||||
/** \defgroup eigen eigen
|
/** \defgroup intern_locale Locale
|
||||||
* \ingroup intern
|
* \ingroup intern */
|
||||||
*/
|
|
||||||
|
|
||||||
/** \defgroup smoke smoke
|
/** \defgroup intern_mantaflow Manta-Flow Fluid Simulation
|
||||||
* \ingroup intern
|
* \ingroup intern */
|
||||||
*/
|
|
||||||
|
|
||||||
/** \defgroup string string
|
/** \defgroup intern_mem Guarded Memory (de)allocation
|
||||||
* \ingroup intern
|
* \ingroup intern */
|
||||||
*/
|
|
||||||
|
/** \defgroup intern_memutil Memory Utilities (memutil)
|
||||||
|
* \ingroup intern */
|
||||||
|
|
||||||
|
/** \defgroup intern_mikktspace MikktSpace
|
||||||
|
* \ingroup intern */
|
||||||
|
|
||||||
|
/** \defgroup intern_numaapi NUMA (Non Uniform Memory Architecture)
|
||||||
|
* \ingroup intern */
|
||||||
|
|
||||||
|
/** \defgroup intern_rigidbody Rigid-Body C-API
|
||||||
|
* \ingroup intern */
|
||||||
|
|
||||||
|
/** \defgroup intern_sky_model Sky Model
|
||||||
|
* \ingroup intern */
|
||||||
|
|
||||||
|
/** \defgroup intern_utf_conv UTF-8/16 Conversion (utfconv)
|
||||||
|
* \ingroup intern */
|
||||||
|
|
||||||
/** \defgroup audaspace Audaspace
|
/** \defgroup audaspace Audaspace
|
||||||
* \ingroup intern undoc
|
* \ingroup intern undoc
|
||||||
* \todo add to doxygen
|
* \todo add to doxygen */
|
||||||
*/
|
|
||||||
/** \defgroup audcoreaudio Audaspace CoreAudio
|
/** \defgroup audcoreaudio Audaspace CoreAudio
|
||||||
* \ingroup audaspace
|
* \ingroup audaspace */
|
||||||
*/
|
|
||||||
/** \defgroup audfx Audaspace FX
|
/** \defgroup audfx Audaspace FX
|
||||||
* \ingroup audaspace
|
* \ingroup audaspace */
|
||||||
*/
|
|
||||||
/** \defgroup audopenal Audaspace OpenAL
|
/** \defgroup audopenal Audaspace OpenAL
|
||||||
* \ingroup audaspace
|
* \ingroup audaspace */
|
||||||
*/
|
|
||||||
/** \defgroup audpulseaudio Audaspace PulseAudio
|
/** \defgroup audpulseaudio Audaspace PulseAudio
|
||||||
* \ingroup audaspace
|
* \ingroup audaspace */
|
||||||
*/
|
|
||||||
/** \defgroup audwasapi Audaspace WASAPI
|
/** \defgroup audwasapi Audaspace WASAPI
|
||||||
* \ingroup audaspace
|
* \ingroup audaspace */
|
||||||
*/
|
|
||||||
/** \defgroup audpython Audaspace Python
|
/** \defgroup audpython Audaspace Python
|
||||||
* \ingroup audaspace
|
* \ingroup audaspace */
|
||||||
*/
|
|
||||||
/** \defgroup audsdl Audaspace SDL
|
/** \defgroup audsdl Audaspace SDL
|
||||||
* \ingroup audaspace
|
* \ingroup audaspace */
|
||||||
*/
|
|
||||||
/** \defgroup audsrc Audaspace SRC
|
/** \defgroup audsrc Audaspace SRC
|
||||||
*
|
* \ingroup audaspace */
|
||||||
* \ingroup audaspace
|
|
||||||
*/
|
|
||||||
/** \defgroup audffmpeg Audaspace FFMpeg
|
/** \defgroup audffmpeg Audaspace FFMpeg
|
||||||
* \ingroup audaspace
|
* \ingroup audaspace */
|
||||||
*/
|
|
||||||
/** \defgroup audfftw Audaspace FFTW
|
/** \defgroup audfftw Audaspace FFTW
|
||||||
* \ingroup audaspace
|
* \ingroup audaspace */
|
||||||
*/
|
|
||||||
/** \defgroup audjack Audaspace Jack
|
/** \defgroup audjack Audaspace Jack
|
||||||
* \ingroup audaspace
|
* \ingroup audaspace */
|
||||||
*/
|
|
||||||
/** \defgroup audsndfile Audaspace sndfile
|
/** \defgroup audsndfile Audaspace sndfile
|
||||||
* \ingroup audaspace
|
* \ingroup audaspace */
|
||||||
*/
|
|
||||||
|
|
||||||
/** \defgroup GHOST GHOST API
|
/** \defgroup GHOST GHOST API
|
||||||
* \ingroup intern GUI
|
* \ingroup intern GUI
|
||||||
|
@@ -5,7 +5,8 @@
|
|||||||
/** \defgroup bmesh BMesh
|
/** \defgroup bmesh BMesh
|
||||||
* \ingroup blender
|
* \ingroup blender
|
||||||
*/
|
*/
|
||||||
/** \defgroup compositor Compositing */
|
/** \defgroup compositor Compositing
|
||||||
|
* \ingroup blender */
|
||||||
|
|
||||||
/** \defgroup python Python
|
/** \defgroup python Python
|
||||||
* \ingroup blender
|
* \ingroup blender
|
||||||
@@ -78,7 +79,8 @@
|
|||||||
* \ingroup blender
|
* \ingroup blender
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** \defgroup data DNA, RNA and .blend access*/
|
/** \defgroup data DNA, RNA and .blend access
|
||||||
|
* \ingroup blender */
|
||||||
|
|
||||||
/** \defgroup gpu GPU
|
/** \defgroup gpu GPU
|
||||||
* \ingroup blender
|
* \ingroup blender
|
||||||
@@ -101,11 +103,12 @@
|
|||||||
* merged in docs.
|
* merged in docs.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** \defgroup gui GUI */
|
/**
|
||||||
|
* \defgroup gui GUI
|
||||||
|
* \ingroup blender */
|
||||||
|
|
||||||
/** \defgroup wm Window Manager
|
/** \defgroup wm Window Manager
|
||||||
* \ingroup blender gui
|
* \ingroup gui */
|
||||||
*/
|
|
||||||
|
|
||||||
/* ================================ */
|
/* ================================ */
|
||||||
|
|
||||||
@@ -279,7 +282,8 @@
|
|||||||
* \ingroup gui
|
* \ingroup gui
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** \defgroup externformats External Formats */
|
/** \defgroup externformats External Formats
|
||||||
|
* \ingroup blender */
|
||||||
|
|
||||||
/** \defgroup collada COLLADA
|
/** \defgroup collada COLLADA
|
||||||
* \ingroup externformats
|
* \ingroup externformats
|
||||||
@@ -308,4 +312,7 @@
|
|||||||
/* ================================ */
|
/* ================================ */
|
||||||
|
|
||||||
/** \defgroup undoc Undocumented
|
/** \defgroup undoc Undocumented
|
||||||
* \brief Modules and libraries that are still undocumented, or lacking proper integration into the doxygen system, are marked in this group. */
|
*
|
||||||
|
* \brief Modules and libraries that are still undocumented,
|
||||||
|
* or lacking proper integration into the doxygen system, are marked in this group.
|
||||||
|
*/
|
||||||
|
@@ -61,7 +61,7 @@ def blender_extract_info(blender_bin: str) -> Dict[str, str]:
|
|||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
).stdout.decode(encoding="utf-8")
|
).stdout.decode(encoding="utf-8")
|
||||||
|
|
||||||
blender_version_ouput = subprocess.run(
|
blender_version_output = subprocess.run(
|
||||||
[blender_bin, "--version"],
|
[blender_bin, "--version"],
|
||||||
env=blender_env,
|
env=blender_env,
|
||||||
check=True,
|
check=True,
|
||||||
@@ -73,7 +73,7 @@ def blender_extract_info(blender_bin: str) -> Dict[str, str]:
|
|||||||
# check for each lines prefix to ensure these aren't included.
|
# check for each lines prefix to ensure these aren't included.
|
||||||
blender_version = ""
|
blender_version = ""
|
||||||
blender_date = ""
|
blender_date = ""
|
||||||
for l in blender_version_ouput.split("\n"):
|
for l in blender_version_output.split("\n"):
|
||||||
if l.startswith("Blender "):
|
if l.startswith("Blender "):
|
||||||
# Remove 'Blender' prefix.
|
# Remove 'Blender' prefix.
|
||||||
blender_version = l.split(" ", 1)[1].strip()
|
blender_version = l.split(" ", 1)[1].strip()
|
||||||
|
@@ -1103,6 +1103,7 @@ context_type_map = {
|
|||||||
"selectable_objects": ("Object", True),
|
"selectable_objects": ("Object", True),
|
||||||
"selected_asset_files": ("FileSelectEntry", True),
|
"selected_asset_files": ("FileSelectEntry", True),
|
||||||
"selected_bones": ("EditBone", True),
|
"selected_bones": ("EditBone", True),
|
||||||
|
"selected_editable_actions": ("Action", True),
|
||||||
"selected_editable_bones": ("EditBone", True),
|
"selected_editable_bones": ("EditBone", True),
|
||||||
"selected_editable_fcurves": ("FCurve", True),
|
"selected_editable_fcurves": ("FCurve", True),
|
||||||
"selected_editable_keyframes": ("Keyframe", True),
|
"selected_editable_keyframes": ("Keyframe", True),
|
||||||
@@ -1118,12 +1119,13 @@ context_type_map = {
|
|||||||
"selected_pose_bones": ("PoseBone", True),
|
"selected_pose_bones": ("PoseBone", True),
|
||||||
"selected_pose_bones_from_active_object": ("PoseBone", True),
|
"selected_pose_bones_from_active_object": ("PoseBone", True),
|
||||||
"selected_sequences": ("Sequence", True),
|
"selected_sequences": ("Sequence", True),
|
||||||
|
"selected_visible_actions": ("Action", True),
|
||||||
"selected_visible_fcurves": ("FCurve", True),
|
"selected_visible_fcurves": ("FCurve", True),
|
||||||
"sequences": ("Sequence", True),
|
"sequences": ("Sequence", True),
|
||||||
"soft_body": ("SoftBodyModifier", False),
|
"soft_body": ("SoftBodyModifier", False),
|
||||||
"speaker": ("Speaker", False),
|
"speaker": ("Speaker", False),
|
||||||
"texture": ("Texture", False),
|
"texture": ("Texture", False),
|
||||||
"texture_slot": ("MaterialTextureSlot", False),
|
"texture_slot": ("TextureSlot", False),
|
||||||
"texture_user": ("ID", False),
|
"texture_user": ("ID", False),
|
||||||
"texture_user_property": ("Property", False),
|
"texture_user_property": ("Property", False),
|
||||||
"ui_list": ("UIList", False),
|
"ui_list": ("UIList", False),
|
||||||
|
12
extern/hipew/README
vendored
Normal file
12
extern/hipew/README
vendored
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
The HIP Extension Wrangler Library (HIPEW) is a cross-platform open-source
|
||||||
|
C/C++ library to dynamically load the HIP library.
|
||||||
|
|
||||||
|
HIP (Heterogeneous-Compute Interface for Portability) is an API for C++
|
||||||
|
programming on AMD GPUs.
|
||||||
|
|
||||||
|
It is maintained as part of the Blender project, but included in extern/
|
||||||
|
for consistency with CUEW and CLEW libraries.
|
||||||
|
|
||||||
|
LICENSE
|
||||||
|
|
||||||
|
HIPEW is released under the Apache 2.0 license.
|
5
extern/hipew/README.blender
vendored
Normal file
5
extern/hipew/README.blender
vendored
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
Project: Blender
|
||||||
|
URL: https://git.blender.org/blender.git
|
||||||
|
License: Apache 2.0
|
||||||
|
Upstream version: N/A
|
||||||
|
Local modifications: None
|
10
extern/hipew/src/hipew.c
vendored
10
extern/hipew/src/hipew.c
vendored
@@ -219,17 +219,17 @@ static int hipewHasOldDriver(const char *hip_path) {
|
|||||||
DWORD verHandle = 0;
|
DWORD verHandle = 0;
|
||||||
DWORD verSize = GetFileVersionInfoSize(hip_path, &verHandle);
|
DWORD verSize = GetFileVersionInfoSize(hip_path, &verHandle);
|
||||||
int old_driver = 0;
|
int old_driver = 0;
|
||||||
if(verSize != 0) {
|
if (verSize != 0) {
|
||||||
LPSTR verData = (LPSTR)malloc(verSize);
|
LPSTR verData = (LPSTR)malloc(verSize);
|
||||||
if(GetFileVersionInfo(hip_path, verHandle, verSize, verData)) {
|
if (GetFileVersionInfo(hip_path, verHandle, verSize, verData)) {
|
||||||
LPBYTE lpBuffer = NULL;
|
LPBYTE lpBuffer = NULL;
|
||||||
UINT size = 0;
|
UINT size = 0;
|
||||||
if(VerQueryValue(verData, "\\", (VOID FAR * FAR *)&lpBuffer, &size)) {
|
if (VerQueryValue(verData, "\\", (VOID FAR * FAR *)&lpBuffer, &size)) {
|
||||||
if(size) {
|
if (size) {
|
||||||
VS_FIXEDFILEINFO *verInfo = (VS_FIXEDFILEINFO *)lpBuffer;
|
VS_FIXEDFILEINFO *verInfo = (VS_FIXEDFILEINFO *)lpBuffer;
|
||||||
/* Magic value from
|
/* Magic value from
|
||||||
* https://docs.microsoft.com/en-us/windows/win32/api/verrsrc/ns-verrsrc-vs_fixedfileinfo */
|
* https://docs.microsoft.com/en-us/windows/win32/api/verrsrc/ns-verrsrc-vs_fixedfileinfo */
|
||||||
if(verInfo->dwSignature == 0xfeef04bd) {
|
if (verInfo->dwSignature == 0xfeef04bd) {
|
||||||
unsigned int fileVersionLS0 = (verInfo->dwFileVersionLS >> 16) & 0xffff;
|
unsigned int fileVersionLS0 = (verInfo->dwFileVersionLS >> 16) & 0xffff;
|
||||||
unsigned int fileversionLS1 = (verInfo->dwFileVersionLS >> 0) & 0xffff;
|
unsigned int fileversionLS1 = (verInfo->dwFileVersionLS >> 0) & 0xffff;
|
||||||
/* Corresponds to versions older than AMD Radeon Pro 21.Q4. */
|
/* Corresponds to versions older than AMD Radeon Pro 21.Q4. */
|
||||||
|
2
extern/nanosvg/README.blender
vendored
2
extern/nanosvg/README.blender
vendored
@@ -1,7 +1,7 @@
|
|||||||
Project: NanoSVG
|
Project: NanoSVG
|
||||||
URL: https://github.com/memononen/nanosvg
|
URL: https://github.com/memononen/nanosvg
|
||||||
License: zlib
|
License: zlib
|
||||||
Upstream version:
|
Upstream version: 3cdd4a9d7886
|
||||||
Local modifications: Added some functionality to manage grease pencil layers
|
Local modifications: Added some functionality to manage grease pencil layers
|
||||||
|
|
||||||
Added a fix to SVG import arc and float errors (https://developer.blender.org/rB11dc674c78b49fc4e0b7c134c375b6c8b8eacbcc)
|
Added a fix to SVG import arc and float errors (https://developer.blender.org/rB11dc674c78b49fc4e0b7c134c375b6c8b8eacbcc)
|
||||||
|
@@ -45,7 +45,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/** \file
|
/** \file
|
||||||
* \ingroup Atomic
|
* \ingroup intern_atomic
|
||||||
*
|
*
|
||||||
* \brief Provides wrapper around system-specific atomic primitives,
|
* \brief Provides wrapper around system-specific atomic primitives,
|
||||||
* and some extensions (faked-atomic operations over float numbers).
|
* and some extensions (faked-atomic operations over float numbers).
|
||||||
|
@@ -44,6 +44,10 @@
|
|||||||
* The Original Code is: adapted from jemalloc.
|
* The Original Code is: adapted from jemalloc.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/** \file
|
||||||
|
* \ingroup intern_atomic
|
||||||
|
*/
|
||||||
|
|
||||||
#ifndef __ATOMIC_OPS_EXT_H__
|
#ifndef __ATOMIC_OPS_EXT_H__
|
||||||
#define __ATOMIC_OPS_EXT_H__
|
#define __ATOMIC_OPS_EXT_H__
|
||||||
|
|
||||||
|
@@ -5,7 +5,7 @@
|
|||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved.
|
* Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved.
|
||||||
* Copyright (C) 2009-2013 Facebook, Inc. All rights reserved.
|
* Copyright (C) 2009-2013 Facebook, Inc. All rights reserved.
|
||||||
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
* 1. Redistributions of source code must retain the above copyright notice(s),
|
* 1. Redistributions of source code must retain the above copyright notice(s),
|
||||||
@@ -13,7 +13,7 @@
|
|||||||
* 2. Redistributions in binary form must reproduce the above copyright notice(s),
|
* 2. Redistributions in binary form must reproduce the above copyright notice(s),
|
||||||
* this list of conditions and the following disclaimer in the documentation
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
* and/or other materials provided with the distribution.
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
|
||||||
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||||
@@ -26,6 +26,10 @@
|
|||||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/** \file
|
||||||
|
* \ingroup intern_atomic
|
||||||
|
*/
|
||||||
|
|
||||||
#ifndef __ATOMIC_OPS_MSVC_H__
|
#ifndef __ATOMIC_OPS_MSVC_H__
|
||||||
#define __ATOMIC_OPS_MSVC_H__
|
#define __ATOMIC_OPS_MSVC_H__
|
||||||
|
|
||||||
|
@@ -44,6 +44,10 @@
|
|||||||
* The Original Code is: adapted from jemalloc.
|
* The Original Code is: adapted from jemalloc.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/** \file
|
||||||
|
* \ingroup intern_atomic
|
||||||
|
*/
|
||||||
|
|
||||||
#ifndef __ATOMIC_OPS_UNIX_H__
|
#ifndef __ATOMIC_OPS_UNIX_H__
|
||||||
#define __ATOMIC_OPS_UNIX_H__
|
#define __ATOMIC_OPS_UNIX_H__
|
||||||
|
|
||||||
|
@@ -44,6 +44,10 @@
|
|||||||
* The Original Code is: adapted from jemalloc.
|
* The Original Code is: adapted from jemalloc.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/** \file
|
||||||
|
* \ingroup intern_atomic
|
||||||
|
*/
|
||||||
|
|
||||||
#ifndef __ATOMIC_OPS_UTILS_H__
|
#ifndef __ATOMIC_OPS_UTILS_H__
|
||||||
#define __ATOMIC_OPS_UTILS_H__
|
#define __ATOMIC_OPS_UTILS_H__
|
||||||
|
|
||||||
|
@@ -14,11 +14,8 @@
|
|||||||
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef __CLG_LOG_H__
|
|
||||||
#define __CLG_LOG_H__
|
|
||||||
|
|
||||||
/** \file
|
/** \file
|
||||||
* \ingroup clog
|
* \ingroup intern_clog
|
||||||
*
|
*
|
||||||
* C Logging Library (clog)
|
* C Logging Library (clog)
|
||||||
* ========================
|
* ========================
|
||||||
@@ -68,6 +65,9 @@
|
|||||||
* - 4+: May be used for more details than 3, should be avoided but not prevented.
|
* - 4+: May be used for more details than 3, should be avoided but not prevented.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#ifndef __CLG_LOG_H__
|
||||||
|
#define __CLG_LOG_H__
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif /* __cplusplus */
|
#endif /* __cplusplus */
|
||||||
|
@@ -15,7 +15,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/** \file
|
/** \file
|
||||||
* \ingroup clog
|
* \ingroup intern_clog
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
@@ -388,7 +388,7 @@ static void clg_ctx_fatal_action(CLogContext *ctx)
|
|||||||
|
|
||||||
static void clg_ctx_backtrace(CLogContext *ctx)
|
static void clg_ctx_backtrace(CLogContext *ctx)
|
||||||
{
|
{
|
||||||
/* Note: we avoid writing to 'FILE', for back-trace we make an exception,
|
/* NOTE: we avoid writing to 'FILE', for back-trace we make an exception,
|
||||||
* if necessary we could have a version of the callback that writes to file
|
* if necessary we could have a version of the callback that writes to file
|
||||||
* descriptor all at once. */
|
* descriptor all at once. */
|
||||||
ctx->callbacks.backtrace_fn(ctx->output_file);
|
ctx->callbacks.backtrace_fn(ctx->output_file);
|
||||||
|
@@ -40,6 +40,7 @@ set(SRC
|
|||||||
object_cull.cpp
|
object_cull.cpp
|
||||||
output_driver.cpp
|
output_driver.cpp
|
||||||
particles.cpp
|
particles.cpp
|
||||||
|
pointcloud.cpp
|
||||||
curves.cpp
|
curves.cpp
|
||||||
logging.cpp
|
logging.cpp
|
||||||
python.cpp
|
python.cpp
|
||||||
@@ -87,6 +88,7 @@ endif()
|
|||||||
|
|
||||||
set(ADDON_FILES
|
set(ADDON_FILES
|
||||||
addon/__init__.py
|
addon/__init__.py
|
||||||
|
addon/camera.py
|
||||||
addon/engine.py
|
addon/engine.py
|
||||||
addon/operators.py
|
addon/operators.py
|
||||||
addon/osl.py
|
addon/osl.py
|
||||||
@@ -101,6 +103,11 @@ add_definitions(${GL_DEFINITIONS})
|
|||||||
if(WITH_CYCLES_DEVICE_HIP)
|
if(WITH_CYCLES_DEVICE_HIP)
|
||||||
add_definitions(-DWITH_HIP)
|
add_definitions(-DWITH_HIP)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(WITH_CYCLES_DEVICE_METAL)
|
||||||
|
add_definitions(-DWITH_METAL)
|
||||||
|
endif()
|
||||||
|
|
||||||
if(WITH_MOD_FLUID)
|
if(WITH_MOD_FLUID)
|
||||||
add_definitions(-DWITH_FLUID)
|
add_definitions(-DWITH_FLUID)
|
||||||
endif()
|
endif()
|
||||||
|
84
intern/cycles/blender/addon/camera.py
Normal file
84
intern/cycles/blender/addon/camera.py
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2011-2021 Blender Foundation
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
# <pep8 compliant>
|
||||||
|
|
||||||
|
# Fit to match default projective camera with focal_length 50 and sensor_width 36.
|
||||||
|
default_fisheye_polynomial = [-1.1735143712967577e-05,
|
||||||
|
-0.019988736953434998,
|
||||||
|
-3.3525322965709175e-06,
|
||||||
|
3.099275275886036e-06,
|
||||||
|
-2.6064646454854524e-08]
|
||||||
|
|
||||||
|
# Utilities to generate lens polynomials to match built-in camera types, only here
|
||||||
|
# for reference at the moment, not used by the code.
|
||||||
|
def create_grid(sensor_height, sensor_width):
|
||||||
|
import numpy as np
|
||||||
|
if sensor_height is None:
|
||||||
|
sensor_height = sensor_width / (16 / 9) # Default aspect ratio 16:9
|
||||||
|
uu, vv = np.meshgrid(np.linspace(0, 1, 100), np.linspace(0, 1, 100))
|
||||||
|
uu = (uu - 0.5) * sensor_width
|
||||||
|
vv = (vv - 0.5) * sensor_height
|
||||||
|
rr = np.sqrt(uu ** 2 + vv ** 2)
|
||||||
|
return rr
|
||||||
|
|
||||||
|
|
||||||
|
def fisheye_lens_polynomial_from_projective(focal_length=50, sensor_width=36, sensor_height=None):
|
||||||
|
import numpy as np
|
||||||
|
rr = create_grid(sensor_height, sensor_width)
|
||||||
|
polynomial = np.polyfit(rr.flat, (-np.arctan(rr / focal_length)).flat, 4)
|
||||||
|
return list(reversed(polynomial))
|
||||||
|
|
||||||
|
|
||||||
|
def fisheye_lens_polynomial_from_projective_fov(fov, sensor_width=36, sensor_height=None):
|
||||||
|
import numpy as np
|
||||||
|
f = sensor_width / 2 / np.tan(fov / 2)
|
||||||
|
return fisheye_lens_polynomial_from_projective(f, sensor_width, sensor_height)
|
||||||
|
|
||||||
|
|
||||||
|
def fisheye_lens_polynomial_from_equisolid(lens=10.5, sensor_width=36, sensor_height=None):
|
||||||
|
import numpy as np
|
||||||
|
rr = create_grid(sensor_height, sensor_width)
|
||||||
|
x = rr.reshape(-1)
|
||||||
|
x = np.stack([x**i for i in [1, 2, 3, 4]])
|
||||||
|
y = (-2 * np.arcsin(rr / (2 * lens))).reshape(-1)
|
||||||
|
polynomial = np.linalg.lstsq(x.T, y.T, rcond=None)[0]
|
||||||
|
return [0] + list(polynomial)
|
||||||
|
|
||||||
|
|
||||||
|
def fisheye_lens_polynomial_from_equidistant(fov=180, sensor_width=36, sensor_height=None):
|
||||||
|
import numpy as np
|
||||||
|
return [0, -np.radians(fov) / sensor_width, 0, 0, 0]
|
||||||
|
|
||||||
|
|
||||||
|
def fisheye_lens_polynomial_from_distorted_projective_polynomial(k1, k2, k3, focal_length=50, sensor_width=36, sensor_height=None):
|
||||||
|
import numpy as np
|
||||||
|
rr = create_grid(sensor_height, sensor_width)
|
||||||
|
r2 = (rr / focal_length) ** 2
|
||||||
|
r4 = r2 * r2
|
||||||
|
r6 = r4 * r2
|
||||||
|
r_coeff = 1 + k1 * r2 + k2 * r4 + k3 * r6
|
||||||
|
polynomial = np.polyfit(rr.flat, (-np.arctan(rr / focal_length * r_coeff)).flat, 4)
|
||||||
|
return list(reversed(polynomial))
|
||||||
|
|
||||||
|
def fisheye_lens_polynomial_from_distorted_projective_divisions(k1, k2, focal_length=50, sensor_width=36, sensor_height=None):
|
||||||
|
import numpy as np
|
||||||
|
rr = create_grid(sensor_height, sensor_width)
|
||||||
|
r2 = (rr / focal_length) ** 2
|
||||||
|
r4 = r2 * r2
|
||||||
|
r_coeff = 1 + k1 * r2 + k2 * r4
|
||||||
|
polynomial = np.polyfit(rr.flat, (-np.arctan(rr / focal_length / r_coeff)).flat, 4)
|
||||||
|
return list(reversed(polynomial))
|
@@ -28,7 +28,7 @@ def _configure_argument_parser():
|
|||||||
action='store_true')
|
action='store_true')
|
||||||
parser.add_argument("--cycles-device",
|
parser.add_argument("--cycles-device",
|
||||||
help="Set the device to use for Cycles, overriding user preferences and the scene setting."
|
help="Set the device to use for Cycles, overriding user preferences and the scene setting."
|
||||||
"Valid options are 'CPU', 'CUDA', 'OPTIX', or 'HIP'"
|
" Valid options are 'CPU', 'CUDA', 'OPTIX', 'HIP' or 'METAL'. "
|
||||||
"Additionally, you can append '+CPU' to any GPU type for hybrid rendering.",
|
"Additionally, you can append '+CPU' to any GPU type for hybrid rendering.",
|
||||||
default=None)
|
default=None)
|
||||||
return parser
|
return parser
|
||||||
|
@@ -33,6 +33,7 @@ from math import pi
|
|||||||
# enums
|
# enums
|
||||||
|
|
||||||
from . import engine
|
from . import engine
|
||||||
|
from . import camera
|
||||||
|
|
||||||
enum_devices = (
|
enum_devices = (
|
||||||
('CPU', "CPU", "Use CPU for rendering"),
|
('CPU', "CPU", "Use CPU for rendering"),
|
||||||
@@ -72,6 +73,8 @@ enum_panorama_types = (
|
|||||||
('FISHEYE_EQUISOLID', "Fisheye Equisolid",
|
('FISHEYE_EQUISOLID', "Fisheye Equisolid",
|
||||||
"Similar to most fisheye modern lens, takes sensor dimensions into consideration"),
|
"Similar to most fisheye modern lens, takes sensor dimensions into consideration"),
|
||||||
('MIRRORBALL', "Mirror Ball", "Uses the mirror ball mapping"),
|
('MIRRORBALL', "Mirror Ball", "Uses the mirror ball mapping"),
|
||||||
|
('FISHEYE_LENS_POLYNOMIAL', "Fisheye Lens Polynomial",
|
||||||
|
"Defines the lens projection as polynomial to allow real world camera lenses to be mimicked."),
|
||||||
)
|
)
|
||||||
|
|
||||||
enum_curve_shape = (
|
enum_curve_shape = (
|
||||||
@@ -111,7 +114,8 @@ enum_device_type = (
|
|||||||
('CPU', "CPU", "CPU", 0),
|
('CPU', "CPU", "CPU", 0),
|
||||||
('CUDA', "CUDA", "CUDA", 1),
|
('CUDA', "CUDA", "CUDA", 1),
|
||||||
('OPTIX', "OptiX", "OptiX", 3),
|
('OPTIX', "OptiX", "OptiX", 3),
|
||||||
("HIP", "HIP", "HIP", 4)
|
('HIP', "HIP", "HIP", 4),
|
||||||
|
('METAL', "Metal", "Metal", 5)
|
||||||
)
|
)
|
||||||
|
|
||||||
enum_texture_limit = (
|
enum_texture_limit = (
|
||||||
@@ -429,7 +433,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
|
|||||||
)
|
)
|
||||||
|
|
||||||
direct_light_sampling_type: EnumProperty(
|
direct_light_sampling_type: EnumProperty(
|
||||||
name="Direct Light Sampling Type",
|
name="Direct Light Sampling",
|
||||||
description="The type of strategy used for sampling direct light contributions",
|
description="The type of strategy used for sampling direct light contributions",
|
||||||
items=enum_direct_light_sampling_type,
|
items=enum_direct_light_sampling_type,
|
||||||
default='MULTIPLE_IMPORTANCE_SAMPLING',
|
default='MULTIPLE_IMPORTANCE_SAMPLING',
|
||||||
@@ -790,7 +794,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
|
|||||||
)
|
)
|
||||||
|
|
||||||
use_auto_tile: BoolProperty(
|
use_auto_tile: BoolProperty(
|
||||||
name="Using Tiling",
|
name="Use Tiling",
|
||||||
description="Render high resolution images in tiles to reduce memory usage, using the specified tile size. Tiles are cached to disk while rendering to save memory",
|
description="Render high resolution images in tiles to reduce memory usage, using the specified tile size. Tiles are cached to disk while rendering to save memory",
|
||||||
default=True,
|
default=True,
|
||||||
)
|
)
|
||||||
@@ -890,6 +894,32 @@ class CyclesCameraSettings(bpy.types.PropertyGroup):
|
|||||||
default=pi,
|
default=pi,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
fisheye_polynomial_k0: FloatProperty(
|
||||||
|
name="Fisheye Polynomial K0",
|
||||||
|
description="Coefficient K0 of the lens polynomial",
|
||||||
|
default=camera.default_fisheye_polynomial[0], precision=6, step=0.1, subtype='ANGLE',
|
||||||
|
)
|
||||||
|
fisheye_polynomial_k1: FloatProperty(
|
||||||
|
name="Fisheye Polynomial K1",
|
||||||
|
description="Coefficient K1 of the lens polynomial",
|
||||||
|
default=camera.default_fisheye_polynomial[1], precision=6, step=0.1, subtype='ANGLE',
|
||||||
|
)
|
||||||
|
fisheye_polynomial_k2: FloatProperty(
|
||||||
|
name="Fisheye Polynomial K2",
|
||||||
|
description="Coefficient K2 of the lens polynomial",
|
||||||
|
default=camera.default_fisheye_polynomial[2], precision=6, step=0.1, subtype='ANGLE',
|
||||||
|
)
|
||||||
|
fisheye_polynomial_k3: FloatProperty(
|
||||||
|
name="Fisheye Polynomial K3",
|
||||||
|
description="Coefficient K3 of the lens polynomial",
|
||||||
|
default=camera.default_fisheye_polynomial[3], precision=6, step=0.1, subtype='ANGLE',
|
||||||
|
)
|
||||||
|
fisheye_polynomial_k4: FloatProperty(
|
||||||
|
name="Fisheye Polynomial K4",
|
||||||
|
description="Coefficient K4 of the lens polynomial",
|
||||||
|
default=camera.default_fisheye_polynomial[4], precision=6, step=0.1, subtype='ANGLE',
|
||||||
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def register(cls):
|
def register(cls):
|
||||||
bpy.types.Camera.cycles = PointerProperty(
|
bpy.types.Camera.cycles = PointerProperty(
|
||||||
@@ -1312,8 +1342,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
|||||||
|
|
||||||
def get_device_types(self, context):
|
def get_device_types(self, context):
|
||||||
import _cycles
|
import _cycles
|
||||||
has_cuda, has_optix, has_hip = _cycles.get_device_types()
|
has_cuda, has_optix, has_hip, has_metal = _cycles.get_device_types()
|
||||||
|
|
||||||
list = [('NONE', "None", "Don't use compute device", 0)]
|
list = [('NONE', "None", "Don't use compute device", 0)]
|
||||||
if has_cuda:
|
if has_cuda:
|
||||||
list.append(('CUDA', "CUDA", "Use CUDA for GPU acceleration", 1))
|
list.append(('CUDA', "CUDA", "Use CUDA for GPU acceleration", 1))
|
||||||
@@ -1321,6 +1350,8 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
|||||||
list.append(('OPTIX', "OptiX", "Use OptiX for GPU acceleration", 3))
|
list.append(('OPTIX', "OptiX", "Use OptiX for GPU acceleration", 3))
|
||||||
if has_hip:
|
if has_hip:
|
||||||
list.append(('HIP', "HIP", "Use HIP for GPU acceleration", 4))
|
list.append(('HIP', "HIP", "Use HIP for GPU acceleration", 4))
|
||||||
|
if has_metal:
|
||||||
|
list.append(('METAL', "Metal", "Use Metal for GPU acceleration", 5))
|
||||||
|
|
||||||
return list
|
return list
|
||||||
|
|
||||||
@@ -1346,7 +1377,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
|||||||
|
|
||||||
def update_device_entries(self, device_list):
|
def update_device_entries(self, device_list):
|
||||||
for device in device_list:
|
for device in device_list:
|
||||||
if not device[1] in {'CUDA', 'OPTIX', 'CPU', 'HIP'}:
|
if not device[1] in {'CUDA', 'OPTIX', 'CPU', 'HIP', 'METAL'}:
|
||||||
continue
|
continue
|
||||||
# Try to find existing Device entry
|
# Try to find existing Device entry
|
||||||
entry = self.find_existing_device_entry(device)
|
entry = self.find_existing_device_entry(device)
|
||||||
@@ -1390,7 +1421,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
|||||||
import _cycles
|
import _cycles
|
||||||
# Ensure `self.devices` is not re-allocated when the second call to
|
# Ensure `self.devices` is not re-allocated when the second call to
|
||||||
# get_devices_for_type is made, freeing items from the first list.
|
# get_devices_for_type is made, freeing items from the first list.
|
||||||
for device_type in ('CUDA', 'OPTIX', 'HIP'):
|
for device_type in ('CUDA', 'OPTIX', 'HIP', 'METAL'):
|
||||||
self.update_device_entries(_cycles.available_devices(device_type))
|
self.update_device_entries(_cycles.available_devices(device_type))
|
||||||
|
|
||||||
# Deprecated: use refresh_devices instead.
|
# Deprecated: use refresh_devices instead.
|
||||||
@@ -1442,6 +1473,8 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
|||||||
col.label(text="Requires discrete AMD GPU with RDNA architecture", icon='BLANK1')
|
col.label(text="Requires discrete AMD GPU with RDNA architecture", icon='BLANK1')
|
||||||
if sys.platform[:3] == "win":
|
if sys.platform[:3] == "win":
|
||||||
col.label(text="and AMD Radeon Pro 21.Q4 driver or newer", icon='BLANK1')
|
col.label(text="and AMD Radeon Pro 21.Q4 driver or newer", icon='BLANK1')
|
||||||
|
elif device_type == 'METAL':
|
||||||
|
col.label(text="Requires Apple Silicon and macOS 12.0 or newer", icon='BLANK1')
|
||||||
return
|
return
|
||||||
|
|
||||||
for device in devices:
|
for device in devices:
|
||||||
|
@@ -97,6 +97,11 @@ def use_cpu(context):
|
|||||||
return (get_device_type(context) == 'NONE' or cscene.device == 'CPU')
|
return (get_device_type(context) == 'NONE' or cscene.device == 'CPU')
|
||||||
|
|
||||||
|
|
||||||
|
def use_metal(context):
|
||||||
|
cscene = context.scene.cycles
|
||||||
|
|
||||||
|
return (get_device_type(context) == 'METAL' and cscene.device == 'GPU')
|
||||||
|
|
||||||
def use_cuda(context):
|
def use_cuda(context):
|
||||||
cscene = context.scene.cycles
|
cscene = context.scene.cycles
|
||||||
|
|
||||||
@@ -1015,7 +1020,7 @@ class CYCLES_OBJECT_PT_motion_blur(CyclesButtonsPanel, Panel):
|
|||||||
def poll(cls, context):
|
def poll(cls, context):
|
||||||
ob = context.object
|
ob = context.object
|
||||||
if CyclesButtonsPanel.poll(context) and ob:
|
if CyclesButtonsPanel.poll(context) and ob:
|
||||||
if ob.type in {'MESH', 'CURVE', 'CURVE', 'SURFACE', 'FONT', 'META', 'CAMERA'}:
|
if ob.type in {'MESH', 'CURVE', 'CURVE', 'SURFACE', 'FONT', 'META', 'CAMERA', 'HAIR', 'POINTCLOUD'}:
|
||||||
return True
|
return True
|
||||||
if ob.instance_type == 'COLLECTION' and ob.instance_collection:
|
if ob.instance_type == 'COLLECTION' and ob.instance_collection:
|
||||||
return True
|
return True
|
||||||
@@ -1819,37 +1824,38 @@ class CYCLES_RENDER_PT_debug(CyclesDebugButtonsPanel, Panel):
|
|||||||
|
|
||||||
def draw(self, context):
|
def draw(self, context):
|
||||||
layout = self.layout
|
layout = self.layout
|
||||||
|
layout.use_property_split = True
|
||||||
|
layout.use_property_decorate = False # No animation.
|
||||||
|
|
||||||
scene = context.scene
|
scene = context.scene
|
||||||
cscene = scene.cycles
|
cscene = scene.cycles
|
||||||
|
|
||||||
col = layout.column()
|
col = layout.column(heading="CPU")
|
||||||
|
|
||||||
col.label(text="CPU Flags:")
|
|
||||||
row = col.row(align=True)
|
row = col.row(align=True)
|
||||||
row.prop(cscene, "debug_use_cpu_sse2", toggle=True)
|
row.prop(cscene, "debug_use_cpu_sse2", toggle=True)
|
||||||
row.prop(cscene, "debug_use_cpu_sse3", toggle=True)
|
row.prop(cscene, "debug_use_cpu_sse3", toggle=True)
|
||||||
row.prop(cscene, "debug_use_cpu_sse41", toggle=True)
|
row.prop(cscene, "debug_use_cpu_sse41", toggle=True)
|
||||||
row.prop(cscene, "debug_use_cpu_avx", toggle=True)
|
row.prop(cscene, "debug_use_cpu_avx", toggle=True)
|
||||||
row.prop(cscene, "debug_use_cpu_avx2", toggle=True)
|
row.prop(cscene, "debug_use_cpu_avx2", toggle=True)
|
||||||
col.prop(cscene, "debug_bvh_layout")
|
col.prop(cscene, "debug_bvh_layout", text="BVH")
|
||||||
|
|
||||||
col.separator()
|
col.separator()
|
||||||
|
|
||||||
col = layout.column()
|
col = layout.column(heading="CUDA")
|
||||||
col.label(text="CUDA Flags:")
|
|
||||||
col.prop(cscene, "debug_use_cuda_adaptive_compile")
|
col.prop(cscene, "debug_use_cuda_adaptive_compile")
|
||||||
|
col = layout.column(heading="OptiX")
|
||||||
|
col.prop(cscene, "debug_use_optix_debug", text="Module Debug")
|
||||||
|
|
||||||
col.separator()
|
col.separator()
|
||||||
|
|
||||||
col = layout.column()
|
col.prop(cscene, "debug_bvh_type", text="Viewport BVH")
|
||||||
col.label(text="OptiX Flags:")
|
|
||||||
col.prop(cscene, "debug_use_optix_debug")
|
|
||||||
|
|
||||||
col.separator()
|
col.separator()
|
||||||
|
|
||||||
col = layout.column()
|
import _cycles
|
||||||
col.prop(cscene, "debug_bvh_type")
|
if _cycles.with_debug:
|
||||||
|
col.prop(cscene, "direct_light_sampling_type")
|
||||||
|
|
||||||
|
|
||||||
class CYCLES_RENDER_PT_simplify(CyclesButtonsPanel, Panel):
|
class CYCLES_RENDER_PT_simplify(CyclesButtonsPanel, Panel):
|
||||||
|
@@ -69,6 +69,12 @@ struct BlenderCamera {
|
|||||||
float pole_merge_angle_from;
|
float pole_merge_angle_from;
|
||||||
float pole_merge_angle_to;
|
float pole_merge_angle_to;
|
||||||
|
|
||||||
|
float fisheye_polynomial_k0;
|
||||||
|
float fisheye_polynomial_k1;
|
||||||
|
float fisheye_polynomial_k2;
|
||||||
|
float fisheye_polynomial_k3;
|
||||||
|
float fisheye_polynomial_k4;
|
||||||
|
|
||||||
enum { AUTO, HORIZONTAL, VERTICAL } sensor_fit;
|
enum { AUTO, HORIZONTAL, VERTICAL } sensor_fit;
|
||||||
float sensor_width;
|
float sensor_width;
|
||||||
float sensor_height;
|
float sensor_height;
|
||||||
@@ -200,6 +206,12 @@ static void blender_camera_from_object(BlenderCamera *bcam,
|
|||||||
bcam->longitude_min = RNA_float_get(&ccamera, "longitude_min");
|
bcam->longitude_min = RNA_float_get(&ccamera, "longitude_min");
|
||||||
bcam->longitude_max = RNA_float_get(&ccamera, "longitude_max");
|
bcam->longitude_max = RNA_float_get(&ccamera, "longitude_max");
|
||||||
|
|
||||||
|
bcam->fisheye_polynomial_k0 = RNA_float_get(&ccamera, "fisheye_polynomial_k0");
|
||||||
|
bcam->fisheye_polynomial_k1 = RNA_float_get(&ccamera, "fisheye_polynomial_k1");
|
||||||
|
bcam->fisheye_polynomial_k2 = RNA_float_get(&ccamera, "fisheye_polynomial_k2");
|
||||||
|
bcam->fisheye_polynomial_k3 = RNA_float_get(&ccamera, "fisheye_polynomial_k3");
|
||||||
|
bcam->fisheye_polynomial_k4 = RNA_float_get(&ccamera, "fisheye_polynomial_k4");
|
||||||
|
|
||||||
bcam->interocular_distance = b_camera.stereo().interocular_distance();
|
bcam->interocular_distance = b_camera.stereo().interocular_distance();
|
||||||
if (b_camera.stereo().convergence_mode() == BL::CameraStereoData::convergence_mode_PARALLEL) {
|
if (b_camera.stereo().convergence_mode() == BL::CameraStereoData::convergence_mode_PARALLEL) {
|
||||||
bcam->convergence_distance = FLT_MAX;
|
bcam->convergence_distance = FLT_MAX;
|
||||||
@@ -422,7 +434,8 @@ static void blender_camera_sync(Camera *cam,
|
|||||||
cam->set_full_height(height);
|
cam->set_full_height(height);
|
||||||
|
|
||||||
/* panorama sensor */
|
/* panorama sensor */
|
||||||
if (bcam->type == CAMERA_PANORAMA && bcam->panorama_type == PANORAMA_FISHEYE_EQUISOLID) {
|
if (bcam->type == CAMERA_PANORAMA && (bcam->panorama_type == PANORAMA_FISHEYE_EQUISOLID ||
|
||||||
|
bcam->panorama_type == PANORAMA_FISHEYE_LENS_POLYNOMIAL)) {
|
||||||
float fit_xratio = (float)bcam->render_width * bcam->pixelaspect.x;
|
float fit_xratio = (float)bcam->render_width * bcam->pixelaspect.x;
|
||||||
float fit_yratio = (float)bcam->render_height * bcam->pixelaspect.y;
|
float fit_yratio = (float)bcam->render_height * bcam->pixelaspect.y;
|
||||||
bool horizontal_fit;
|
bool horizontal_fit;
|
||||||
@@ -465,6 +478,12 @@ static void blender_camera_sync(Camera *cam,
|
|||||||
cam->set_latitude_min(bcam->latitude_min);
|
cam->set_latitude_min(bcam->latitude_min);
|
||||||
cam->set_latitude_max(bcam->latitude_max);
|
cam->set_latitude_max(bcam->latitude_max);
|
||||||
|
|
||||||
|
cam->set_fisheye_polynomial_k0(bcam->fisheye_polynomial_k0);
|
||||||
|
cam->set_fisheye_polynomial_k1(bcam->fisheye_polynomial_k1);
|
||||||
|
cam->set_fisheye_polynomial_k2(bcam->fisheye_polynomial_k2);
|
||||||
|
cam->set_fisheye_polynomial_k3(bcam->fisheye_polynomial_k3);
|
||||||
|
cam->set_fisheye_polynomial_k4(bcam->fisheye_polynomial_k4);
|
||||||
|
|
||||||
cam->set_longitude_min(bcam->longitude_min);
|
cam->set_longitude_min(bcam->longitude_min);
|
||||||
cam->set_longitude_max(bcam->longitude_max);
|
cam->set_longitude_max(bcam->longitude_max);
|
||||||
|
|
||||||
|
@@ -819,11 +819,14 @@ void BlenderSync::sync_hair(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, H
|
|||||||
new_hair.set_used_shaders(used_shaders);
|
new_hair.set_used_shaders(used_shaders);
|
||||||
|
|
||||||
if (view_layer.use_hair) {
|
if (view_layer.use_hair) {
|
||||||
|
#ifdef WITH_HAIR_NODES
|
||||||
if (b_ob_info.object_data.is_a(&RNA_Hair)) {
|
if (b_ob_info.object_data.is_a(&RNA_Hair)) {
|
||||||
/* Hair object. */
|
/* Hair object. */
|
||||||
sync_hair(&new_hair, b_ob_info, false);
|
sync_hair(&new_hair, b_ob_info, false);
|
||||||
}
|
}
|
||||||
else {
|
else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
/* Particle hair. */
|
/* Particle hair. */
|
||||||
bool need_undeformed = new_hair.need_attribute(scene, ATTR_STD_GENERATED);
|
bool need_undeformed = new_hair.need_attribute(scene, ATTR_STD_GENERATED);
|
||||||
BL::Mesh b_mesh = object_to_mesh(
|
BL::Mesh b_mesh = object_to_mesh(
|
||||||
@@ -870,12 +873,15 @@ void BlenderSync::sync_hair_motion(BL::Depsgraph b_depsgraph,
|
|||||||
|
|
||||||
/* Export deformed coordinates. */
|
/* Export deformed coordinates. */
|
||||||
if (ccl::BKE_object_is_deform_modified(b_ob_info, b_scene, preview)) {
|
if (ccl::BKE_object_is_deform_modified(b_ob_info, b_scene, preview)) {
|
||||||
|
#ifdef WITH_HAIR_NODES
|
||||||
if (b_ob_info.object_data.is_a(&RNA_Hair)) {
|
if (b_ob_info.object_data.is_a(&RNA_Hair)) {
|
||||||
/* Hair object. */
|
/* Hair object. */
|
||||||
sync_hair(hair, b_ob_info, true, motion_step);
|
sync_hair(hair, b_ob_info, true, motion_step);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
else {
|
else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
/* Particle hair. */
|
/* Particle hair. */
|
||||||
BL::Mesh b_mesh = object_to_mesh(
|
BL::Mesh b_mesh = object_to_mesh(
|
||||||
b_data, b_ob_info, b_depsgraph, false, Mesh::SUBDIVISION_NONE);
|
b_data, b_ob_info, b_depsgraph, false, Mesh::SUBDIVISION_NONE);
|
||||||
|
@@ -27,6 +27,7 @@ enum ComputeDevice {
|
|||||||
COMPUTE_DEVICE_CUDA = 1,
|
COMPUTE_DEVICE_CUDA = 1,
|
||||||
COMPUTE_DEVICE_OPTIX = 3,
|
COMPUTE_DEVICE_OPTIX = 3,
|
||||||
COMPUTE_DEVICE_HIP = 4,
|
COMPUTE_DEVICE_HIP = 4,
|
||||||
|
COMPUTE_DEVICE_METAL = 5,
|
||||||
|
|
||||||
COMPUTE_DEVICE_NUM
|
COMPUTE_DEVICE_NUM
|
||||||
};
|
};
|
||||||
@@ -85,6 +86,9 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
|
|||||||
else if (compute_device == COMPUTE_DEVICE_HIP) {
|
else if (compute_device == COMPUTE_DEVICE_HIP) {
|
||||||
mask |= DEVICE_MASK_HIP;
|
mask |= DEVICE_MASK_HIP;
|
||||||
}
|
}
|
||||||
|
else if (compute_device == COMPUTE_DEVICE_METAL) {
|
||||||
|
mask |= DEVICE_MASK_METAL;
|
||||||
|
}
|
||||||
vector<DeviceInfo> devices = Device::available_devices(mask);
|
vector<DeviceInfo> devices = Device::available_devices(mask);
|
||||||
|
|
||||||
/* Match device preferences and available devices. */
|
/* Match device preferences and available devices. */
|
||||||
|
@@ -19,6 +19,7 @@
|
|||||||
#include "scene/hair.h"
|
#include "scene/hair.h"
|
||||||
#include "scene/mesh.h"
|
#include "scene/mesh.h"
|
||||||
#include "scene/object.h"
|
#include "scene/object.h"
|
||||||
|
#include "scene/pointcloud.h"
|
||||||
#include "scene/volume.h"
|
#include "scene/volume.h"
|
||||||
|
|
||||||
#include "blender/sync.h"
|
#include "blender/sync.h"
|
||||||
@@ -31,10 +32,18 @@ CCL_NAMESPACE_BEGIN
|
|||||||
|
|
||||||
static Geometry::Type determine_geom_type(BObjectInfo &b_ob_info, bool use_particle_hair)
|
static Geometry::Type determine_geom_type(BObjectInfo &b_ob_info, bool use_particle_hair)
|
||||||
{
|
{
|
||||||
|
#ifdef WITH_HAIR_NODES
|
||||||
if (b_ob_info.object_data.is_a(&RNA_Hair) || use_particle_hair) {
|
if (b_ob_info.object_data.is_a(&RNA_Hair) || use_particle_hair) {
|
||||||
|
#else
|
||||||
|
if (use_particle_hair) {
|
||||||
|
#endif
|
||||||
return Geometry::HAIR;
|
return Geometry::HAIR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (b_ob_info.object_data.is_a(&RNA_PointCloud)) {
|
||||||
|
return Geometry::POINTCLOUD;
|
||||||
|
}
|
||||||
|
|
||||||
if (b_ob_info.object_data.is_a(&RNA_Volume) ||
|
if (b_ob_info.object_data.is_a(&RNA_Volume) ||
|
||||||
(b_ob_info.object_data == b_ob_info.real_object.data() &&
|
(b_ob_info.object_data == b_ob_info.real_object.data() &&
|
||||||
object_fluid_gas_domain_find(b_ob_info.real_object))) {
|
object_fluid_gas_domain_find(b_ob_info.real_object))) {
|
||||||
@@ -107,6 +116,9 @@ Geometry *BlenderSync::sync_geometry(BL::Depsgraph &b_depsgraph,
|
|||||||
else if (geom_type == Geometry::VOLUME) {
|
else if (geom_type == Geometry::VOLUME) {
|
||||||
geom = scene->create_node<Volume>();
|
geom = scene->create_node<Volume>();
|
||||||
}
|
}
|
||||||
|
else if (geom_type == Geometry::POINTCLOUD) {
|
||||||
|
geom = scene->create_node<PointCloud>();
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
geom = scene->create_node<Mesh>();
|
geom = scene->create_node<Mesh>();
|
||||||
}
|
}
|
||||||
@@ -166,6 +178,10 @@ Geometry *BlenderSync::sync_geometry(BL::Depsgraph &b_depsgraph,
|
|||||||
Volume *volume = static_cast<Volume *>(geom);
|
Volume *volume = static_cast<Volume *>(geom);
|
||||||
sync_volume(b_ob_info, volume);
|
sync_volume(b_ob_info, volume);
|
||||||
}
|
}
|
||||||
|
else if (geom_type == Geometry::POINTCLOUD) {
|
||||||
|
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
|
||||||
|
sync_pointcloud(pointcloud, b_ob_info);
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
Mesh *mesh = static_cast<Mesh *>(geom);
|
Mesh *mesh = static_cast<Mesh *>(geom);
|
||||||
sync_mesh(b_depsgraph, b_ob_info, mesh);
|
sync_mesh(b_depsgraph, b_ob_info, mesh);
|
||||||
@@ -215,7 +231,11 @@ void BlenderSync::sync_geometry_motion(BL::Depsgraph &b_depsgraph,
|
|||||||
if (progress.get_cancel())
|
if (progress.get_cancel())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
#ifdef WITH_HAIR_NODES
|
||||||
if (b_ob_info.object_data.is_a(&RNA_Hair) || use_particle_hair) {
|
if (b_ob_info.object_data.is_a(&RNA_Hair) || use_particle_hair) {
|
||||||
|
#else
|
||||||
|
if (use_particle_hair) {
|
||||||
|
#endif
|
||||||
Hair *hair = static_cast<Hair *>(geom);
|
Hair *hair = static_cast<Hair *>(geom);
|
||||||
sync_hair_motion(b_depsgraph, b_ob_info, hair, motion_step);
|
sync_hair_motion(b_depsgraph, b_ob_info, hair, motion_step);
|
||||||
}
|
}
|
||||||
@@ -223,6 +243,10 @@ void BlenderSync::sync_geometry_motion(BL::Depsgraph &b_depsgraph,
|
|||||||
object_fluid_gas_domain_find(b_ob_info.real_object)) {
|
object_fluid_gas_domain_find(b_ob_info.real_object)) {
|
||||||
/* No volume motion blur support yet. */
|
/* No volume motion blur support yet. */
|
||||||
}
|
}
|
||||||
|
else if (b_ob_info.object_data.is_a(&RNA_PointCloud)) {
|
||||||
|
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
|
||||||
|
sync_pointcloud_motion(pointcloud, b_ob_info, motion_step);
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
Mesh *mesh = static_cast<Mesh *>(geom);
|
Mesh *mesh = static_cast<Mesh *>(geom);
|
||||||
sync_mesh_motion(b_depsgraph, b_ob_info, mesh, motion_step);
|
sync_mesh_motion(b_depsgraph, b_ob_info, mesh, motion_step);
|
||||||
|
@@ -24,8 +24,14 @@ CCL_NAMESPACE_BEGIN
|
|||||||
|
|
||||||
/* Packed Images */
|
/* Packed Images */
|
||||||
|
|
||||||
BlenderImageLoader::BlenderImageLoader(BL::Image b_image, int frame)
|
BlenderImageLoader::BlenderImageLoader(BL::Image b_image,
|
||||||
: b_image(b_image), frame(frame), free_cache(!b_image.has_data())
|
const int frame,
|
||||||
|
const bool is_preview_render)
|
||||||
|
: b_image(b_image),
|
||||||
|
frame(frame),
|
||||||
|
/* Don't free cache for preview render to avoid race condition from T93560, to be fixed
|
||||||
|
properly later as we are close to release. */
|
||||||
|
free_cache(!is_preview_render && !b_image.has_data())
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -25,7 +25,7 @@ CCL_NAMESPACE_BEGIN
|
|||||||
|
|
||||||
class BlenderImageLoader : public ImageLoader {
|
class BlenderImageLoader : public ImageLoader {
|
||||||
public:
|
public:
|
||||||
BlenderImageLoader(BL::Image b_image, int frame);
|
BlenderImageLoader(BL::Image b_image, const int frame, const bool is_preview_render);
|
||||||
|
|
||||||
bool load_metadata(const ImageDeviceFeatures &features, ImageMetaData &metadata) override;
|
bool load_metadata(const ImageDeviceFeatures &features, ImageMetaData &metadata) override;
|
||||||
bool load_pixels(const ImageMetaData &metadata,
|
bool load_pixels(const ImageMetaData &metadata,
|
||||||
|
@@ -72,7 +72,8 @@ bool BlenderSync::object_is_geometry(BObjectInfo &b_ob_info)
|
|||||||
|
|
||||||
BL::Object::type_enum type = b_ob_info.iter_object.type();
|
BL::Object::type_enum type = b_ob_info.iter_object.type();
|
||||||
|
|
||||||
if (type == BL::Object::type_VOLUME || type == BL::Object::type_HAIR) {
|
if (type == BL::Object::type_VOLUME || type == BL::Object::type_HAIR ||
|
||||||
|
type == BL::Object::type_POINTCLOUD) {
|
||||||
/* Will be exported attached to mesh. */
|
/* Will be exported attached to mesh. */
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -206,7 +207,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* only interested in object that we can create meshes from */
|
/* only interested in object that we can create geometry from */
|
||||||
if (!object_is_geometry(b_ob_info)) {
|
if (!object_is_geometry(b_ob_info)) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@@ -66,7 +66,7 @@ bool BlenderOutputDriver::read_render_tile(const Tile &tile)
|
|||||||
|
|
||||||
bool BlenderOutputDriver::update_render_tile(const Tile &tile)
|
bool BlenderOutputDriver::update_render_tile(const Tile &tile)
|
||||||
{
|
{
|
||||||
/* Use final write for preview renders, otherwise render result wouldn't be be updated
|
/* Use final write for preview renders, otherwise render result wouldn't be updated
|
||||||
* quickly on Blender side. For all other cases we use the display driver. */
|
* quickly on Blender side. For all other cases we use the display driver. */
|
||||||
if (b_engine_.is_preview()) {
|
if (b_engine_.is_preview()) {
|
||||||
write_render_tile(tile);
|
write_render_tile(tile);
|
||||||
|
253
intern/cycles/blender/pointcloud.cpp
Normal file
253
intern/cycles/blender/pointcloud.cpp
Normal file
@@ -0,0 +1,253 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2011-2013 Blender Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "scene/pointcloud.h"
|
||||||
|
#include "scene/attribute.h"
|
||||||
|
#include "scene/scene.h"
|
||||||
|
|
||||||
|
#include "blender/sync.h"
|
||||||
|
#include "blender/util.h"
|
||||||
|
|
||||||
|
#include "util/foreach.h"
|
||||||
|
#include "util/hash.h"
|
||||||
|
|
||||||
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
template<typename TypeInCycles, typename GetValueAtIndex>
|
||||||
|
static void fill_generic_attribute(BL::PointCloud &b_pointcloud,
|
||||||
|
TypeInCycles *data,
|
||||||
|
const GetValueAtIndex &get_value_at_index)
|
||||||
|
{
|
||||||
|
const int num_points = b_pointcloud.points.length();
|
||||||
|
for (int i = 0; i < num_points; i++) {
|
||||||
|
data[i] = get_value_at_index(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void copy_attributes(PointCloud *pointcloud, BL::PointCloud b_pointcloud)
|
||||||
|
{
|
||||||
|
AttributeSet &attributes = pointcloud->attributes;
|
||||||
|
for (BL::Attribute &b_attribute : b_pointcloud.attributes) {
|
||||||
|
const ustring name{b_attribute.name().c_str()};
|
||||||
|
|
||||||
|
if (attributes.find(name)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const AttributeElement element = ATTR_ELEMENT_VERTEX;
|
||||||
|
const BL::Attribute::data_type_enum b_data_type = b_attribute.data_type();
|
||||||
|
switch (b_data_type) {
|
||||||
|
case BL::Attribute::data_type_FLOAT: {
|
||||||
|
BL::FloatAttribute b_float_attribute{b_attribute};
|
||||||
|
Attribute *attr = attributes.add(name, TypeFloat, element);
|
||||||
|
float *data = attr->data_float();
|
||||||
|
fill_generic_attribute(
|
||||||
|
b_pointcloud, data, [&](int i) { return b_float_attribute.data[i].value(); });
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case BL::Attribute::data_type_BOOLEAN: {
|
||||||
|
BL::BoolAttribute b_bool_attribute{b_attribute};
|
||||||
|
Attribute *attr = attributes.add(name, TypeFloat, element);
|
||||||
|
float *data = attr->data_float();
|
||||||
|
fill_generic_attribute(
|
||||||
|
b_pointcloud, data, [&](int i) { return (float)b_bool_attribute.data[i].value(); });
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case BL::Attribute::data_type_INT: {
|
||||||
|
BL::IntAttribute b_int_attribute{b_attribute};
|
||||||
|
Attribute *attr = attributes.add(name, TypeFloat, element);
|
||||||
|
float *data = attr->data_float();
|
||||||
|
fill_generic_attribute(
|
||||||
|
b_pointcloud, data, [&](int i) { return (float)b_int_attribute.data[i].value(); });
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case BL::Attribute::data_type_FLOAT_VECTOR: {
|
||||||
|
BL::FloatVectorAttribute b_vector_attribute{b_attribute};
|
||||||
|
Attribute *attr = attributes.add(name, TypeVector, element);
|
||||||
|
float3 *data = attr->data_float3();
|
||||||
|
fill_generic_attribute(b_pointcloud, data, [&](int i) {
|
||||||
|
BL::Array<float, 3> v = b_vector_attribute.data[i].vector();
|
||||||
|
return make_float3(v[0], v[1], v[2]);
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case BL::Attribute::data_type_FLOAT_COLOR: {
|
||||||
|
BL::FloatColorAttribute b_color_attribute{b_attribute};
|
||||||
|
Attribute *attr = attributes.add(name, TypeRGBA, element);
|
||||||
|
float4 *data = attr->data_float4();
|
||||||
|
fill_generic_attribute(b_pointcloud, data, [&](int i) {
|
||||||
|
BL::Array<float, 4> v = b_color_attribute.data[i].color();
|
||||||
|
return make_float4(v[0], v[1], v[2], v[3]);
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case BL::Attribute::data_type_FLOAT2: {
|
||||||
|
BL::Float2Attribute b_float2_attribute{b_attribute};
|
||||||
|
Attribute *attr = attributes.add(name, TypeFloat2, element);
|
||||||
|
float2 *data = attr->data_float2();
|
||||||
|
fill_generic_attribute(b_pointcloud, data, [&](int i) {
|
||||||
|
BL::Array<float, 2> v = b_float2_attribute.data[i].vector();
|
||||||
|
return make_float2(v[0], v[1]);
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
/* Not supported. */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void export_pointcloud(Scene *scene, PointCloud *pointcloud, BL::PointCloud b_pointcloud)
|
||||||
|
{
|
||||||
|
/* TODO: optimize so we can straight memcpy arrays from Blender? */
|
||||||
|
|
||||||
|
/* Add requested attributes. */
|
||||||
|
Attribute *attr_random = NULL;
|
||||||
|
if (pointcloud->need_attribute(scene, ATTR_STD_POINT_RANDOM)) {
|
||||||
|
attr_random = pointcloud->attributes.add(ATTR_STD_POINT_RANDOM);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Reserve memory. */
|
||||||
|
const int num_points = b_pointcloud.points.length();
|
||||||
|
pointcloud->reserve(num_points);
|
||||||
|
|
||||||
|
/* Export points. */
|
||||||
|
BL::PointCloud::points_iterator b_point_iter;
|
||||||
|
for (b_pointcloud.points.begin(b_point_iter); b_point_iter != b_pointcloud.points.end();
|
||||||
|
++b_point_iter) {
|
||||||
|
BL::Point b_point = *b_point_iter;
|
||||||
|
const float3 co = get_float3(b_point.co());
|
||||||
|
const float radius = b_point.radius();
|
||||||
|
pointcloud->add_point(co, radius);
|
||||||
|
|
||||||
|
/* Random number per point. */
|
||||||
|
if (attr_random != NULL) {
|
||||||
|
attr_random->add(hash_uint2_to_float(b_point.index(), 0));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Export attributes */
|
||||||
|
copy_attributes(pointcloud, b_pointcloud);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void export_pointcloud_motion(PointCloud *pointcloud,
|
||||||
|
BL::PointCloud b_pointcloud,
|
||||||
|
int motion_step)
|
||||||
|
{
|
||||||
|
/* Find or add attribute. */
|
||||||
|
Attribute *attr_mP = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||||
|
bool new_attribute = false;
|
||||||
|
|
||||||
|
if (!attr_mP) {
|
||||||
|
attr_mP = pointcloud->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||||
|
new_attribute = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Export motion points. */
|
||||||
|
const int num_points = pointcloud->num_points();
|
||||||
|
float3 *mP = attr_mP->data_float3() + motion_step * num_points;
|
||||||
|
bool have_motion = false;
|
||||||
|
int num_motion_points = 0;
|
||||||
|
const array<float3> &pointcloud_points = pointcloud->get_points();
|
||||||
|
|
||||||
|
BL::PointCloud::points_iterator b_point_iter;
|
||||||
|
for (b_pointcloud.points.begin(b_point_iter); b_point_iter != b_pointcloud.points.end();
|
||||||
|
++b_point_iter) {
|
||||||
|
BL::Point b_point = *b_point_iter;
|
||||||
|
|
||||||
|
if (num_motion_points < num_points) {
|
||||||
|
float3 P = get_float3(b_point.co());
|
||||||
|
P.w = b_point.radius();
|
||||||
|
mP[num_motion_points] = P;
|
||||||
|
have_motion = have_motion || (P != pointcloud_points[num_motion_points]);
|
||||||
|
num_motion_points++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* In case of new attribute, we verify if there really was any motion. */
|
||||||
|
if (new_attribute) {
|
||||||
|
if (num_motion_points != num_points || !have_motion) {
|
||||||
|
pointcloud->attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||||
|
}
|
||||||
|
else if (motion_step > 0) {
|
||||||
|
/* Motion, fill up previous steps that we might have skipped because
|
||||||
|
* they had no motion, but we need them anyway now. */
|
||||||
|
for (int step = 0; step < motion_step; step++) {
|
||||||
|
pointcloud->copy_center_to_motion_step(step);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Export attributes */
|
||||||
|
copy_attributes(pointcloud, b_pointcloud);
|
||||||
|
}
|
||||||
|
|
||||||
|
void BlenderSync::sync_pointcloud(PointCloud *pointcloud, BObjectInfo &b_ob_info)
|
||||||
|
{
|
||||||
|
size_t old_numpoints = pointcloud->num_points();
|
||||||
|
|
||||||
|
array<Node *> used_shaders = pointcloud->get_used_shaders();
|
||||||
|
|
||||||
|
PointCloud new_pointcloud;
|
||||||
|
new_pointcloud.set_used_shaders(used_shaders);
|
||||||
|
|
||||||
|
/* TODO: add option to filter out points in the view layer. */
|
||||||
|
BL::PointCloud b_pointcloud(b_ob_info.object_data);
|
||||||
|
export_pointcloud(scene, &new_pointcloud, b_pointcloud);
|
||||||
|
|
||||||
|
/* update original sockets */
|
||||||
|
for (const SocketType &socket : new_pointcloud.type->inputs) {
|
||||||
|
/* Those sockets are updated in sync_object, so do not modify them. */
|
||||||
|
if (socket.name == "use_motion_blur" || socket.name == "motion_steps" ||
|
||||||
|
socket.name == "used_shaders") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
pointcloud->set_value(socket, new_pointcloud, socket);
|
||||||
|
}
|
||||||
|
|
||||||
|
pointcloud->attributes.clear();
|
||||||
|
foreach (Attribute &attr, new_pointcloud.attributes.attributes) {
|
||||||
|
pointcloud->attributes.attributes.push_back(std::move(attr));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* tag update */
|
||||||
|
const bool rebuild = (pointcloud && old_numpoints != pointcloud->num_points());
|
||||||
|
pointcloud->tag_update(scene, rebuild);
|
||||||
|
}
|
||||||
|
|
||||||
|
void BlenderSync::sync_pointcloud_motion(PointCloud *pointcloud,
|
||||||
|
BObjectInfo &b_ob_info,
|
||||||
|
int motion_step)
|
||||||
|
{
|
||||||
|
/* Skip if nothing exported. */
|
||||||
|
if (pointcloud->num_points() == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Export deformed coordinates. */
|
||||||
|
if (ccl::BKE_object_is_deform_modified(b_ob_info, b_scene, preview)) {
|
||||||
|
/* PointCloud object. */
|
||||||
|
BL::PointCloud b_pointcloud(b_ob_info.object_data);
|
||||||
|
export_pointcloud_motion(pointcloud, b_pointcloud, motion_step);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* No deformation on this frame, copy coordinates if other frames did have it. */
|
||||||
|
pointcloud->copy_center_to_motion_step(motion_step);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
CCL_NAMESPACE_END
|
@@ -906,16 +906,18 @@ static PyObject *enable_print_stats_func(PyObject * /*self*/, PyObject * /*args*
|
|||||||
static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/)
|
static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/)
|
||||||
{
|
{
|
||||||
vector<DeviceType> device_types = Device::available_types();
|
vector<DeviceType> device_types = Device::available_types();
|
||||||
bool has_cuda = false, has_optix = false, has_hip = false;
|
bool has_cuda = false, has_optix = false, has_hip = false, has_metal = false;
|
||||||
foreach (DeviceType device_type, device_types) {
|
foreach (DeviceType device_type, device_types) {
|
||||||
has_cuda |= (device_type == DEVICE_CUDA);
|
has_cuda |= (device_type == DEVICE_CUDA);
|
||||||
has_optix |= (device_type == DEVICE_OPTIX);
|
has_optix |= (device_type == DEVICE_OPTIX);
|
||||||
has_hip |= (device_type == DEVICE_HIP);
|
has_hip |= (device_type == DEVICE_HIP);
|
||||||
|
has_metal |= (device_type == DEVICE_METAL);
|
||||||
}
|
}
|
||||||
PyObject *list = PyTuple_New(3);
|
PyObject *list = PyTuple_New(4);
|
||||||
PyTuple_SET_ITEM(list, 0, PyBool_FromLong(has_cuda));
|
PyTuple_SET_ITEM(list, 0, PyBool_FromLong(has_cuda));
|
||||||
PyTuple_SET_ITEM(list, 1, PyBool_FromLong(has_optix));
|
PyTuple_SET_ITEM(list, 1, PyBool_FromLong(has_optix));
|
||||||
PyTuple_SET_ITEM(list, 2, PyBool_FromLong(has_hip));
|
PyTuple_SET_ITEM(list, 2, PyBool_FromLong(has_hip));
|
||||||
|
PyTuple_SET_ITEM(list, 3, PyBool_FromLong(has_metal));
|
||||||
return list;
|
return list;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -944,6 +946,9 @@ static PyObject *set_device_override_func(PyObject * /*self*/, PyObject *arg)
|
|||||||
else if (override == "HIP") {
|
else if (override == "HIP") {
|
||||||
BlenderSession::device_override = DEVICE_MASK_HIP;
|
BlenderSession::device_override = DEVICE_MASK_HIP;
|
||||||
}
|
}
|
||||||
|
else if (override == "METAL") {
|
||||||
|
BlenderSession::device_override = DEVICE_MASK_METAL;
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
printf("\nError: %s is not a valid Cycles device.\n", override.c_str());
|
printf("\nError: %s is not a valid Cycles device.\n", override.c_str());
|
||||||
Py_RETURN_FALSE;
|
Py_RETURN_FALSE;
|
||||||
@@ -1054,5 +1059,13 @@ void *CCL_python_module_init()
|
|||||||
Py_INCREF(Py_False);
|
Py_INCREF(Py_False);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef WITH_CYCLES_DEBUG
|
||||||
|
PyModule_AddObject(mod, "with_debug", Py_True);
|
||||||
|
Py_INCREF(Py_True);
|
||||||
|
#else /* WITH_CYCLES_DEBUG */
|
||||||
|
PyModule_AddObject(mod, "with_debug", Py_False);
|
||||||
|
Py_INCREF(Py_False);
|
||||||
|
#endif /* WITH_CYCLES_DEBUG */
|
||||||
|
|
||||||
return (void *)mod;
|
return (void *)mod;
|
||||||
}
|
}
|
||||||
|
@@ -396,6 +396,13 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
|
|||||||
/* set the current view */
|
/* set the current view */
|
||||||
b_engine.active_view_set(b_rview_name.c_str());
|
b_engine.active_view_set(b_rview_name.c_str());
|
||||||
|
|
||||||
|
/* Force update in this case, since the camera transform on each frame changes
|
||||||
|
* in different views. This could be optimized by somehow storing the animated
|
||||||
|
* camera transforms separate from the fixed stereo transform. */
|
||||||
|
if ((scene->need_motion() != Scene::MOTION_NONE) && view_index > 0) {
|
||||||
|
sync->tag_update();
|
||||||
|
}
|
||||||
|
|
||||||
/* update scene */
|
/* update scene */
|
||||||
BL::Object b_camera_override(b_engine.camera_override());
|
BL::Object b_camera_override(b_engine.camera_override());
|
||||||
sync->sync_camera(b_render, b_camera_override, width, height, b_rview_name.c_str());
|
sync->sync_camera(b_render, b_camera_override, width, height, b_rview_name.c_str());
|
||||||
@@ -629,7 +636,7 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
|
|||||||
integrator->set_use_emission((bake_filter & BL::BakeSettings::pass_filter_EMIT) != 0);
|
integrator->set_use_emission((bake_filter & BL::BakeSettings::pass_filter_EMIT) != 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Always use transpanent background for baking. */
|
/* Always use transparent background for baking. */
|
||||||
scene->background->set_transparent(true);
|
scene->background->set_transparent(true);
|
||||||
|
|
||||||
/* Load built-in images from Blender. */
|
/* Load built-in images from Blender. */
|
||||||
|
@@ -378,11 +378,20 @@ static ShaderNode *add_node(Scene *scene,
|
|||||||
}
|
}
|
||||||
else if (b_node.is_a(&RNA_ShaderNodeMapRange)) {
|
else if (b_node.is_a(&RNA_ShaderNodeMapRange)) {
|
||||||
BL::ShaderNodeMapRange b_map_range_node(b_node);
|
BL::ShaderNodeMapRange b_map_range_node(b_node);
|
||||||
|
if (b_map_range_node.data_type() == BL::ShaderNodeMapRange::data_type_FLOAT_VECTOR) {
|
||||||
|
VectorMapRangeNode *vector_map_range_node = graph->create_node<VectorMapRangeNode>();
|
||||||
|
vector_map_range_node->set_use_clamp(b_map_range_node.clamp());
|
||||||
|
vector_map_range_node->set_range_type(
|
||||||
|
(NodeMapRangeType)b_map_range_node.interpolation_type());
|
||||||
|
node = vector_map_range_node;
|
||||||
|
}
|
||||||
|
else {
|
||||||
MapRangeNode *map_range_node = graph->create_node<MapRangeNode>();
|
MapRangeNode *map_range_node = graph->create_node<MapRangeNode>();
|
||||||
map_range_node->set_clamp(b_map_range_node.clamp());
|
map_range_node->set_clamp(b_map_range_node.clamp());
|
||||||
map_range_node->set_range_type((NodeMapRangeType)b_map_range_node.interpolation_type());
|
map_range_node->set_range_type((NodeMapRangeType)b_map_range_node.interpolation_type());
|
||||||
node = map_range_node;
|
node = map_range_node;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
else if (b_node.is_a(&RNA_ShaderNodeClamp)) {
|
else if (b_node.is_a(&RNA_ShaderNodeClamp)) {
|
||||||
BL::ShaderNodeClamp b_clamp_node(b_node);
|
BL::ShaderNodeClamp b_clamp_node(b_node);
|
||||||
ClampNode *clamp_node = graph->create_node<ClampNode>();
|
ClampNode *clamp_node = graph->create_node<ClampNode>();
|
||||||
@@ -762,7 +771,8 @@ static ShaderNode *add_node(Scene *scene,
|
|||||||
int scene_frame = b_scene.frame_current();
|
int scene_frame = b_scene.frame_current();
|
||||||
int image_frame = image_user_frame_number(b_image_user, b_image, scene_frame);
|
int image_frame = image_user_frame_number(b_image_user, b_image, scene_frame);
|
||||||
image->handle = scene->image_manager->add_image(
|
image->handle = scene->image_manager->add_image(
|
||||||
new BlenderImageLoader(b_image, image_frame), image->image_params());
|
new BlenderImageLoader(b_image, image_frame, b_engine.is_preview()),
|
||||||
|
image->image_params());
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
ustring filename = ustring(
|
ustring filename = ustring(
|
||||||
@@ -797,7 +807,8 @@ static ShaderNode *add_node(Scene *scene,
|
|||||||
if (is_builtin) {
|
if (is_builtin) {
|
||||||
int scene_frame = b_scene.frame_current();
|
int scene_frame = b_scene.frame_current();
|
||||||
int image_frame = image_user_frame_number(b_image_user, b_image, scene_frame);
|
int image_frame = image_user_frame_number(b_image_user, b_image, scene_frame);
|
||||||
env->handle = scene->image_manager->add_image(new BlenderImageLoader(b_image, image_frame),
|
env->handle = scene->image_manager->add_image(
|
||||||
|
new BlenderImageLoader(b_image, image_frame, b_engine.is_preview()),
|
||||||
env->image_params());
|
env->image_params());
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@@ -95,6 +95,11 @@ void BlenderSync::reset(BL::BlendData &b_data, BL::Scene &b_scene)
|
|||||||
this->b_scene = b_scene;
|
this->b_scene = b_scene;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void BlenderSync::tag_update()
|
||||||
|
{
|
||||||
|
has_updates_ = true;
|
||||||
|
}
|
||||||
|
|
||||||
/* Sync */
|
/* Sync */
|
||||||
|
|
||||||
void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d)
|
void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d)
|
||||||
|
@@ -66,6 +66,8 @@ class BlenderSync {
|
|||||||
|
|
||||||
void reset(BL::BlendData &b_data, BL::Scene &b_scene);
|
void reset(BL::BlendData &b_data, BL::Scene &b_scene);
|
||||||
|
|
||||||
|
void tag_update();
|
||||||
|
|
||||||
/* sync */
|
/* sync */
|
||||||
void sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d);
|
void sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d);
|
||||||
void sync_data(BL::RenderSettings &b_render,
|
void sync_data(BL::RenderSettings &b_render,
|
||||||
@@ -167,12 +169,16 @@ class BlenderSync {
|
|||||||
Hair *hair, BL::Mesh &b_mesh, BObjectInfo &b_ob_info, bool motion, int motion_step = 0);
|
Hair *hair, BL::Mesh &b_mesh, BObjectInfo &b_ob_info, bool motion, int motion_step = 0);
|
||||||
bool object_has_particle_hair(BL::Object b_ob);
|
bool object_has_particle_hair(BL::Object b_ob);
|
||||||
|
|
||||||
|
/* Point Cloud */
|
||||||
|
void sync_pointcloud(PointCloud *pointcloud, BObjectInfo &b_ob_info);
|
||||||
|
void sync_pointcloud_motion(PointCloud *pointcloud, BObjectInfo &b_ob_info, int motion_step = 0);
|
||||||
|
|
||||||
/* Camera */
|
/* Camera */
|
||||||
void sync_camera_motion(
|
void sync_camera_motion(
|
||||||
BL::RenderSettings &b_render, BL::Object &b_ob, int width, int height, float motion_time);
|
BL::RenderSettings &b_render, BL::Object &b_ob, int width, int height, float motion_time);
|
||||||
|
|
||||||
/* Geometry */
|
/* Geometry */
|
||||||
Geometry *sync_geometry(BL::Depsgraph &b_depsgrpah,
|
Geometry *sync_geometry(BL::Depsgraph &b_depsgraph,
|
||||||
BObjectInfo &b_ob_info,
|
BObjectInfo &b_ob_info,
|
||||||
bool object_updated,
|
bool object_updated,
|
||||||
bool use_particle_hair,
|
bool use_particle_hair,
|
||||||
@@ -267,7 +273,6 @@ class BlenderSync {
|
|||||||
|
|
||||||
Progress &progress;
|
Progress &progress;
|
||||||
|
|
||||||
protected:
|
|
||||||
/* Indicates that `sync_recalc()` detected changes in the scene.
|
/* Indicates that `sync_recalc()` detected changes in the scene.
|
||||||
* If this flag is false then the data is considered to be up-to-date and will not be
|
* If this flag is false then the data is considered to be up-to-date and will not be
|
||||||
* synchronized at all. */
|
* synchronized at all. */
|
||||||
|
@@ -33,6 +33,17 @@ set(SRC
|
|||||||
unaligned.cpp
|
unaligned.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
|
set(SRC_METAL
|
||||||
|
metal.mm
|
||||||
|
)
|
||||||
|
|
||||||
|
if(WITH_CYCLES_DEVICE_METAL)
|
||||||
|
list(APPEND SRC
|
||||||
|
${SRC_METAL}
|
||||||
|
)
|
||||||
|
add_definitions(-DWITH_METAL)
|
||||||
|
endif()
|
||||||
|
|
||||||
set(SRC_HEADERS
|
set(SRC_HEADERS
|
||||||
bvh.h
|
bvh.h
|
||||||
bvh2.h
|
bvh2.h
|
||||||
@@ -46,6 +57,7 @@ set(SRC_HEADERS
|
|||||||
sort.h
|
sort.h
|
||||||
split.h
|
split.h
|
||||||
unaligned.h
|
unaligned.h
|
||||||
|
metal.h
|
||||||
)
|
)
|
||||||
|
|
||||||
set(LIB
|
set(LIB
|
||||||
|
@@ -26,6 +26,7 @@
|
|||||||
#include "scene/hair.h"
|
#include "scene/hair.h"
|
||||||
#include "scene/mesh.h"
|
#include "scene/mesh.h"
|
||||||
#include "scene/object.h"
|
#include "scene/object.h"
|
||||||
|
#include "scene/pointcloud.h"
|
||||||
#include "scene/scene.h"
|
#include "scene/scene.h"
|
||||||
|
|
||||||
#include "util/algorithm.h"
|
#include "util/algorithm.h"
|
||||||
@@ -113,9 +114,9 @@ void BVHBuild::add_reference_triangles(BoundBox &root,
|
|||||||
else {
|
else {
|
||||||
/* Motion triangles, trace optimized case: we split triangle
|
/* Motion triangles, trace optimized case: we split triangle
|
||||||
* primitives into separate nodes for each of the time steps.
|
* primitives into separate nodes for each of the time steps.
|
||||||
* This way we minimize overlap of neighbor curve primitives.
|
* This way we minimize overlap of neighbor triangle primitives.
|
||||||
*/
|
*/
|
||||||
const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1;
|
const int num_bvh_steps = params.num_motion_triangle_steps * 2 + 1;
|
||||||
const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
|
const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
|
||||||
const size_t num_verts = mesh->verts.size();
|
const size_t num_verts = mesh->verts.size();
|
||||||
const size_t num_steps = mesh->motion_steps;
|
const size_t num_steps = mesh->motion_steps;
|
||||||
@@ -269,6 +270,101 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Hair *hair
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void BVHBuild::add_reference_points(BoundBox &root,
|
||||||
|
BoundBox &center,
|
||||||
|
PointCloud *pointcloud,
|
||||||
|
int i)
|
||||||
|
{
|
||||||
|
const Attribute *point_attr_mP = NULL;
|
||||||
|
if (pointcloud->has_motion_blur()) {
|
||||||
|
point_attr_mP = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||||
|
}
|
||||||
|
|
||||||
|
const float3 *points_data = &pointcloud->points[0];
|
||||||
|
const float *radius_data = &pointcloud->radius[0];
|
||||||
|
const size_t num_points = pointcloud->num_points();
|
||||||
|
const float3 *motion_data = (point_attr_mP) ? point_attr_mP->data_float3() : NULL;
|
||||||
|
const size_t num_steps = pointcloud->get_motion_steps();
|
||||||
|
|
||||||
|
if (point_attr_mP == NULL) {
|
||||||
|
/* Really simple logic for static points. */
|
||||||
|
for (uint j = 0; j < num_points; j++) {
|
||||||
|
const PointCloud::Point point = pointcloud->get_point(j);
|
||||||
|
BoundBox bounds = BoundBox::empty;
|
||||||
|
point.bounds_grow(points_data, radius_data, bounds);
|
||||||
|
if (bounds.valid()) {
|
||||||
|
references.push_back(BVHReference(bounds, j, i, PRIMITIVE_POINT));
|
||||||
|
root.grow(bounds);
|
||||||
|
center.grow(bounds.center2());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (params.num_motion_point_steps == 0 || params.use_spatial_split) {
|
||||||
|
/* Simple case of motion points: single node for the whole
|
||||||
|
* shutter time. Lowest memory usage but less optimal
|
||||||
|
* rendering.
|
||||||
|
*/
|
||||||
|
/* TODO(sergey): Support motion steps for spatially split BVH. */
|
||||||
|
for (uint j = 0; j < num_points; j++) {
|
||||||
|
const PointCloud::Point point = pointcloud->get_point(j);
|
||||||
|
BoundBox bounds = BoundBox::empty;
|
||||||
|
point.bounds_grow(points_data, radius_data, bounds);
|
||||||
|
for (size_t step = 0; step < num_steps - 1; step++) {
|
||||||
|
point.bounds_grow(motion_data + step * num_points, radius_data, bounds);
|
||||||
|
}
|
||||||
|
if (bounds.valid()) {
|
||||||
|
references.push_back(BVHReference(bounds, j, i, PRIMITIVE_MOTION_POINT));
|
||||||
|
root.grow(bounds);
|
||||||
|
center.grow(bounds.center2());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* Motion points, trace optimized case: we split point
|
||||||
|
* primitives into separate nodes for each of the time steps.
|
||||||
|
* This way we minimize overlap of neighbor point primitives.
|
||||||
|
*/
|
||||||
|
const int num_bvh_steps = params.num_motion_point_steps * 2 + 1;
|
||||||
|
const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
|
||||||
|
|
||||||
|
for (uint j = 0; j < num_points; j++) {
|
||||||
|
const PointCloud::Point point = pointcloud->get_point(j);
|
||||||
|
const size_t num_steps = pointcloud->get_motion_steps();
|
||||||
|
const float3 *point_steps = point_attr_mP->data_float3();
|
||||||
|
|
||||||
|
/* Calculate bounding box of the previous time step.
|
||||||
|
* Will be reused later to avoid duplicated work on
|
||||||
|
* calculating BVH time step boundbox.
|
||||||
|
*/
|
||||||
|
float4 prev_key = point.motion_key(
|
||||||
|
points_data, radius_data, point_steps, num_points, num_steps, 0.0f, j);
|
||||||
|
BoundBox prev_bounds = BoundBox::empty;
|
||||||
|
point.bounds_grow(prev_key, prev_bounds);
|
||||||
|
/* Create all primitive time steps, */
|
||||||
|
for (int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
|
||||||
|
const float curr_time = (float)(bvh_step)*num_bvh_steps_inv_1;
|
||||||
|
float4 curr_key = point.motion_key(
|
||||||
|
points_data, radius_data, point_steps, num_points, num_steps, curr_time, j);
|
||||||
|
BoundBox curr_bounds = BoundBox::empty;
|
||||||
|
point.bounds_grow(curr_key, curr_bounds);
|
||||||
|
BoundBox bounds = prev_bounds;
|
||||||
|
bounds.grow(curr_bounds);
|
||||||
|
if (bounds.valid()) {
|
||||||
|
const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
|
||||||
|
references.push_back(
|
||||||
|
BVHReference(bounds, j, i, PRIMITIVE_MOTION_POINT, prev_time, curr_time));
|
||||||
|
root.grow(bounds);
|
||||||
|
center.grow(bounds.center2());
|
||||||
|
}
|
||||||
|
/* Current time boundbox becomes previous one for the
|
||||||
|
* next time step.
|
||||||
|
*/
|
||||||
|
prev_bounds = curr_bounds;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void BVHBuild::add_reference_geometry(BoundBox &root,
|
void BVHBuild::add_reference_geometry(BoundBox &root,
|
||||||
BoundBox &center,
|
BoundBox &center,
|
||||||
Geometry *geom,
|
Geometry *geom,
|
||||||
@@ -282,6 +378,10 @@ void BVHBuild::add_reference_geometry(BoundBox &root,
|
|||||||
Hair *hair = static_cast<Hair *>(geom);
|
Hair *hair = static_cast<Hair *>(geom);
|
||||||
add_reference_curves(root, center, hair, object_index);
|
add_reference_curves(root, center, hair, object_index);
|
||||||
}
|
}
|
||||||
|
else if (geom->geometry_type == Geometry::POINTCLOUD) {
|
||||||
|
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
|
||||||
|
add_reference_points(root, center, pointcloud, object_index);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void BVHBuild::add_reference_object(BoundBox &root, BoundBox &center, Object *ob, int i)
|
void BVHBuild::add_reference_object(BoundBox &root, BoundBox &center, Object *ob, int i)
|
||||||
@@ -311,6 +411,10 @@ static size_t count_primitives(Geometry *geom)
|
|||||||
Hair *hair = static_cast<Hair *>(geom);
|
Hair *hair = static_cast<Hair *>(geom);
|
||||||
return count_curve_segments(hair);
|
return count_curve_segments(hair);
|
||||||
}
|
}
|
||||||
|
else if (geom->geometry_type == Geometry::POINTCLOUD) {
|
||||||
|
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
|
||||||
|
return pointcloud->num_points();
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -328,9 +432,10 @@ void BVHBuild::add_references(BVHRange &root)
|
|||||||
if (!ob->get_geometry()->is_instanced()) {
|
if (!ob->get_geometry()->is_instanced()) {
|
||||||
num_alloc_references += count_primitives(ob->get_geometry());
|
num_alloc_references += count_primitives(ob->get_geometry());
|
||||||
}
|
}
|
||||||
else
|
else {
|
||||||
num_alloc_references++;
|
num_alloc_references++;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
num_alloc_references += count_primitives(ob->get_geometry());
|
num_alloc_references += count_primitives(ob->get_geometry());
|
||||||
}
|
}
|
||||||
@@ -394,7 +499,7 @@ BVHNode *BVHBuild::run()
|
|||||||
spatial_min_overlap = root.bounds().safe_area() * params.spatial_split_alpha;
|
spatial_min_overlap = root.bounds().safe_area() * params.spatial_split_alpha;
|
||||||
spatial_free_index = 0;
|
spatial_free_index = 0;
|
||||||
|
|
||||||
need_prim_time = params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0;
|
need_prim_time = params.use_motion_steps();
|
||||||
|
|
||||||
/* init progress updates */
|
/* init progress updates */
|
||||||
double build_start_time;
|
double build_start_time;
|
||||||
@@ -535,7 +640,8 @@ bool BVHBuild::range_within_max_leaf_size(const BVHRange &range,
|
|||||||
const vector<BVHReference> &references) const
|
const vector<BVHReference> &references) const
|
||||||
{
|
{
|
||||||
size_t size = range.size();
|
size_t size = range.size();
|
||||||
size_t max_leaf_size = max(params.max_triangle_leaf_size, params.max_curve_leaf_size);
|
size_t max_leaf_size = max(max(params.max_triangle_leaf_size, params.max_curve_leaf_size),
|
||||||
|
params.max_point_leaf_size);
|
||||||
|
|
||||||
if (size > max_leaf_size)
|
if (size > max_leaf_size)
|
||||||
return false;
|
return false;
|
||||||
@@ -544,32 +650,44 @@ bool BVHBuild::range_within_max_leaf_size(const BVHRange &range,
|
|||||||
size_t num_motion_triangles = 0;
|
size_t num_motion_triangles = 0;
|
||||||
size_t num_curves = 0;
|
size_t num_curves = 0;
|
||||||
size_t num_motion_curves = 0;
|
size_t num_motion_curves = 0;
|
||||||
|
size_t num_points = 0;
|
||||||
|
size_t num_motion_points = 0;
|
||||||
|
|
||||||
for (int i = 0; i < size; i++) {
|
for (int i = 0; i < size; i++) {
|
||||||
const BVHReference &ref = references[range.start() + i];
|
const BVHReference &ref = references[range.start() + i];
|
||||||
|
|
||||||
if (ref.prim_type() & PRIMITIVE_ALL_CURVE) {
|
if (ref.prim_type() & PRIMITIVE_CURVE) {
|
||||||
if (ref.prim_type() & PRIMITIVE_ALL_MOTION) {
|
if (ref.prim_type() & PRIMITIVE_MOTION) {
|
||||||
num_motion_curves++;
|
num_motion_curves++;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
num_curves++;
|
num_curves++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) {
|
else if (ref.prim_type() & PRIMITIVE_TRIANGLE) {
|
||||||
if (ref.prim_type() & PRIMITIVE_ALL_MOTION) {
|
if (ref.prim_type() & PRIMITIVE_MOTION) {
|
||||||
num_motion_triangles++;
|
num_motion_triangles++;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
num_triangles++;
|
num_triangles++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (ref.prim_type() & PRIMITIVE_POINT) {
|
||||||
|
if (ref.prim_type() & PRIMITIVE_MOTION) {
|
||||||
|
num_motion_points++;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
num_points++;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return (num_triangles <= params.max_triangle_leaf_size) &&
|
return (num_triangles <= params.max_triangle_leaf_size) &&
|
||||||
(num_motion_triangles <= params.max_motion_triangle_leaf_size) &&
|
(num_motion_triangles <= params.max_motion_triangle_leaf_size) &&
|
||||||
(num_curves <= params.max_curve_leaf_size) &&
|
(num_curves <= params.max_curve_leaf_size) &&
|
||||||
(num_motion_curves <= params.max_motion_curve_leaf_size);
|
(num_motion_curves <= params.max_motion_curve_leaf_size) &&
|
||||||
|
(num_points <= params.max_point_leaf_size) &&
|
||||||
|
(num_motion_points <= params.max_motion_point_leaf_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* multithreaded binning builder */
|
/* multithreaded binning builder */
|
||||||
@@ -855,7 +973,7 @@ BVHNode *BVHBuild::create_leaf_node(const BVHRange &range, const vector<BVHRefer
|
|||||||
for (int i = 0; i < range.size(); i++) {
|
for (int i = 0; i < range.size(); i++) {
|
||||||
const BVHReference &ref = references[range.start() + i];
|
const BVHReference &ref = references[range.start() + i];
|
||||||
if (ref.prim_index() != -1) {
|
if (ref.prim_index() != -1) {
|
||||||
uint32_t type_index = bitscan((uint32_t)(ref.prim_type() & PRIMITIVE_ALL));
|
uint32_t type_index = PRIMITIVE_INDEX(ref.prim_type() & PRIMITIVE_ALL);
|
||||||
p_ref[type_index].push_back(ref);
|
p_ref[type_index].push_back(ref);
|
||||||
p_type[type_index].push_back(ref.prim_type());
|
p_type[type_index].push_back(ref.prim_type());
|
||||||
p_index[type_index].push_back(ref.prim_index());
|
p_index[type_index].push_back(ref.prim_index());
|
||||||
|
@@ -39,6 +39,7 @@ class Geometry;
|
|||||||
class Hair;
|
class Hair;
|
||||||
class Mesh;
|
class Mesh;
|
||||||
class Object;
|
class Object;
|
||||||
|
class PointCloud;
|
||||||
class Progress;
|
class Progress;
|
||||||
|
|
||||||
/* BVH Builder */
|
/* BVH Builder */
|
||||||
@@ -68,6 +69,7 @@ class BVHBuild {
|
|||||||
/* Adding references. */
|
/* Adding references. */
|
||||||
void add_reference_triangles(BoundBox &root, BoundBox &center, Mesh *mesh, int i);
|
void add_reference_triangles(BoundBox &root, BoundBox &center, Mesh *mesh, int i);
|
||||||
void add_reference_curves(BoundBox &root, BoundBox &center, Hair *hair, int i);
|
void add_reference_curves(BoundBox &root, BoundBox &center, Hair *hair, int i);
|
||||||
|
void add_reference_points(BoundBox &root, BoundBox &center, PointCloud *pointcloud, int i);
|
||||||
void add_reference_geometry(BoundBox &root, BoundBox &center, Geometry *geom, int i);
|
void add_reference_geometry(BoundBox &root, BoundBox &center, Geometry *geom, int i);
|
||||||
void add_reference_object(BoundBox &root, BoundBox &center, Object *ob, int i);
|
void add_reference_object(BoundBox &root, BoundBox &center, Object *ob, int i);
|
||||||
void add_references(BVHRange &root);
|
void add_references(BVHRange &root);
|
||||||
|
@@ -19,6 +19,7 @@
|
|||||||
|
|
||||||
#include "bvh/bvh2.h"
|
#include "bvh/bvh2.h"
|
||||||
#include "bvh/embree.h"
|
#include "bvh/embree.h"
|
||||||
|
#include "bvh/metal.h"
|
||||||
#include "bvh/multi.h"
|
#include "bvh/multi.h"
|
||||||
#include "bvh/optix.h"
|
#include "bvh/optix.h"
|
||||||
|
|
||||||
@@ -40,8 +41,12 @@ const char *bvh_layout_name(BVHLayout layout)
|
|||||||
return "EMBREE";
|
return "EMBREE";
|
||||||
case BVH_LAYOUT_OPTIX:
|
case BVH_LAYOUT_OPTIX:
|
||||||
return "OPTIX";
|
return "OPTIX";
|
||||||
|
case BVH_LAYOUT_METAL:
|
||||||
|
return "METAL";
|
||||||
case BVH_LAYOUT_MULTI_OPTIX:
|
case BVH_LAYOUT_MULTI_OPTIX:
|
||||||
|
case BVH_LAYOUT_MULTI_METAL:
|
||||||
case BVH_LAYOUT_MULTI_OPTIX_EMBREE:
|
case BVH_LAYOUT_MULTI_OPTIX_EMBREE:
|
||||||
|
case BVH_LAYOUT_MULTI_METAL_EMBREE:
|
||||||
return "MULTI";
|
return "MULTI";
|
||||||
case BVH_LAYOUT_ALL:
|
case BVH_LAYOUT_ALL:
|
||||||
return "ALL";
|
return "ALL";
|
||||||
@@ -102,9 +107,18 @@ BVH *BVH::create(const BVHParams &params,
|
|||||||
#else
|
#else
|
||||||
(void)device;
|
(void)device;
|
||||||
break;
|
break;
|
||||||
|
#endif
|
||||||
|
case BVH_LAYOUT_METAL:
|
||||||
|
#ifdef WITH_METAL
|
||||||
|
return bvh_metal_create(params, geometry, objects, device);
|
||||||
|
#else
|
||||||
|
(void)device;
|
||||||
|
break;
|
||||||
#endif
|
#endif
|
||||||
case BVH_LAYOUT_MULTI_OPTIX:
|
case BVH_LAYOUT_MULTI_OPTIX:
|
||||||
|
case BVH_LAYOUT_MULTI_METAL:
|
||||||
case BVH_LAYOUT_MULTI_OPTIX_EMBREE:
|
case BVH_LAYOUT_MULTI_OPTIX_EMBREE:
|
||||||
|
case BVH_LAYOUT_MULTI_METAL_EMBREE:
|
||||||
return new BVHMulti(params, geometry, objects);
|
return new BVHMulti(params, geometry, objects);
|
||||||
case BVH_LAYOUT_NONE:
|
case BVH_LAYOUT_NONE:
|
||||||
case BVH_LAYOUT_ALL:
|
case BVH_LAYOUT_ALL:
|
||||||
|
@@ -20,6 +20,7 @@
|
|||||||
#include "scene/hair.h"
|
#include "scene/hair.h"
|
||||||
#include "scene/mesh.h"
|
#include "scene/mesh.h"
|
||||||
#include "scene/object.h"
|
#include "scene/object.h"
|
||||||
|
#include "scene/pointcloud.h"
|
||||||
|
|
||||||
#include "bvh/build.h"
|
#include "bvh/build.h"
|
||||||
#include "bvh/node.h"
|
#include "bvh/node.h"
|
||||||
@@ -386,7 +387,7 @@ void BVH2::refit_primitives(int start, int end, BoundBox &bbox, uint &visibility
|
|||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
/* Primitives. */
|
/* Primitives. */
|
||||||
if (pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
|
if (pack.prim_type[prim] & PRIMITIVE_CURVE) {
|
||||||
/* Curves. */
|
/* Curves. */
|
||||||
const Hair *hair = static_cast<const Hair *>(ob->get_geometry());
|
const Hair *hair = static_cast<const Hair *>(ob->get_geometry());
|
||||||
int prim_offset = (params.top_level) ? hair->prim_offset : 0;
|
int prim_offset = (params.top_level) ? hair->prim_offset : 0;
|
||||||
@@ -409,6 +410,30 @@ void BVH2::refit_primitives(int start, int end, BoundBox &bbox, uint &visibility
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (pack.prim_type[prim] & PRIMITIVE_POINT) {
|
||||||
|
/* Points. */
|
||||||
|
const PointCloud *pointcloud = static_cast<const PointCloud *>(ob->get_geometry());
|
||||||
|
int prim_offset = (params.top_level) ? pointcloud->prim_offset : 0;
|
||||||
|
const float3 *points = &pointcloud->points[0];
|
||||||
|
const float *radius = &pointcloud->radius[0];
|
||||||
|
PointCloud::Point point = pointcloud->get_point(pidx - prim_offset);
|
||||||
|
|
||||||
|
point.bounds_grow(points, radius, bbox);
|
||||||
|
|
||||||
|
/* Motion points. */
|
||||||
|
if (pointcloud->get_use_motion_blur()) {
|
||||||
|
Attribute *attr = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||||
|
|
||||||
|
if (attr) {
|
||||||
|
size_t pointcloud_size = pointcloud->points.size();
|
||||||
|
size_t steps = pointcloud->get_motion_steps() - 1;
|
||||||
|
float3 *point_steps = attr->data_float3();
|
||||||
|
|
||||||
|
for (size_t i = 0; i < steps; i++)
|
||||||
|
point.bounds_grow(point_steps + i * pointcloud_size, radius, bbox);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
/* Triangles. */
|
/* Triangles. */
|
||||||
const Mesh *mesh = static_cast<const Mesh *>(ob->get_geometry());
|
const Mesh *mesh = static_cast<const Mesh *>(ob->get_geometry());
|
||||||
@@ -505,7 +530,8 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
|||||||
pack.leaf_nodes.resize(leaf_nodes_size);
|
pack.leaf_nodes.resize(leaf_nodes_size);
|
||||||
pack.object_node.resize(objects.size());
|
pack.object_node.resize(objects.size());
|
||||||
|
|
||||||
if (params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0) {
|
if (params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0 ||
|
||||||
|
params.num_motion_point_steps > 0) {
|
||||||
pack.prim_time.resize(prim_index_size);
|
pack.prim_time.resize(prim_index_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -564,13 +590,7 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
|||||||
float2 *bvh_prim_time = bvh->pack.prim_time.size() ? &bvh->pack.prim_time[0] : NULL;
|
float2 *bvh_prim_time = bvh->pack.prim_time.size() ? &bvh->pack.prim_time[0] : NULL;
|
||||||
|
|
||||||
for (size_t i = 0; i < bvh_prim_index_size; i++) {
|
for (size_t i = 0; i < bvh_prim_index_size; i++) {
|
||||||
if (bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
|
|
||||||
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset;
|
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset;
|
||||||
}
|
|
||||||
else {
|
|
||||||
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i];
|
pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i];
|
||||||
pack_prim_visibility[pack_prim_index_offset] = bvh_prim_visibility[i];
|
pack_prim_visibility[pack_prim_index_offset] = bvh_prim_visibility[i];
|
||||||
pack_prim_object[pack_prim_index_offset] = 0; // unused for instances
|
pack_prim_object[pack_prim_index_offset] = 0; // unused for instances
|
||||||
|
@@ -45,6 +45,7 @@
|
|||||||
# include "scene/hair.h"
|
# include "scene/hair.h"
|
||||||
# include "scene/mesh.h"
|
# include "scene/mesh.h"
|
||||||
# include "scene/object.h"
|
# include "scene/object.h"
|
||||||
|
# include "scene/pointcloud.h"
|
||||||
|
|
||||||
# include "util/foreach.h"
|
# include "util/foreach.h"
|
||||||
# include "util/log.h"
|
# include "util/log.h"
|
||||||
@@ -90,7 +91,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
|
|||||||
++ctx->num_hits;
|
++ctx->num_hits;
|
||||||
|
|
||||||
/* Always use baked shadow transparency for curves. */
|
/* Always use baked shadow transparency for curves. */
|
||||||
if (current_isect.type & PRIMITIVE_ALL_CURVE) {
|
if (current_isect.type & PRIMITIVE_CURVE) {
|
||||||
ctx->throughput *= intersection_curve_shadow_transparency(
|
ctx->throughput *= intersection_curve_shadow_transparency(
|
||||||
kg, current_isect.object, current_isect.prim, current_isect.u);
|
kg, current_isect.object, current_isect.prim, current_isect.u);
|
||||||
|
|
||||||
@@ -245,7 +246,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void rtc_filter_func_thick_curve(const RTCFilterFunctionNArguments *args)
|
static void rtc_filter_func_backface_cull(const RTCFilterFunctionNArguments *args)
|
||||||
{
|
{
|
||||||
const RTCRay *ray = (RTCRay *)args->ray;
|
const RTCRay *ray = (RTCRay *)args->ray;
|
||||||
RTCHit *hit = (RTCHit *)args->hit;
|
RTCHit *hit = (RTCHit *)args->hit;
|
||||||
@@ -258,7 +259,7 @@ static void rtc_filter_func_thick_curve(const RTCFilterFunctionNArguments *args)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void rtc_filter_occluded_func_thick_curve(const RTCFilterFunctionNArguments *args)
|
static void rtc_filter_occluded_func_backface_cull(const RTCFilterFunctionNArguments *args)
|
||||||
{
|
{
|
||||||
const RTCRay *ray = (RTCRay *)args->ray;
|
const RTCRay *ray = (RTCRay *)args->ray;
|
||||||
RTCHit *hit = (RTCHit *)args->hit;
|
RTCHit *hit = (RTCHit *)args->hit;
|
||||||
@@ -410,6 +411,12 @@ void BVHEmbree::add_object(Object *ob, int i)
|
|||||||
add_curves(ob, hair, i);
|
add_curves(ob, hair, i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (geom->geometry_type == Geometry::POINTCLOUD) {
|
||||||
|
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
|
||||||
|
if (pointcloud->num_points() > 0) {
|
||||||
|
add_points(ob, pointcloud, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void BVHEmbree::add_instance(Object *ob, int i)
|
void BVHEmbree::add_instance(Object *ob, int i)
|
||||||
@@ -624,6 +631,89 @@ void BVHEmbree::set_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair, c
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void BVHEmbree::set_point_vertex_buffer(RTCGeometry geom_id,
|
||||||
|
const PointCloud *pointcloud,
|
||||||
|
const bool update)
|
||||||
|
{
|
||||||
|
const Attribute *attr_mP = NULL;
|
||||||
|
size_t num_motion_steps = 1;
|
||||||
|
if (pointcloud->has_motion_blur()) {
|
||||||
|
attr_mP = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||||
|
if (attr_mP) {
|
||||||
|
num_motion_steps = pointcloud->get_motion_steps();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const size_t num_points = pointcloud->num_points();
|
||||||
|
|
||||||
|
/* Copy the point data to Embree */
|
||||||
|
const int t_mid = (num_motion_steps - 1) / 2;
|
||||||
|
const float *radius = pointcloud->get_radius().data();
|
||||||
|
for (int t = 0; t < num_motion_steps; ++t) {
|
||||||
|
const float3 *verts;
|
||||||
|
if (t == t_mid || attr_mP == NULL) {
|
||||||
|
verts = pointcloud->get_points().data();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
int t_ = (t > t_mid) ? (t - 1) : t;
|
||||||
|
verts = &attr_mP->data_float3()[t_ * num_points];
|
||||||
|
}
|
||||||
|
|
||||||
|
float4 *rtc_verts = (update) ? (float4 *)rtcGetGeometryBufferData(
|
||||||
|
geom_id, RTC_BUFFER_TYPE_VERTEX, t) :
|
||||||
|
(float4 *)rtcSetNewGeometryBuffer(geom_id,
|
||||||
|
RTC_BUFFER_TYPE_VERTEX,
|
||||||
|
t,
|
||||||
|
RTC_FORMAT_FLOAT4,
|
||||||
|
sizeof(float) * 4,
|
||||||
|
num_points);
|
||||||
|
|
||||||
|
assert(rtc_verts);
|
||||||
|
if (rtc_verts) {
|
||||||
|
for (size_t j = 0; j < num_points; ++j) {
|
||||||
|
rtc_verts[j] = float3_to_float4(verts[j]);
|
||||||
|
rtc_verts[j].w = radius[j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (update) {
|
||||||
|
rtcUpdateGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void BVHEmbree::add_points(const Object *ob, const PointCloud *pointcloud, int i)
|
||||||
|
{
|
||||||
|
size_t prim_offset = pointcloud->prim_offset;
|
||||||
|
|
||||||
|
const Attribute *attr_mP = NULL;
|
||||||
|
size_t num_motion_steps = 1;
|
||||||
|
if (pointcloud->has_motion_blur()) {
|
||||||
|
attr_mP = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||||
|
if (attr_mP) {
|
||||||
|
num_motion_steps = pointcloud->get_motion_steps();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
enum RTCGeometryType type = RTC_GEOMETRY_TYPE_SPHERE_POINT;
|
||||||
|
|
||||||
|
RTCGeometry geom_id = rtcNewGeometry(rtc_device, type);
|
||||||
|
|
||||||
|
rtcSetGeometryBuildQuality(geom_id, build_quality);
|
||||||
|
rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
|
||||||
|
|
||||||
|
set_point_vertex_buffer(geom_id, pointcloud, false);
|
||||||
|
|
||||||
|
rtcSetGeometryUserData(geom_id, (void *)prim_offset);
|
||||||
|
rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func_backface_cull);
|
||||||
|
rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func_backface_cull);
|
||||||
|
rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
|
||||||
|
|
||||||
|
rtcCommitGeometry(geom_id);
|
||||||
|
rtcAttachGeometryByID(scene, geom_id, i * 2);
|
||||||
|
rtcReleaseGeometry(geom_id);
|
||||||
|
}
|
||||||
|
|
||||||
void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i)
|
void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i)
|
||||||
{
|
{
|
||||||
size_t prim_offset = hair->curve_segment_offset;
|
size_t prim_offset = hair->curve_segment_offset;
|
||||||
@@ -678,8 +768,8 @@ void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i)
|
|||||||
rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func);
|
rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func_thick_curve);
|
rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func_backface_cull);
|
||||||
rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func_thick_curve);
|
rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func_backface_cull);
|
||||||
}
|
}
|
||||||
rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
|
rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
|
||||||
|
|
||||||
@@ -716,6 +806,14 @@ void BVHEmbree::refit(Progress &progress)
|
|||||||
rtcCommitGeometry(geom);
|
rtcCommitGeometry(geom);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (geom->geometry_type == Geometry::POINTCLOUD) {
|
||||||
|
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
|
||||||
|
if (pointcloud->num_points() > 0) {
|
||||||
|
RTCGeometry geom = rtcGetGeometry(scene, geom_id);
|
||||||
|
set_point_vertex_buffer(geom, pointcloud, true);
|
||||||
|
rtcCommitGeometry(geom);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
geom_id += 2;
|
geom_id += 2;
|
||||||
}
|
}
|
||||||
|
@@ -33,6 +33,7 @@ CCL_NAMESPACE_BEGIN
|
|||||||
|
|
||||||
class Hair;
|
class Hair;
|
||||||
class Mesh;
|
class Mesh;
|
||||||
|
class PointCloud;
|
||||||
|
|
||||||
class BVHEmbree : public BVH {
|
class BVHEmbree : public BVH {
|
||||||
public:
|
public:
|
||||||
@@ -51,11 +52,15 @@ class BVHEmbree : public BVH {
|
|||||||
void add_object(Object *ob, int i);
|
void add_object(Object *ob, int i);
|
||||||
void add_instance(Object *ob, int i);
|
void add_instance(Object *ob, int i);
|
||||||
void add_curves(const Object *ob, const Hair *hair, int i);
|
void add_curves(const Object *ob, const Hair *hair, int i);
|
||||||
|
void add_points(const Object *ob, const PointCloud *pointcloud, int i);
|
||||||
void add_triangles(const Object *ob, const Mesh *mesh, int i);
|
void add_triangles(const Object *ob, const Mesh *mesh, int i);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void set_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh, const bool update);
|
void set_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh, const bool update);
|
||||||
void set_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair, const bool update);
|
void set_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair, const bool update);
|
||||||
|
void set_point_vertex_buffer(RTCGeometry geom_id,
|
||||||
|
const PointCloud *pointcloud,
|
||||||
|
const bool update);
|
||||||
|
|
||||||
RTCDevice rtc_device;
|
RTCDevice rtc_device;
|
||||||
enum RTCBuildQuality build_quality;
|
enum RTCBuildQuality build_quality;
|
||||||
|
35
intern/cycles/bvh/metal.h
Normal file
35
intern/cycles/bvh/metal.h
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2021 Blender Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __BVH_METAL_H__
|
||||||
|
#define __BVH_METAL_H__
|
||||||
|
|
||||||
|
#ifdef WITH_METAL
|
||||||
|
|
||||||
|
# include "bvh/bvh.h"
|
||||||
|
|
||||||
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
BVH *bvh_metal_create(const BVHParams &params,
|
||||||
|
const vector<Geometry *> &geometry,
|
||||||
|
const vector<Object *> &objects,
|
||||||
|
Device *device);
|
||||||
|
|
||||||
|
CCL_NAMESPACE_END
|
||||||
|
|
||||||
|
#endif /* WITH_METAL */
|
||||||
|
|
||||||
|
#endif /* __BVH_METAL_H__ */
|
33
intern/cycles/bvh/metal.mm
Normal file
33
intern/cycles/bvh/metal.mm
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2021 Blender Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef WITH_METAL
|
||||||
|
|
||||||
|
# include "device/metal/bvh.h"
|
||||||
|
|
||||||
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
BVH *bvh_metal_create(const BVHParams &params,
|
||||||
|
const vector<Geometry *> &geometry,
|
||||||
|
const vector<Object *> &objects,
|
||||||
|
Device *device)
|
||||||
|
{
|
||||||
|
return new BVHMetal(params, geometry, objects, device);
|
||||||
|
}
|
||||||
|
|
||||||
|
CCL_NAMESPACE_END
|
||||||
|
|
||||||
|
#endif /* WITH_METAL */
|
@@ -83,6 +83,8 @@ class BVHParams {
|
|||||||
int max_motion_triangle_leaf_size;
|
int max_motion_triangle_leaf_size;
|
||||||
int max_curve_leaf_size;
|
int max_curve_leaf_size;
|
||||||
int max_motion_curve_leaf_size;
|
int max_motion_curve_leaf_size;
|
||||||
|
int max_point_leaf_size;
|
||||||
|
int max_motion_point_leaf_size;
|
||||||
|
|
||||||
/* object or mesh level bvh */
|
/* object or mesh level bvh */
|
||||||
bool top_level;
|
bool top_level;
|
||||||
@@ -98,13 +100,13 @@ class BVHParams {
|
|||||||
/* Split time range to this number of steps and create leaf node for each
|
/* Split time range to this number of steps and create leaf node for each
|
||||||
* of this time steps.
|
* of this time steps.
|
||||||
*
|
*
|
||||||
* Speeds up rendering of motion curve primitives in the cost of higher
|
* Speeds up rendering of motion primitives in the cost of higher memory usage.
|
||||||
* memory usage.
|
|
||||||
*/
|
*/
|
||||||
int num_motion_curve_steps;
|
|
||||||
|
|
||||||
/* Same as above, but for triangle primitives. */
|
/* Same as above, but for triangle primitives. */
|
||||||
int num_motion_triangle_steps;
|
int num_motion_triangle_steps;
|
||||||
|
int num_motion_curve_steps;
|
||||||
|
int num_motion_point_steps;
|
||||||
|
|
||||||
/* Same as in SceneParams. */
|
/* Same as in SceneParams. */
|
||||||
int bvh_type;
|
int bvh_type;
|
||||||
@@ -132,6 +134,8 @@ class BVHParams {
|
|||||||
max_motion_triangle_leaf_size = 8;
|
max_motion_triangle_leaf_size = 8;
|
||||||
max_curve_leaf_size = 1;
|
max_curve_leaf_size = 1;
|
||||||
max_motion_curve_leaf_size = 4;
|
max_motion_curve_leaf_size = 4;
|
||||||
|
max_point_leaf_size = 8;
|
||||||
|
max_motion_point_leaf_size = 8;
|
||||||
|
|
||||||
top_level = false;
|
top_level = false;
|
||||||
bvh_layout = BVH_LAYOUT_BVH2;
|
bvh_layout = BVH_LAYOUT_BVH2;
|
||||||
@@ -139,6 +143,7 @@ class BVHParams {
|
|||||||
|
|
||||||
num_motion_curve_steps = 0;
|
num_motion_curve_steps = 0;
|
||||||
num_motion_triangle_steps = 0;
|
num_motion_triangle_steps = 0;
|
||||||
|
num_motion_point_steps = 0;
|
||||||
|
|
||||||
bvh_type = 0;
|
bvh_type = 0;
|
||||||
|
|
||||||
@@ -166,6 +171,12 @@ class BVHParams {
|
|||||||
return (size <= min_leaf_size || level >= MAX_DEPTH);
|
return (size <= min_leaf_size || level >= MAX_DEPTH);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool use_motion_steps()
|
||||||
|
{
|
||||||
|
return num_motion_curve_steps > 0 || num_motion_triangle_steps > 0 ||
|
||||||
|
num_motion_point_steps > 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* Gets best matching BVH.
|
/* Gets best matching BVH.
|
||||||
*
|
*
|
||||||
* If the requested layout is supported by the device, it will be used.
|
* If the requested layout is supported by the device, it will be used.
|
||||||
|
@@ -23,6 +23,7 @@
|
|||||||
#include "scene/hair.h"
|
#include "scene/hair.h"
|
||||||
#include "scene/mesh.h"
|
#include "scene/mesh.h"
|
||||||
#include "scene/object.h"
|
#include "scene/object.h"
|
||||||
|
#include "scene/pointcloud.h"
|
||||||
|
|
||||||
#include "util/algorithm.h"
|
#include "util/algorithm.h"
|
||||||
|
|
||||||
@@ -426,6 +427,32 @@ void BVHSpatialSplit::split_curve_primitive(const Hair *hair,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void BVHSpatialSplit::split_point_primitive(const PointCloud *pointcloud,
|
||||||
|
const Transform *tfm,
|
||||||
|
int prim_index,
|
||||||
|
int dim,
|
||||||
|
float pos,
|
||||||
|
BoundBox &left_bounds,
|
||||||
|
BoundBox &right_bounds)
|
||||||
|
{
|
||||||
|
/* No real splitting support for points, assume they are small enough for it
|
||||||
|
* not to matter. */
|
||||||
|
float3 point = pointcloud->get_points()[prim_index];
|
||||||
|
|
||||||
|
if (tfm != NULL) {
|
||||||
|
point = transform_point(tfm, point);
|
||||||
|
}
|
||||||
|
point = get_unaligned_point(point);
|
||||||
|
|
||||||
|
if (point[dim] <= pos) {
|
||||||
|
left_bounds.grow(point);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (point[dim] >= pos) {
|
||||||
|
right_bounds.grow(point);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void BVHSpatialSplit::split_triangle_reference(const BVHReference &ref,
|
void BVHSpatialSplit::split_triangle_reference(const BVHReference &ref,
|
||||||
const Mesh *mesh,
|
const Mesh *mesh,
|
||||||
int dim,
|
int dim,
|
||||||
@@ -453,6 +480,16 @@ void BVHSpatialSplit::split_curve_reference(const BVHReference &ref,
|
|||||||
right_bounds);
|
right_bounds);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void BVHSpatialSplit::split_point_reference(const BVHReference &ref,
|
||||||
|
const PointCloud *pointcloud,
|
||||||
|
int dim,
|
||||||
|
float pos,
|
||||||
|
BoundBox &left_bounds,
|
||||||
|
BoundBox &right_bounds)
|
||||||
|
{
|
||||||
|
split_point_primitive(pointcloud, NULL, ref.prim_index(), dim, pos, left_bounds, right_bounds);
|
||||||
|
}
|
||||||
|
|
||||||
void BVHSpatialSplit::split_object_reference(
|
void BVHSpatialSplit::split_object_reference(
|
||||||
const Object *object, int dim, float pos, BoundBox &left_bounds, BoundBox &right_bounds)
|
const Object *object, int dim, float pos, BoundBox &left_bounds, BoundBox &right_bounds)
|
||||||
{
|
{
|
||||||
@@ -475,6 +512,13 @@ void BVHSpatialSplit::split_object_reference(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (geom->geometry_type == Geometry::POINTCLOUD) {
|
||||||
|
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
|
||||||
|
for (int point_idx = 0; point_idx < pointcloud->num_points(); ++point_idx) {
|
||||||
|
split_point_primitive(
|
||||||
|
pointcloud, &object->get_tfm(), point_idx, dim, pos, left_bounds, right_bounds);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void BVHSpatialSplit::split_reference(const BVHBuild &builder,
|
void BVHSpatialSplit::split_reference(const BVHBuild &builder,
|
||||||
@@ -491,14 +535,18 @@ void BVHSpatialSplit::split_reference(const BVHBuild &builder,
|
|||||||
/* loop over vertices/edges. */
|
/* loop over vertices/edges. */
|
||||||
const Object *ob = builder.objects[ref.prim_object()];
|
const Object *ob = builder.objects[ref.prim_object()];
|
||||||
|
|
||||||
if (ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) {
|
if (ref.prim_type() & PRIMITIVE_TRIANGLE) {
|
||||||
Mesh *mesh = static_cast<Mesh *>(ob->get_geometry());
|
Mesh *mesh = static_cast<Mesh *>(ob->get_geometry());
|
||||||
split_triangle_reference(ref, mesh, dim, pos, left_bounds, right_bounds);
|
split_triangle_reference(ref, mesh, dim, pos, left_bounds, right_bounds);
|
||||||
}
|
}
|
||||||
else if (ref.prim_type() & PRIMITIVE_ALL_CURVE) {
|
else if (ref.prim_type() & PRIMITIVE_CURVE) {
|
||||||
Hair *hair = static_cast<Hair *>(ob->get_geometry());
|
Hair *hair = static_cast<Hair *>(ob->get_geometry());
|
||||||
split_curve_reference(ref, hair, dim, pos, left_bounds, right_bounds);
|
split_curve_reference(ref, hair, dim, pos, left_bounds, right_bounds);
|
||||||
}
|
}
|
||||||
|
else if (ref.prim_type() & PRIMITIVE_POINT) {
|
||||||
|
PointCloud *pointcloud = static_cast<PointCloud *>(ob->get_geometry());
|
||||||
|
split_point_reference(ref, pointcloud, dim, pos, left_bounds, right_bounds);
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
split_object_reference(ob, dim, pos, left_bounds, right_bounds);
|
split_object_reference(ob, dim, pos, left_bounds, right_bounds);
|
||||||
}
|
}
|
||||||
|
@@ -26,6 +26,7 @@ CCL_NAMESPACE_BEGIN
|
|||||||
class BVHBuild;
|
class BVHBuild;
|
||||||
class Hair;
|
class Hair;
|
||||||
class Mesh;
|
class Mesh;
|
||||||
|
class PointCloud;
|
||||||
struct Transform;
|
struct Transform;
|
||||||
|
|
||||||
/* Object Split */
|
/* Object Split */
|
||||||
@@ -123,6 +124,13 @@ class BVHSpatialSplit {
|
|||||||
float pos,
|
float pos,
|
||||||
BoundBox &left_bounds,
|
BoundBox &left_bounds,
|
||||||
BoundBox &right_bounds);
|
BoundBox &right_bounds);
|
||||||
|
void split_point_primitive(const PointCloud *pointcloud,
|
||||||
|
const Transform *tfm,
|
||||||
|
int prim_index,
|
||||||
|
int dim,
|
||||||
|
float pos,
|
||||||
|
BoundBox &left_bounds,
|
||||||
|
BoundBox &right_bounds);
|
||||||
|
|
||||||
/* Lower-level functions which calculates boundaries of left and right nodes
|
/* Lower-level functions which calculates boundaries of left and right nodes
|
||||||
* needed for spatial split.
|
* needed for spatial split.
|
||||||
@@ -141,6 +149,12 @@ class BVHSpatialSplit {
|
|||||||
float pos,
|
float pos,
|
||||||
BoundBox &left_bounds,
|
BoundBox &left_bounds,
|
||||||
BoundBox &right_bounds);
|
BoundBox &right_bounds);
|
||||||
|
void split_point_reference(const BVHReference &ref,
|
||||||
|
const PointCloud *pointcloud,
|
||||||
|
int dim,
|
||||||
|
float pos,
|
||||||
|
BoundBox &left_bounds,
|
||||||
|
BoundBox &right_bounds);
|
||||||
void split_object_reference(
|
void split_object_reference(
|
||||||
const Object *object, int dim, float pos, BoundBox &left_bounds, BoundBox &right_bounds);
|
const Object *object, int dim, float pos, BoundBox &left_bounds, BoundBox &right_bounds);
|
||||||
|
|
||||||
|
@@ -69,7 +69,7 @@ bool BVHUnaligned::compute_aligned_space(const BVHReference &ref, Transform *ali
|
|||||||
const int packed_type = ref.prim_type();
|
const int packed_type = ref.prim_type();
|
||||||
const int type = (packed_type & PRIMITIVE_ALL);
|
const int type = (packed_type & PRIMITIVE_ALL);
|
||||||
/* No motion blur curves here, we can't fit them to aligned boxes well. */
|
/* No motion blur curves here, we can't fit them to aligned boxes well. */
|
||||||
if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_CURVE_THICK)) {
|
if ((type & PRIMITIVE_CURVE) && !(type & PRIMITIVE_MOTION)) {
|
||||||
const int curve_index = ref.prim_index();
|
const int curve_index = ref.prim_index();
|
||||||
const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type);
|
const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type);
|
||||||
const Hair *hair = static_cast<const Hair *>(object->get_geometry());
|
const Hair *hair = static_cast<const Hair *>(object->get_geometry());
|
||||||
@@ -95,7 +95,7 @@ BoundBox BVHUnaligned::compute_aligned_prim_boundbox(const BVHReference &prim,
|
|||||||
const int packed_type = prim.prim_type();
|
const int packed_type = prim.prim_type();
|
||||||
const int type = (packed_type & PRIMITIVE_ALL);
|
const int type = (packed_type & PRIMITIVE_ALL);
|
||||||
/* No motion blur curves here, we can't fit them to aligned boxes well. */
|
/* No motion blur curves here, we can't fit them to aligned boxes well. */
|
||||||
if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_CURVE_THICK)) {
|
if ((type & PRIMITIVE_CURVE) && !(type & PRIMITIVE_MOTION)) {
|
||||||
const int curve_index = prim.prim_index();
|
const int curve_index = prim.prim_index();
|
||||||
const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type);
|
const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type);
|
||||||
const Hair *hair = static_cast<const Hair *>(object->get_geometry());
|
const Hair *hair = static_cast<const Hair *>(object->get_geometry());
|
||||||
|
@@ -551,4 +551,23 @@ if(NOT WITH_HIP_DYNLOAD)
|
|||||||
set(WITH_HIP_DYNLOAD ON)
|
set(WITH_HIP_DYNLOAD ON)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
###########################################################################
|
||||||
|
# Metal
|
||||||
|
###########################################################################
|
||||||
|
|
||||||
|
if(WITH_CYCLES_DEVICE_METAL)
|
||||||
|
find_library(METAL_LIBRARY Metal)
|
||||||
|
|
||||||
|
# This file was added in the 12.0 SDK, use it as a way to detect the version.
|
||||||
|
if (METAL_LIBRARY AND NOT EXISTS "${METAL_LIBRARY}/Headers/MTLFunctionStitching.h")
|
||||||
|
message(STATUS "Metal version too old, must be SDK 12.0 or newer, disabling WITH_CYCLES_DEVICE_METAL")
|
||||||
|
set(WITH_CYCLES_DEVICE_METAL OFF)
|
||||||
|
elseif (NOT METAL_LIBRARY)
|
||||||
|
message(STATUS "Metal not found, disabling WITH_CYCLES_DEVICE_METAL")
|
||||||
|
set(WITH_CYCLES_DEVICE_METAL OFF)
|
||||||
|
else()
|
||||||
|
message(STATUS "Found Metal: ${METAL_LIBRARY}")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
unset(_cycles_lib_dir)
|
unset(_cycles_lib_dir)
|
||||||
|
@@ -43,7 +43,7 @@ if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
|
|||||||
add_definitions(-DWITH_HIP_DYNLOAD)
|
add_definitions(-DWITH_HIP_DYNLOAD)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(SRC
|
set(SRC_BASE
|
||||||
device.cpp
|
device.cpp
|
||||||
denoise.cpp
|
denoise.cpp
|
||||||
graphics_interop.cpp
|
graphics_interop.cpp
|
||||||
@@ -104,6 +104,21 @@ set(SRC_MULTI
|
|||||||
multi/device.h
|
multi/device.h
|
||||||
)
|
)
|
||||||
|
|
||||||
|
set(SRC_METAL
|
||||||
|
metal/bvh.mm
|
||||||
|
metal/bvh.h
|
||||||
|
metal/device.mm
|
||||||
|
metal/device.h
|
||||||
|
metal/device_impl.mm
|
||||||
|
metal/device_impl.h
|
||||||
|
metal/kernel.mm
|
||||||
|
metal/kernel.h
|
||||||
|
metal/queue.mm
|
||||||
|
metal/queue.h
|
||||||
|
metal/util.mm
|
||||||
|
metal/util.h
|
||||||
|
)
|
||||||
|
|
||||||
set(SRC_OPTIX
|
set(SRC_OPTIX
|
||||||
optix/device.cpp
|
optix/device.cpp
|
||||||
optix/device.h
|
optix/device.h
|
||||||
@@ -123,6 +138,17 @@ set(SRC_HEADERS
|
|||||||
queue.h
|
queue.h
|
||||||
)
|
)
|
||||||
|
|
||||||
|
set(SRC
|
||||||
|
${SRC_BASE}
|
||||||
|
${SRC_CPU}
|
||||||
|
${SRC_CUDA}
|
||||||
|
${SRC_HIP}
|
||||||
|
${SRC_DUMMY}
|
||||||
|
${SRC_MULTI}
|
||||||
|
${SRC_OPTIX}
|
||||||
|
${SRC_HEADERS}
|
||||||
|
)
|
||||||
|
|
||||||
set(LIB
|
set(LIB
|
||||||
cycles_kernel
|
cycles_kernel
|
||||||
cycles_util
|
cycles_util
|
||||||
@@ -158,6 +184,15 @@ endif()
|
|||||||
if(WITH_CYCLES_DEVICE_OPTIX)
|
if(WITH_CYCLES_DEVICE_OPTIX)
|
||||||
add_definitions(-DWITH_OPTIX)
|
add_definitions(-DWITH_OPTIX)
|
||||||
endif()
|
endif()
|
||||||
|
if(WITH_CYCLES_DEVICE_METAL)
|
||||||
|
list(APPEND LIB
|
||||||
|
${METAL_LIBRARY}
|
||||||
|
)
|
||||||
|
add_definitions(-DWITH_METAL)
|
||||||
|
list(APPEND SRC
|
||||||
|
${SRC_METAL}
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
|
||||||
if(WITH_OPENIMAGEDENOISE)
|
if(WITH_OPENIMAGEDENOISE)
|
||||||
list(APPEND LIB
|
list(APPEND LIB
|
||||||
@@ -168,20 +203,12 @@ endif()
|
|||||||
include_directories(${INC})
|
include_directories(${INC})
|
||||||
include_directories(SYSTEM ${INC_SYS})
|
include_directories(SYSTEM ${INC_SYS})
|
||||||
|
|
||||||
cycles_add_library(cycles_device "${LIB}"
|
cycles_add_library(cycles_device "${LIB}" ${SRC})
|
||||||
${SRC}
|
|
||||||
${SRC_CPU}
|
|
||||||
${SRC_CUDA}
|
|
||||||
${SRC_HIP}
|
|
||||||
${SRC_DUMMY}
|
|
||||||
${SRC_MULTI}
|
|
||||||
${SRC_OPTIX}
|
|
||||||
${SRC_HEADERS}
|
|
||||||
)
|
|
||||||
|
|
||||||
source_group("cpu" FILES ${SRC_CPU})
|
source_group("cpu" FILES ${SRC_CPU})
|
||||||
source_group("cuda" FILES ${SRC_CUDA})
|
source_group("cuda" FILES ${SRC_CUDA})
|
||||||
source_group("dummy" FILES ${SRC_DUMMY})
|
source_group("dummy" FILES ${SRC_DUMMY})
|
||||||
source_group("multi" FILES ${SRC_MULTI})
|
source_group("multi" FILES ${SRC_MULTI})
|
||||||
|
source_group("metal" FILES ${SRC_METAL})
|
||||||
source_group("optix" FILES ${SRC_OPTIX})
|
source_group("optix" FILES ${SRC_OPTIX})
|
||||||
source_group("common" FILES ${SRC} ${SRC_HEADERS})
|
source_group("common" FILES ${SRC} ${SRC_HEADERS})
|
||||||
|
@@ -129,8 +129,7 @@ void CPUDevice::mem_alloc(device_memory &mem)
|
|||||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mem.type == MEM_DEVICE_ONLY) {
|
if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) {
|
||||||
assert(!mem.host_pointer);
|
|
||||||
size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES;
|
size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES;
|
||||||
void *data = util_aligned_malloc(mem.memory_size(), alignment);
|
void *data = util_aligned_malloc(mem.memory_size(), alignment);
|
||||||
mem.device_pointer = (device_ptr)data;
|
mem.device_pointer = (device_ptr)data;
|
||||||
@@ -189,7 +188,7 @@ void CPUDevice::mem_free(device_memory &mem)
|
|||||||
tex_free((device_texture &)mem);
|
tex_free((device_texture &)mem);
|
||||||
}
|
}
|
||||||
else if (mem.device_pointer) {
|
else if (mem.device_pointer) {
|
||||||
if (mem.type == MEM_DEVICE_ONLY) {
|
if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) {
|
||||||
util_aligned_free((void *)mem.device_pointer);
|
util_aligned_free((void *)mem.device_pointer);
|
||||||
}
|
}
|
||||||
mem.device_pointer = 0;
|
mem.device_pointer = 0;
|
||||||
@@ -274,7 +273,8 @@ void CPUDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
|||||||
{
|
{
|
||||||
#ifdef WITH_EMBREE
|
#ifdef WITH_EMBREE
|
||||||
if (bvh->params.bvh_layout == BVH_LAYOUT_EMBREE ||
|
if (bvh->params.bvh_layout == BVH_LAYOUT_EMBREE ||
|
||||||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE) {
|
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE ||
|
||||||
|
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE) {
|
||||||
BVHEmbree *const bvh_embree = static_cast<BVHEmbree *>(bvh);
|
BVHEmbree *const bvh_embree = static_cast<BVHEmbree *>(bvh);
|
||||||
if (refit) {
|
if (refit) {
|
||||||
bvh_embree->refit(progress);
|
bvh_embree->refit(progress);
|
||||||
|
@@ -477,10 +477,10 @@ void CUDADevice::reserve_local_memory(const uint kernel_features)
|
|||||||
* still to make it faster. */
|
* still to make it faster. */
|
||||||
CUDADeviceQueue queue(this);
|
CUDADeviceQueue queue(this);
|
||||||
|
|
||||||
void *d_path_index = nullptr;
|
device_ptr d_path_index = 0;
|
||||||
void *d_render_buffer = nullptr;
|
device_ptr d_render_buffer = 0;
|
||||||
int d_work_size = 0;
|
int d_work_size = 0;
|
||||||
void *args[] = {&d_path_index, &d_render_buffer, &d_work_size};
|
DeviceKernelArguments args(&d_path_index, &d_render_buffer, &d_work_size);
|
||||||
|
|
||||||
queue.init_execution();
|
queue.init_execution();
|
||||||
queue.enqueue(test_kernel, 1, args);
|
queue.enqueue(test_kernel, 1, args);
|
||||||
@@ -678,7 +678,7 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_
|
|||||||
|
|
||||||
void *shared_pointer = 0;
|
void *shared_pointer = 0;
|
||||||
|
|
||||||
if (mem_alloc_result != CUDA_SUCCESS && can_map_host) {
|
if (mem_alloc_result != CUDA_SUCCESS && can_map_host && mem.type != MEM_DEVICE_ONLY) {
|
||||||
if (mem.shared_pointer) {
|
if (mem.shared_pointer) {
|
||||||
/* Another device already allocated host memory. */
|
/* Another device already allocated host memory. */
|
||||||
mem_alloc_result = CUDA_SUCCESS;
|
mem_alloc_result = CUDA_SUCCESS;
|
||||||
@@ -701,9 +701,15 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (mem_alloc_result != CUDA_SUCCESS) {
|
if (mem_alloc_result != CUDA_SUCCESS) {
|
||||||
|
if (mem.type == MEM_DEVICE_ONLY) {
|
||||||
|
status = " failed, out of device memory";
|
||||||
|
set_error("System is out of GPU memory");
|
||||||
|
}
|
||||||
|
else {
|
||||||
status = " failed, out of device and host memory";
|
status = " failed, out of device and host memory";
|
||||||
set_error("System is out of GPU and shared host memory");
|
set_error("System is out of GPU and shared host memory");
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (mem.name) {
|
if (mem.name) {
|
||||||
VLOG(1) << "Buffer allocate: " << mem.name << ", "
|
VLOG(1) << "Buffer allocate: " << mem.name << ", "
|
||||||
|
@@ -89,7 +89,9 @@ bool CUDADeviceQueue::kernel_available(DeviceKernel kernel) const
|
|||||||
return cuda_device_->kernels.available(kernel);
|
return cuda_device_->kernels.available(kernel);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CUDADeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[])
|
bool CUDADeviceQueue::enqueue(DeviceKernel kernel,
|
||||||
|
const int work_size,
|
||||||
|
DeviceKernelArguments const &args)
|
||||||
{
|
{
|
||||||
if (cuda_device_->have_error()) {
|
if (cuda_device_->have_error()) {
|
||||||
return false;
|
return false;
|
||||||
@@ -133,7 +135,7 @@ bool CUDADeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *ar
|
|||||||
1,
|
1,
|
||||||
shared_mem_bytes,
|
shared_mem_bytes,
|
||||||
cuda_stream_,
|
cuda_stream_,
|
||||||
args,
|
const_cast<void **>(args.values),
|
||||||
0),
|
0),
|
||||||
"enqueue");
|
"enqueue");
|
||||||
|
|
||||||
|
@@ -42,7 +42,9 @@ class CUDADeviceQueue : public DeviceQueue {
|
|||||||
|
|
||||||
virtual bool kernel_available(DeviceKernel kernel) const override;
|
virtual bool kernel_available(DeviceKernel kernel) const override;
|
||||||
|
|
||||||
virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) override;
|
virtual bool enqueue(DeviceKernel kernel,
|
||||||
|
const int work_size,
|
||||||
|
DeviceKernelArguments const &args) override;
|
||||||
|
|
||||||
virtual bool synchronize() override;
|
virtual bool synchronize() override;
|
||||||
|
|
||||||
|
@@ -27,6 +27,7 @@
|
|||||||
#include "device/cuda/device.h"
|
#include "device/cuda/device.h"
|
||||||
#include "device/dummy/device.h"
|
#include "device/dummy/device.h"
|
||||||
#include "device/hip/device.h"
|
#include "device/hip/device.h"
|
||||||
|
#include "device/metal/device.h"
|
||||||
#include "device/multi/device.h"
|
#include "device/multi/device.h"
|
||||||
#include "device/optix/device.h"
|
#include "device/optix/device.h"
|
||||||
|
|
||||||
@@ -49,6 +50,7 @@ vector<DeviceInfo> Device::cuda_devices;
|
|||||||
vector<DeviceInfo> Device::optix_devices;
|
vector<DeviceInfo> Device::optix_devices;
|
||||||
vector<DeviceInfo> Device::cpu_devices;
|
vector<DeviceInfo> Device::cpu_devices;
|
||||||
vector<DeviceInfo> Device::hip_devices;
|
vector<DeviceInfo> Device::hip_devices;
|
||||||
|
vector<DeviceInfo> Device::metal_devices;
|
||||||
uint Device::devices_initialized_mask = 0;
|
uint Device::devices_initialized_mask = 0;
|
||||||
|
|
||||||
/* Device */
|
/* Device */
|
||||||
@@ -105,6 +107,12 @@ Device *Device::create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
|||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef WITH_METAL
|
||||||
|
case DEVICE_METAL:
|
||||||
|
if (device_metal_init())
|
||||||
|
device = device_metal_create(info, stats, profiler);
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -128,6 +136,8 @@ DeviceType Device::type_from_string(const char *name)
|
|||||||
return DEVICE_MULTI;
|
return DEVICE_MULTI;
|
||||||
else if (strcmp(name, "HIP") == 0)
|
else if (strcmp(name, "HIP") == 0)
|
||||||
return DEVICE_HIP;
|
return DEVICE_HIP;
|
||||||
|
else if (strcmp(name, "METAL") == 0)
|
||||||
|
return DEVICE_METAL;
|
||||||
|
|
||||||
return DEVICE_NONE;
|
return DEVICE_NONE;
|
||||||
}
|
}
|
||||||
@@ -144,6 +154,8 @@ string Device::string_from_type(DeviceType type)
|
|||||||
return "MULTI";
|
return "MULTI";
|
||||||
else if (type == DEVICE_HIP)
|
else if (type == DEVICE_HIP)
|
||||||
return "HIP";
|
return "HIP";
|
||||||
|
else if (type == DEVICE_METAL)
|
||||||
|
return "METAL";
|
||||||
|
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
@@ -161,7 +173,9 @@ vector<DeviceType> Device::available_types()
|
|||||||
#ifdef WITH_HIP
|
#ifdef WITH_HIP
|
||||||
types.push_back(DEVICE_HIP);
|
types.push_back(DEVICE_HIP);
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef WITH_METAL
|
||||||
|
types.push_back(DEVICE_METAL);
|
||||||
|
#endif
|
||||||
return types;
|
return types;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -227,6 +241,20 @@ vector<DeviceInfo> Device::available_devices(uint mask)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef WITH_METAL
|
||||||
|
if (mask & DEVICE_MASK_METAL) {
|
||||||
|
if (!(devices_initialized_mask & DEVICE_MASK_METAL)) {
|
||||||
|
if (device_metal_init()) {
|
||||||
|
device_metal_info(metal_devices);
|
||||||
|
}
|
||||||
|
devices_initialized_mask |= DEVICE_MASK_METAL;
|
||||||
|
}
|
||||||
|
foreach (DeviceInfo &info, metal_devices) {
|
||||||
|
devices.push_back(info);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
return devices;
|
return devices;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -266,6 +294,15 @@ string Device::device_capabilities(uint mask)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef WITH_METAL
|
||||||
|
if (mask & DEVICE_MASK_METAL) {
|
||||||
|
if (device_metal_init()) {
|
||||||
|
capabilities += "\nMetal device capabilities:\n";
|
||||||
|
capabilities += device_metal_capabilities();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
return capabilities;
|
return capabilities;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -354,6 +391,7 @@ void Device::free_memory()
|
|||||||
optix_devices.free_memory();
|
optix_devices.free_memory();
|
||||||
hip_devices.free_memory();
|
hip_devices.free_memory();
|
||||||
cpu_devices.free_memory();
|
cpu_devices.free_memory();
|
||||||
|
metal_devices.free_memory();
|
||||||
}
|
}
|
||||||
|
|
||||||
unique_ptr<DeviceQueue> Device::gpu_queue_create()
|
unique_ptr<DeviceQueue> Device::gpu_queue_create()
|
||||||
|
@@ -52,6 +52,7 @@ enum DeviceType {
|
|||||||
DEVICE_MULTI,
|
DEVICE_MULTI,
|
||||||
DEVICE_OPTIX,
|
DEVICE_OPTIX,
|
||||||
DEVICE_HIP,
|
DEVICE_HIP,
|
||||||
|
DEVICE_METAL,
|
||||||
DEVICE_DUMMY,
|
DEVICE_DUMMY,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -60,6 +61,7 @@ enum DeviceTypeMask {
|
|||||||
DEVICE_MASK_CUDA = (1 << DEVICE_CUDA),
|
DEVICE_MASK_CUDA = (1 << DEVICE_CUDA),
|
||||||
DEVICE_MASK_OPTIX = (1 << DEVICE_OPTIX),
|
DEVICE_MASK_OPTIX = (1 << DEVICE_OPTIX),
|
||||||
DEVICE_MASK_HIP = (1 << DEVICE_HIP),
|
DEVICE_MASK_HIP = (1 << DEVICE_HIP),
|
||||||
|
DEVICE_MASK_METAL = (1 << DEVICE_METAL),
|
||||||
DEVICE_MASK_ALL = ~0
|
DEVICE_MASK_ALL = ~0
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -281,6 +283,7 @@ class Device {
|
|||||||
static vector<DeviceInfo> optix_devices;
|
static vector<DeviceInfo> optix_devices;
|
||||||
static vector<DeviceInfo> cpu_devices;
|
static vector<DeviceInfo> cpu_devices;
|
||||||
static vector<DeviceInfo> hip_devices;
|
static vector<DeviceInfo> hip_devices;
|
||||||
|
static vector<DeviceInfo> metal_devices;
|
||||||
static uint devices_initialized_mask;
|
static uint devices_initialized_mask;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -440,10 +440,10 @@ void HIPDevice::reserve_local_memory(const uint kernel_features)
|
|||||||
* still to make it faster. */
|
* still to make it faster. */
|
||||||
HIPDeviceQueue queue(this);
|
HIPDeviceQueue queue(this);
|
||||||
|
|
||||||
void *d_path_index = nullptr;
|
device_ptr d_path_index = 0;
|
||||||
void *d_render_buffer = nullptr;
|
device_ptr d_render_buffer = 0;
|
||||||
int d_work_size = 0;
|
int d_work_size = 0;
|
||||||
void *args[] = {&d_path_index, &d_render_buffer, &d_work_size};
|
DeviceKernelArguments args(&d_path_index, &d_render_buffer, &d_work_size);
|
||||||
|
|
||||||
queue.init_execution();
|
queue.init_execution();
|
||||||
queue.enqueue(test_kernel, 1, args);
|
queue.enqueue(test_kernel, 1, args);
|
||||||
|
@@ -89,7 +89,9 @@ bool HIPDeviceQueue::kernel_available(DeviceKernel kernel) const
|
|||||||
return hip_device_->kernels.available(kernel);
|
return hip_device_->kernels.available(kernel);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool HIPDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[])
|
bool HIPDeviceQueue::enqueue(DeviceKernel kernel,
|
||||||
|
const int work_size,
|
||||||
|
DeviceKernelArguments const &args)
|
||||||
{
|
{
|
||||||
if (hip_device_->have_error()) {
|
if (hip_device_->have_error()) {
|
||||||
return false;
|
return false;
|
||||||
@@ -132,7 +134,7 @@ bool HIPDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *arg
|
|||||||
1,
|
1,
|
||||||
shared_mem_bytes,
|
shared_mem_bytes,
|
||||||
hip_stream_,
|
hip_stream_,
|
||||||
args,
|
const_cast<void **>(args.values),
|
||||||
0),
|
0),
|
||||||
"enqueue");
|
"enqueue");
|
||||||
|
|
||||||
|
@@ -42,7 +42,9 @@ class HIPDeviceQueue : public DeviceQueue {
|
|||||||
|
|
||||||
virtual bool kernel_available(DeviceKernel kernel) const override;
|
virtual bool kernel_available(DeviceKernel kernel) const override;
|
||||||
|
|
||||||
virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) override;
|
virtual bool enqueue(DeviceKernel kernel,
|
||||||
|
const int work_size,
|
||||||
|
DeviceKernelArguments const &args) override;
|
||||||
|
|
||||||
virtual bool synchronize() override;
|
virtual bool synchronize() override;
|
||||||
|
|
||||||
|
@@ -263,6 +263,7 @@ class device_memory {
|
|||||||
friend class CUDADevice;
|
friend class CUDADevice;
|
||||||
friend class OptiXDevice;
|
friend class OptiXDevice;
|
||||||
friend class HIPDevice;
|
friend class HIPDevice;
|
||||||
|
friend class MetalDevice;
|
||||||
|
|
||||||
/* Only create through subclasses. */
|
/* Only create through subclasses. */
|
||||||
device_memory(Device *device, const char *name, MemoryType type);
|
device_memory(Device *device, const char *name, MemoryType type);
|
||||||
@@ -581,7 +582,7 @@ template<typename T> class device_vector : public device_memory {
|
|||||||
* from an already allocated base memory. It is freed automatically when it
|
* from an already allocated base memory. It is freed automatically when it
|
||||||
* goes out of scope, which should happen before base memory is freed.
|
* goes out of scope, which should happen before base memory is freed.
|
||||||
*
|
*
|
||||||
* Note: some devices require offset and size of the sub_ptr to be properly
|
* NOTE: some devices require offset and size of the sub_ptr to be properly
|
||||||
* aligned to device->mem_address_alingment(). */
|
* aligned to device->mem_address_alingment(). */
|
||||||
|
|
||||||
class device_sub_ptr {
|
class device_sub_ptr {
|
||||||
|
66
intern/cycles/device/metal/bvh.h
Normal file
66
intern/cycles/device/metal/bvh.h
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2021 Blender Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef WITH_METAL
|
||||||
|
|
||||||
|
# include "bvh/bvh.h"
|
||||||
|
# include "bvh/params.h"
|
||||||
|
# include "device/memory.h"
|
||||||
|
|
||||||
|
# include <Metal/Metal.h>
|
||||||
|
|
||||||
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
/* BVH implementation backed by Metal acceleration structures.
 *
 * One instance owns at most one MTLAccelerationStructure (`accel_struct`),
 * which the build_* methods fill in asynchronously from command buffer
 * completion handlers. */
class BVHMetal : public BVH {
 public:
  /* Result of the most recent build; released by the destructor. */
  API_AVAILABLE(macos(11.0))
  id<MTLAccelerationStructure> accel_struct = nil;
  /* Set to true right before a build command buffer is committed and reset to
   * false by the completion handler once `accel_struct` has been assigned. */
  bool accel_struct_building = false;

  /* NOTE(review): presumably the BLAS list referenced by a top level structure;
   * it is not written by the BLAS build methods - confirm against build_TLAS. */
  API_AVAILABLE(macos(11.0))
  vector<id<MTLAccelerationStructure>> blas_array;

  /* Read by the build methods when deciding whether to emit motion keyframes. */
  bool motion_blur = false;

  /* Memory statistics of the owning device (bound from `device->stats`), used
   * to account for acceleration structure allocations and frees. */
  Stats &stats;

  bool build(Progress &progress, id<MTLDevice> device, id<MTLCommandQueue> queue, bool refit);

  BVHMetal(const BVHParams &params,
           const vector<Geometry *> &geometry,
           const vector<Object *> &objects,
           Device *device);
  virtual ~BVHMetal();

  /* Build the bottom level structure for the single geometry of this BVH. */
  bool build_BLAS(Progress &progress, id<MTLDevice> device, id<MTLCommandQueue> queue, bool refit);
  /* Triangle BLAS; returns false when the mesh has no triangles. */
  bool build_BLAS_mesh(Progress &progress,
                       id<MTLDevice> device,
                       id<MTLCommandQueue> queue,
                       Geometry *const geom,
                       bool refit);
  /* Bounding box BLAS for hair segments; returns false when there are no curves. */
  bool build_BLAS_hair(Progress &progress,
                       id<MTLDevice> device,
                       id<MTLCommandQueue> queue,
                       Geometry *const geom,
                       bool refit);
  /* Build the top level structure over all traceable objects. */
  bool build_TLAS(Progress &progress, id<MTLDevice> device, id<MTLCommandQueue> queue, bool refit);
};
|
||||||
|
|
||||||
|
CCL_NAMESPACE_END
|
||||||
|
|
||||||
|
#endif /* WITH_METAL */
|
813
intern/cycles/device/metal/bvh.mm
Normal file
813
intern/cycles/device/metal/bvh.mm
Normal file
@@ -0,0 +1,813 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2021 Blender Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef WITH_METAL
|
||||||
|
|
||||||
|
# include "scene/hair.h"
|
||||||
|
# include "scene/mesh.h"
|
||||||
|
# include "scene/object.h"
|
||||||
|
|
||||||
|
# include "util/progress.h"
|
||||||
|
|
||||||
|
# include "device/metal/bvh.h"
|
||||||
|
|
||||||
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
/* Format a printf-style message and publish it as the sub-status of the
 * `progress` object that is in scope at the call site.
 *
 * Wrapped in do/while(0) so the macro expands to exactly one statement and
 * `if (x) BVH_status(...); else ...` parses as intended. */
#  define BVH_status(...) \
    do { \
      string str = string_printf(__VA_ARGS__); \
      progress.set_substatus(str); \
    } while (0)
|
||||||
|
|
||||||
|
/* Forward the build inputs to the BVH base class and bind the memory
 * statistics of the owning device, which are used to track the size of the
 * acceleration structure. `device` must not be null: it is dereferenced here. */
BVHMetal::BVHMetal(const BVHParams &params_,
                   const vector<Geometry *> &geometry_,
                   const vector<Object *> &objects_,
                   Device *device)
    : BVH(params_, geometry_, objects_), stats(device->stats)
{
}
|
||||||
|
|
||||||
|
/* Give the acceleration structure back: un-account its allocated size from the
 * device statistics first, then drop our reference to it. */
BVHMetal::~BVHMetal()
{
  if (@available(macos 12.0, *)) {
    if (accel_struct != nil) {
      stats.mem_free([accel_struct allocatedSize]);
      [accel_struct release];
    }
  }
}
|
||||||
|
|
||||||
|
/* Build (or refit) the bottom level acceleration structure for a triangle mesh.
 *
 * The work is encoded into a command buffer and committed; the result is stored
 * in `accel_struct` from the completion handler, and `accel_struct_building`
 * stays true until that has happened. For static BVHs the structure is
 * additionally compacted by a second command buffer.
 *
 * Returns false when the mesh has no triangles or the OS is older than
 * macOS 12.0, true once the build has been committed. */
bool BVHMetal::build_BLAS_mesh(Progress &progress,
                               id<MTLDevice> device,
                               id<MTLCommandQueue> queue,
                               Geometry *const geom,
                               bool refit)
{
  if (@available(macos 12.0, *)) {
    Mesh *const mesh = static_cast<Mesh *const>(geom);
    if (mesh->num_triangles() == 0) {
      return false;
    }

    BVH_status(
        "Building mesh BLAS | %7d tris | %s", (int)mesh->num_triangles(), geom->name.c_str());

    const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);

    const array<float3> &verts = mesh->get_verts();
    const array<int> &tris = mesh->get_triangles();
    const size_t num_verts = verts.size();
    const size_t num_indices = tris.size();

    /* More than one motion step only when motion blur is enabled on both the
     * BVH and the mesh and the mesh carries motion vertex positions. */
    Attribute *motion_keys = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
    const bool has_motion = motion_blur && mesh->get_use_motion_blur() && motion_keys;
    const size_t num_motion_steps = has_motion ? mesh->get_motion_steps() : 1;

    const MTLResourceOptions storage_mode = device.hasUnifiedMemory ?
                                                MTLResourceStorageModeShared :
                                                MTLResourceStorageModeManaged;

    /* Upload indices and vertex positions. */
    id<MTLBuffer> index_buffer = [device newBufferWithBytes:tris.data()
                                                     length:num_indices * sizeof(tris.data()[0])
                                                    options:storage_mode];
    id<MTLBuffer> position_buffer = nil;

    if (num_motion_steps == 1) {
      position_buffer = [device newBufferWithBytes:verts.data()
                                            length:num_verts * sizeof(verts.data()[0])
                                           options:storage_mode];
    }
    else {
      /* One block of `num_verts` positions per motion step, back to back. */
      position_buffer = [device
          newBufferWithLength:num_verts * num_motion_steps * sizeof(verts.data()[0])
                      options:storage_mode];
      float3 *const dest = (float3 *)[position_buffer contents];
      const size_t center_step = (num_motion_steps - 1) / 2;
      for (size_t step = 0; step < num_motion_steps; ++step) {
        /* The center step is the mesh itself; it is not stored in the attribute,
         * so steps after it are shifted down by one in the attribute data. */
        const float3 *src = mesh->get_verts().data();
        if (step != center_step) {
          const size_t attr_step = (step > center_step) ? step - 1 : step;
          src = motion_keys->data_float3() + attr_step * num_verts;
        }
        memcpy(dest + num_verts * step, src, num_verts * sizeof(float3));
      }
      if (storage_mode == MTLResourceStorageModeManaged) {
        [position_buffer didModifyRange:NSMakeRange(0, position_buffer.length)];
      }
    }

    /* Describe the geometry, with per-step keyframes when there is motion. */
    MTLAccelerationStructureGeometryDescriptor *geom_desc = nil;
    if (num_motion_steps > 1) {
      std::vector<MTLMotionKeyframeData *> keyframes;
      keyframes.reserve(num_motion_steps);
      for (size_t step = 0; step < num_motion_steps; ++step) {
        MTLMotionKeyframeData *keyframe = [MTLMotionKeyframeData data];
        keyframe.buffer = position_buffer;
        keyframe.offset = num_verts * step * sizeof(float3);
        keyframes.push_back(keyframe);
      }

      MTLAccelerationStructureMotionTriangleGeometryDescriptor *motion_desc =
          [MTLAccelerationStructureMotionTriangleGeometryDescriptor descriptor];
      motion_desc.vertexBuffers = [NSArray arrayWithObjects:keyframes.data()
                                                      count:keyframes.size()];
      motion_desc.vertexStride = sizeof(verts.data()[0]);
      motion_desc.indexBuffer = index_buffer;
      motion_desc.indexBufferOffset = 0;
      motion_desc.indexType = MTLIndexTypeUInt32;
      motion_desc.triangleCount = num_indices / 3;
      motion_desc.intersectionFunctionTableOffset = 0;

      geom_desc = motion_desc;
    }
    else {
      MTLAccelerationStructureTriangleGeometryDescriptor *static_desc =
          [MTLAccelerationStructureTriangleGeometryDescriptor descriptor];
      static_desc.vertexBuffer = position_buffer;
      static_desc.vertexBufferOffset = 0;
      static_desc.vertexStride = sizeof(verts.data()[0]);
      static_desc.indexBuffer = index_buffer;
      static_desc.indexBufferOffset = 0;
      static_desc.indexType = MTLIndexTypeUInt32;
      static_desc.triangleCount = num_indices / 3;
      static_desc.intersectionFunctionTableOffset = 0;

      geom_desc = static_desc;
    }

    /* Force a single any-hit call, so shadow record-all behavior works correctly
     * (matches the OptiX flag OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL). */
    geom_desc.allowDuplicateIntersectionFunctionInvocation = false;

    MTLPrimitiveAccelerationStructureDescriptor *accel_desc =
        [MTLPrimitiveAccelerationStructureDescriptor descriptor];
    accel_desc.geometryDescriptors = @[ geom_desc ];
    if (num_motion_steps > 1) {
      accel_desc.motionStartTime = 0.0f;
      accel_desc.motionEndTime = 1.0f;
      accel_desc.motionStartBorderMode = MTLMotionBorderModeClamp;
      accel_desc.motionEndBorderMode = MTLMotionBorderModeClamp;
      accel_desc.motionKeyframeCount = num_motion_steps;
    }

    /* Non-static BVHs are expected to be refitted, so favor build speed. */
    if (!use_fast_trace_bvh) {
      accel_desc.usage |= (MTLAccelerationStructureUsageRefit |
                           MTLAccelerationStructureUsagePreferFastBuild);
    }

    MTLAccelerationStructureSizes sizes = [device
        accelerationStructureSizesWithDescriptor:accel_desc];
    id<MTLAccelerationStructure> accel_uncompressed = [device
        newAccelerationStructureWithSize:sizes.accelerationStructureSize];
    id<MTLBuffer> scratch_buffer = [device newBufferWithLength:sizes.buildScratchBufferSize
                                                       options:MTLResourceStorageModePrivate];
    /* Receives the compacted size (64 bit) written by the GPU. */
    id<MTLBuffer> size_buffer = [device newBufferWithLength:8
                                                    options:MTLResourceStorageModeShared];
    id<MTLCommandBuffer> build_commands = [queue commandBuffer];
    id<MTLAccelerationStructureCommandEncoder> build_encoder =
        [build_commands accelerationStructureCommandEncoder];
    if (refit) {
      [build_encoder refitAccelerationStructure:accel_struct
                                     descriptor:accel_desc
                                    destination:accel_uncompressed
                                  scratchBuffer:scratch_buffer
                            scratchBufferOffset:0];
    }
    else {
      [build_encoder buildAccelerationStructure:accel_uncompressed
                                     descriptor:accel_desc
                                  scratchBuffer:scratch_buffer
                            scratchBufferOffset:0];
    }
    if (use_fast_trace_bvh) {
      [build_encoder writeCompactedAccelerationStructureSize:accel_uncompressed
                                                    toBuffer:size_buffer
                                                      offset:0
                                                sizeDataType:MTLDataTypeULong];
    }
    [build_encoder endEncoding];
    [build_commands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
      /* The inputs of the build are no longer needed. */
      [scratch_buffer release];
      [index_buffer release];
      [position_buffer release];

      if (use_fast_trace_bvh) {
        /* Compact into a right-sized structure with a second command buffer. */
        uint64_t compressed_size = *(uint64_t *)size_buffer.contents;

        dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
          id<MTLCommandBuffer> compact_commands = [queue commandBuffer];
          id<MTLAccelerationStructureCommandEncoder> compact_encoder =
              [compact_commands accelerationStructureCommandEncoder];
          id<MTLAccelerationStructure> accel = [device
              newAccelerationStructureWithSize:compressed_size];
          [compact_encoder copyAndCompactAccelerationStructure:accel_uncompressed
                                       toAccelerationStructure:accel];
          [compact_encoder endEncoding];
          [compact_commands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
            uint64_t allocated_size = [accel allocatedSize];
            stats.mem_alloc(allocated_size);
            accel_struct = accel;
            [accel_uncompressed release];
            accel_struct_building = false;
          }];
          [compact_commands commit];
        });
      }
      else {
        /* No compaction: the freshly built structure is the final one. */
        accel_struct = accel_uncompressed;

        uint64_t allocated_size = [accel_struct allocatedSize];
        stats.mem_alloc(allocated_size);
        accel_struct_building = false;
      }
      [size_buffer release];
    }];

    /* Mark as in flight before handing the work to the GPU. */
    accel_struct_building = true;
    [build_commands commit];

    return true;
  }
  return false;
}
|
||||||
|
|
||||||
|
/* Build (or refit) the bottom level acceleration structure for hair curves.
 *
 * Every curve segment contributes one axis aligned bounding box per motion
 * step. The build is committed asynchronously; `accel_struct` is assigned from
 * the completion handler and `accel_struct_building` stays true until then.
 * For static BVHs the structure is compacted by a second command buffer.
 *
 * Returns false when there are no curves or the OS is older than macOS 12.0,
 * true once the build has been committed. */
bool BVHMetal::build_BLAS_hair(Progress &progress,
                               id<MTLDevice> device,
                               id<MTLCommandQueue> queue,
                               Geometry *const geom,
                               bool refit)
{
  if (@available(macos 12.0, *)) {
    Hair *hair = static_cast<Hair *>(geom);
    if (hair->num_curves() == 0) {
      return false;
    }

    BVH_status(
        "Building hair BLAS | %7d curves | %s", (int)hair->num_curves(), geom->name.c_str());

    const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);
    const size_t num_segments = hair->num_segments();

    /* More than one motion step only when motion blur is enabled on both the
     * BVH and the hair and the hair carries motion vertex positions. */
    Attribute *motion_keys = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
    const bool has_motion = motion_blur && hair->get_use_motion_blur() && motion_keys;
    const size_t num_motion_steps = has_motion ? hair->get_motion_steps() : 1;

    const size_t num_aabbs = num_segments * num_motion_steps;

    const MTLResourceOptions storage_mode = device.hasUnifiedMemory ?
                                                MTLResourceStorageModeShared :
                                                MTLResourceStorageModeManaged;

    /* Bounding boxes are laid out as one run of `num_segments` boxes per step. */
    id<MTLBuffer> aabb_buffer = [device
        newBufferWithLength:num_aabbs * sizeof(MTLAxisAlignedBoundingBox)
                    options:storage_mode];
    MTLAxisAlignedBoundingBox *aabb_data = (MTLAxisAlignedBoundingBox *)[aabb_buffer contents];

    const size_t center_step = (num_motion_steps - 1) / 2;
    for (size_t step = 0; step < num_motion_steps; ++step) {
      /* The center step is the hair itself; it is not stored in the attribute,
       * so steps after it are shifted down by one in the attribute data. */
      const float3 *keys = hair->get_curve_keys().data();
      if (step != center_step) {
        const size_t attr_step = (step > center_step) ? step - 1 : step;
        /* Technically this is a float4 array, but sizeof(float3) == sizeof(float4) */
        keys = motion_keys->data_float3() + attr_step * hair->get_curve_keys().size();
      }

      /* `segment_index` counts segments across all curves of this step. */
      size_t segment_index = 0;
      for (size_t curve_index = 0; curve_index < hair->num_curves(); ++curve_index) {
        const Hair::Curve curve = hair->get_curve(curve_index);

        for (int segment = 0; segment < curve.num_segments(); ++segment, ++segment_index) {
          BoundBox bounds = BoundBox::empty;
          curve.bounds_grow(segment, keys, hair->get_curve_radius().data(), bounds);

          MTLAxisAlignedBoundingBox &box = aabb_data[step * num_segments + segment_index];
          box.min = (MTLPackedFloat3 &)bounds.min;
          box.max = (MTLPackedFloat3 &)bounds.max;
        }
      }
    }

    if (storage_mode == MTLResourceStorageModeManaged) {
      [aabb_buffer didModifyRange:NSMakeRange(0, aabb_buffer.length)];
    }

#  if 0
    for (size_t i=0; i<num_aabbs && i < 400; i++) {
      MTLAxisAlignedBoundingBox& bb = aabb_data[i];
      printf("  %d:   %.1f,%.1f,%.1f -- %.1f,%.1f,%.1f\n", int(i), bb.min.x, bb.min.y, bb.min.z, bb.max.x, bb.max.y, bb.max.z);
    }
#  endif

    /* The descriptor flavor follows the BVH-wide `motion_blur` flag, not the
     * number of motion steps of this particular hair. */
    MTLAccelerationStructureGeometryDescriptor *geom_desc = nil;
    if (motion_blur) {
      std::vector<MTLMotionKeyframeData *> keyframes;
      keyframes.reserve(num_motion_steps);
      for (size_t step = 0; step < num_motion_steps; ++step) {
        MTLMotionKeyframeData *keyframe = [MTLMotionKeyframeData data];
        keyframe.buffer = aabb_buffer;
        keyframe.offset = step * num_segments * sizeof(MTLAxisAlignedBoundingBox);
        keyframes.push_back(keyframe);
      }

      MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor *motion_desc =
          [MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor descriptor];
      motion_desc.boundingBoxBuffers = [NSArray arrayWithObjects:keyframes.data()
                                                           count:keyframes.size()];
      motion_desc.boundingBoxCount = num_segments;
      motion_desc.boundingBoxStride = sizeof(aabb_data[0]);
      motion_desc.intersectionFunctionTableOffset = 1;

      /* Force a single any-hit call, so shadow record-all behavior works correctly
       * (matches the OptiX flag OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL). */
      motion_desc.allowDuplicateIntersectionFunctionInvocation = false;
      motion_desc.opaque = true;
      geom_desc = motion_desc;
    }
    else {
      MTLAccelerationStructureBoundingBoxGeometryDescriptor *static_desc =
          [MTLAccelerationStructureBoundingBoxGeometryDescriptor descriptor];
      static_desc.boundingBoxBuffer = aabb_buffer;
      static_desc.boundingBoxBufferOffset = 0;
      static_desc.boundingBoxCount = int(num_aabbs);
      static_desc.boundingBoxStride = sizeof(aabb_data[0]);
      static_desc.intersectionFunctionTableOffset = 1;

      /* Force a single any-hit call, so shadow record-all behavior works correctly
       * (matches the OptiX flag OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL). */
      static_desc.allowDuplicateIntersectionFunctionInvocation = false;
      static_desc.opaque = true;
      geom_desc = static_desc;
    }

    MTLPrimitiveAccelerationStructureDescriptor *accel_desc =
        [MTLPrimitiveAccelerationStructureDescriptor descriptor];
    accel_desc.geometryDescriptors = @[ geom_desc ];

    if (motion_blur) {
      accel_desc.motionStartTime = 0.0f;
      accel_desc.motionEndTime = 1.0f;
      accel_desc.motionStartBorderMode = MTLMotionBorderModeVanish;
      accel_desc.motionEndBorderMode = MTLMotionBorderModeVanish;
      accel_desc.motionKeyframeCount = num_motion_steps;
    }

    /* Non-static BVHs are expected to be refitted, so favor build speed. */
    if (!use_fast_trace_bvh) {
      accel_desc.usage |= (MTLAccelerationStructureUsageRefit |
                           MTLAccelerationStructureUsagePreferFastBuild);
    }

    MTLAccelerationStructureSizes sizes = [device
        accelerationStructureSizesWithDescriptor:accel_desc];
    id<MTLAccelerationStructure> accel_uncompressed = [device
        newAccelerationStructureWithSize:sizes.accelerationStructureSize];
    id<MTLBuffer> scratch_buffer = [device newBufferWithLength:sizes.buildScratchBufferSize
                                                       options:MTLResourceStorageModePrivate];
    /* Receives the compacted size (64 bit) written by the GPU. */
    id<MTLBuffer> size_buffer = [device newBufferWithLength:8
                                                    options:MTLResourceStorageModeShared];
    id<MTLCommandBuffer> build_commands = [queue commandBuffer];
    id<MTLAccelerationStructureCommandEncoder> build_encoder =
        [build_commands accelerationStructureCommandEncoder];
    if (refit) {
      [build_encoder refitAccelerationStructure:accel_struct
                                     descriptor:accel_desc
                                    destination:accel_uncompressed
                                  scratchBuffer:scratch_buffer
                            scratchBufferOffset:0];
    }
    else {
      [build_encoder buildAccelerationStructure:accel_uncompressed
                                     descriptor:accel_desc
                                  scratchBuffer:scratch_buffer
                            scratchBufferOffset:0];
    }
    if (use_fast_trace_bvh) {
      [build_encoder writeCompactedAccelerationStructureSize:accel_uncompressed
                                                    toBuffer:size_buffer
                                                      offset:0
                                                sizeDataType:MTLDataTypeULong];
    }
    [build_encoder endEncoding];
    [build_commands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
      /* The inputs of the build are no longer needed. */
      [scratch_buffer release];
      [aabb_buffer release];

      if (use_fast_trace_bvh) {
        /* Compact into a right-sized structure with a second command buffer. */
        uint64_t compressed_size = *(uint64_t *)size_buffer.contents;

        dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
          id<MTLCommandBuffer> compact_commands = [queue commandBuffer];
          id<MTLAccelerationStructureCommandEncoder> compact_encoder =
              [compact_commands accelerationStructureCommandEncoder];
          id<MTLAccelerationStructure> accel = [device
              newAccelerationStructureWithSize:compressed_size];
          [compact_encoder copyAndCompactAccelerationStructure:accel_uncompressed
                                       toAccelerationStructure:accel];
          [compact_encoder endEncoding];
          [compact_commands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
            uint64_t allocated_size = [accel allocatedSize];
            stats.mem_alloc(allocated_size);
            accel_struct = accel;
            [accel_uncompressed release];
            accel_struct_building = false;
          }];
          [compact_commands commit];
        });
      }
      else {
        /* No compaction: the freshly built structure is the final one. */
        accel_struct = accel_uncompressed;

        uint64_t allocated_size = [accel_struct allocatedSize];
        stats.mem_alloc(allocated_size);
        accel_struct_building = false;
      }
      [size_buffer release];
    }];

    /* Mark as in flight before handing the work to the GPU. */
    accel_struct_building = true;
    [build_commands commit];
    return true;
  }
  return false;
}
|
||||||
|
|
||||||
|
/* Build the bottom level acceleration structure for the single geometry held
 * by this BVH, dispatching on its type: volumes and meshes go through the
 * triangle path, hair through the bounding box path. Any other geometry type,
 * or an OS older than macOS 12.0, yields false. */
bool BVHMetal::build_BLAS(Progress &progress,
                          id<MTLDevice> device,
                          id<MTLCommandQueue> queue,
                          bool refit)
{
  if (@available(macos 12.0, *)) {
    /* A BLAS always wraps exactly one object with one geometry. */
    assert(objects.size() == 1 && geometry.size() == 1);

    Geometry *const geom = geometry[0];
    const auto type = geom->geometry_type;

    if (type == Geometry::VOLUME || type == Geometry::MESH) {
      return build_BLAS_mesh(progress, device, queue, geom, refit);
    }
    if (type == Geometry::HAIR) {
      return build_BLAS_hair(progress, device, queue, geom, refit);
    }
    return false;
  }
  return false;
}
|
||||||
|
|
||||||
|
bool BVHMetal::build_TLAS(Progress &progress,
|
||||||
|
id<MTLDevice> device,
|
||||||
|
id<MTLCommandQueue> queue,
|
||||||
|
bool refit)
|
||||||
|
{
|
||||||
|
if (@available(macos 12.0, *)) {
|
||||||
|
|
||||||
|
/* we need to sync here and ensure that all BLAS have completed async generation by both GCD
|
||||||
|
* and Metal */
|
||||||
|
{
|
||||||
|
__block bool complete_bvh = false;
|
||||||
|
while (!complete_bvh) {
|
||||||
|
dispatch_sync(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
|
||||||
|
complete_bvh = true;
|
||||||
|
for (Object *ob : objects) {
|
||||||
|
/* Skip non-traceable objects */
|
||||||
|
if (!ob->is_traceable())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
Geometry const *geom = ob->get_geometry();
|
||||||
|
BVHMetal const *blas = static_cast<BVHMetal const *>(geom->bvh);
|
||||||
|
if (blas->accel_struct_building) {
|
||||||
|
complete_bvh = false;
|
||||||
|
|
||||||
|
/* We're likely waiting on a command buffer that's in flight to complete.
|
||||||
|
* Queue up a command buffer and wait for it complete before checking the BLAS again
|
||||||
|
*/
|
||||||
|
id<MTLCommandBuffer> command_buffer = [queue commandBuffer];
|
||||||
|
[command_buffer commit];
|
||||||
|
[command_buffer waitUntilCompleted];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t num_instances = 0;
|
||||||
|
uint32_t num_motion_transforms = 0;
|
||||||
|
for (Object *ob : objects) {
|
||||||
|
/* Skip non-traceable objects */
|
||||||
|
if (!ob->is_traceable())
|
||||||
|
continue;
|
||||||
|
num_instances++;
|
||||||
|
|
||||||
|
if (ob->use_motion()) {
|
||||||
|
num_motion_transforms += max(1, ob->get_motion().size());
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
num_motion_transforms++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*------------------------------------------------*/
|
||||||
|
BVH_status("Building TLAS | %7d instances", (int)num_instances);
|
||||||
|
/*------------------------------------------------*/
|
||||||
|
|
||||||
|
const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);
|
||||||
|
|
||||||
|
NSMutableArray *all_blas = [NSMutableArray array];
|
||||||
|
unordered_map<BVHMetal const *, int> instance_mapping;
|
||||||
|
|
||||||
|
/* Lambda function to build/retrieve the BLAS index mapping */
|
||||||
|
auto get_blas_index = [&](BVHMetal const *blas) {
|
||||||
|
auto it = instance_mapping.find(blas);
|
||||||
|
if (it != instance_mapping.end()) {
|
||||||
|
return it->second;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
int blas_index = (int)[all_blas count];
|
||||||
|
instance_mapping[blas] = blas_index;
|
||||||
|
if (@available(macos 12.0, *)) {
|
||||||
|
[all_blas addObject:blas->accel_struct];
|
||||||
|
}
|
||||||
|
return blas_index;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
MTLResourceOptions storage_mode;
|
||||||
|
if (device.hasUnifiedMemory) {
|
||||||
|
storage_mode = MTLResourceStorageModeShared;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
storage_mode = MTLResourceStorageModeManaged;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t instance_size;
|
||||||
|
if (motion_blur) {
|
||||||
|
instance_size = sizeof(MTLAccelerationStructureMotionInstanceDescriptor);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
instance_size = sizeof(MTLAccelerationStructureUserIDInstanceDescriptor);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Allocate a GPU buffer for the instance data and populate it */
|
||||||
|
id<MTLBuffer> instanceBuf = [device newBufferWithLength:num_instances * instance_size
|
||||||
|
options:storage_mode];
|
||||||
|
id<MTLBuffer> motion_transforms_buf = nil;
|
||||||
|
MTLPackedFloat4x3 *motion_transforms = nullptr;
|
||||||
|
if (motion_blur && num_motion_transforms) {
|
||||||
|
motion_transforms_buf = [device
|
||||||
|
newBufferWithLength:num_motion_transforms * sizeof(MTLPackedFloat4x3)
|
||||||
|
options:storage_mode];
|
||||||
|
motion_transforms = (MTLPackedFloat4x3 *)motion_transforms_buf.contents;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t instance_index = 0;
|
||||||
|
uint32_t motion_transform_index = 0;
|
||||||
|
for (Object *ob : objects) {
|
||||||
|
/* Skip non-traceable objects */
|
||||||
|
if (!ob->is_traceable())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
Geometry const *geom = ob->get_geometry();
|
||||||
|
|
||||||
|
BVHMetal const *blas = static_cast<BVHMetal const *>(geom->bvh);
|
||||||
|
uint32_t accel_struct_index = get_blas_index(blas);
|
||||||
|
|
||||||
|
/* Add some of the object visibility bits to the mask.
|
||||||
|
* __prim_visibility contains the combined visibility bits of all instances, so is not
|
||||||
|
* reliable if they differ between instances.
|
||||||
|
*
|
||||||
|
* METAL_WIP: OptiX visibility mask can only contain 8 bits, so have to trade-off here
|
||||||
|
* and select just a few important ones.
|
||||||
|
*/
|
||||||
|
uint32_t mask = ob->visibility_for_tracing() & 0xFF;
|
||||||
|
|
||||||
|
/* Have to have at least one bit in the mask, or else instance would always be culled. */
|
||||||
|
if (0 == mask) {
|
||||||
|
mask = 0xFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set user instance ID to object index */
|
||||||
|
int object_index = ob->get_device_index();
|
||||||
|
uint32_t user_id = uint32_t(object_index);
|
||||||
|
|
||||||
|
/* Bake into the appropriate descriptor */
|
||||||
|
if (motion_blur) {
|
||||||
|
MTLAccelerationStructureMotionInstanceDescriptor *instances =
|
||||||
|
(MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents];
|
||||||
|
MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[instance_index++];
|
||||||
|
|
||||||
|
desc.accelerationStructureIndex = accel_struct_index;
|
||||||
|
desc.userID = user_id;
|
||||||
|
desc.mask = mask;
|
||||||
|
desc.motionStartTime = 0.0f;
|
||||||
|
desc.motionEndTime = 1.0f;
|
||||||
|
desc.motionTransformsStartIndex = motion_transform_index;
|
||||||
|
desc.motionStartBorderMode = MTLMotionBorderModeVanish;
|
||||||
|
desc.motionEndBorderMode = MTLMotionBorderModeVanish;
|
||||||
|
desc.intersectionFunctionTableOffset = 0;
|
||||||
|
|
||||||
|
int key_count = ob->get_motion().size();
|
||||||
|
if (key_count) {
|
||||||
|
desc.motionTransformsCount = key_count;
|
||||||
|
|
||||||
|
Transform *keys = ob->get_motion().data();
|
||||||
|
for (int i = 0; i < key_count; i++) {
|
||||||
|
float *t = (float *)&motion_transforms[motion_transform_index++];
|
||||||
|
/* Transpose transform */
|
||||||
|
auto src = (float const *)&keys[i];
|
||||||
|
for (int i = 0; i < 12; i++) {
|
||||||
|
t[i] = src[(i / 3) + 4 * (i % 3)];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
desc.motionTransformsCount = 1;
|
||||||
|
|
||||||
|
float *t = (float *)&motion_transforms[motion_transform_index++];
|
||||||
|
if (ob->get_geometry()->is_instanced()) {
|
||||||
|
/* Transpose transform */
|
||||||
|
auto src = (float const *)&ob->get_tfm();
|
||||||
|
for (int i = 0; i < 12; i++) {
|
||||||
|
t[i] = src[(i / 3) + 4 * (i % 3)];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* Clear transform to identity matrix */
|
||||||
|
t[0] = t[4] = t[8] = 1.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
MTLAccelerationStructureUserIDInstanceDescriptor *instances =
|
||||||
|
(MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents];
|
||||||
|
MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[instance_index++];
|
||||||
|
|
||||||
|
desc.accelerationStructureIndex = accel_struct_index;
|
||||||
|
desc.userID = user_id;
|
||||||
|
desc.mask = mask;
|
||||||
|
desc.intersectionFunctionTableOffset = 0;
|
||||||
|
|
||||||
|
float *t = (float *)&desc.transformationMatrix;
|
||||||
|
if (ob->get_geometry()->is_instanced()) {
|
||||||
|
/* Transpose transform */
|
||||||
|
auto src = (float const *)&ob->get_tfm();
|
||||||
|
for (int i = 0; i < 12; i++) {
|
||||||
|
t[i] = src[(i / 3) + 4 * (i % 3)];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* Clear transform to identity matrix */
|
||||||
|
t[0] = t[4] = t[8] = 1.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (storage_mode == MTLResourceStorageModeManaged) {
|
||||||
|
[instanceBuf didModifyRange:NSMakeRange(0, instanceBuf.length)];
|
||||||
|
if (motion_transforms_buf) {
|
||||||
|
[motion_transforms_buf didModifyRange:NSMakeRange(0, motion_transforms_buf.length)];
|
||||||
|
assert(num_motion_transforms == motion_transform_index);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MTLInstanceAccelerationStructureDescriptor *accelDesc =
|
||||||
|
[MTLInstanceAccelerationStructureDescriptor descriptor];
|
||||||
|
accelDesc.instanceCount = num_instances;
|
||||||
|
accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeUserID;
|
||||||
|
accelDesc.instanceDescriptorBuffer = instanceBuf;
|
||||||
|
accelDesc.instanceDescriptorBufferOffset = 0;
|
||||||
|
accelDesc.instanceDescriptorStride = instance_size;
|
||||||
|
accelDesc.instancedAccelerationStructures = all_blas;
|
||||||
|
|
||||||
|
if (motion_blur) {
|
||||||
|
accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeMotion;
|
||||||
|
accelDesc.motionTransformBuffer = motion_transforms_buf;
|
||||||
|
accelDesc.motionTransformCount = num_motion_transforms;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!use_fast_trace_bvh) {
|
||||||
|
accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
|
||||||
|
MTLAccelerationStructureUsagePreferFastBuild);
|
||||||
|
}
|
||||||
|
|
||||||
|
MTLAccelerationStructureSizes accelSizes = [device
|
||||||
|
accelerationStructureSizesWithDescriptor:accelDesc];
|
||||||
|
id<MTLAccelerationStructure> accel = [device
|
||||||
|
newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
|
||||||
|
id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize
|
||||||
|
options:MTLResourceStorageModePrivate];
|
||||||
|
id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
|
||||||
|
id<MTLAccelerationStructureCommandEncoder> accelEnc =
|
||||||
|
[accelCommands accelerationStructureCommandEncoder];
|
||||||
|
if (refit) {
|
||||||
|
[accelEnc refitAccelerationStructure:accel_struct
|
||||||
|
descriptor:accelDesc
|
||||||
|
destination:accel
|
||||||
|
scratchBuffer:scratchBuf
|
||||||
|
scratchBufferOffset:0];
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
[accelEnc buildAccelerationStructure:accel
|
||||||
|
descriptor:accelDesc
|
||||||
|
scratchBuffer:scratchBuf
|
||||||
|
scratchBufferOffset:0];
|
||||||
|
}
|
||||||
|
[accelEnc endEncoding];
|
||||||
|
[accelCommands commit];
|
||||||
|
[accelCommands waitUntilCompleted];
|
||||||
|
|
||||||
|
if (motion_transforms_buf) {
|
||||||
|
[motion_transforms_buf release];
|
||||||
|
}
|
||||||
|
[instanceBuf release];
|
||||||
|
[scratchBuf release];
|
||||||
|
|
||||||
|
uint64_t allocated_size = [accel allocatedSize];
|
||||||
|
stats.mem_alloc(allocated_size);
|
||||||
|
|
||||||
|
/* Cache top and bottom-level acceleration structs */
|
||||||
|
accel_struct = accel;
|
||||||
|
blas_array.clear();
|
||||||
|
blas_array.reserve(all_blas.count);
|
||||||
|
for (id<MTLAccelerationStructure> blas in all_blas) {
|
||||||
|
blas_array.push_back(blas);
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Build or refit this BVH on the given Metal device.
 *
 * Any previously built acceleration structure is released first, unless this is a refit of a
 * non-static BVH, in which case the existing structure is required. The work is then forwarded
 * to build_TLAS() for top-level BVHs and to build_BLAS() otherwise; their result is returned. */
bool BVHMetal::build(Progress &progress,
                     id<MTLDevice> device,
                     id<MTLCommandQueue> queue,
                     bool refit)
{
  if (@available(macos 12.0, *)) {
    const bool keep_existing = refit && (params.bvh_type != BVH_TYPE_STATIC);
    if (keep_existing) {
      /* A refit needs a previously built structure to start from. */
      assert(accel_struct);
    }
    else if (accel_struct) {
      /* Full rebuild: give back the accounted memory and drop the old structure. */
      stats.mem_free(accel_struct.allocatedSize);
      [accel_struct release];
      accel_struct = nil;
    }
  }

  if (params.top_level) {
    return build_TLAS(progress, device, queue, refit);
  }
  return build_BLAS(progress, device, queue, refit);
}
|
||||||
|
|
||||||
|
CCL_NAMESPACE_END
|
||||||
|
|
||||||
|
#endif /* WITH_METAL */
|
37
intern/cycles/device/metal/device.h
Normal file
37
intern/cycles/device/metal/device.h
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2021 Blender Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "util/string.h"
|
||||||
|
#include "util/vector.h"
|
||||||
|
|
||||||
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
class Device;
|
||||||
|
class DeviceInfo;
|
||||||
|
class Profiler;
|
||||||
|
class Stats;
|
||||||
|
|
||||||
|
bool device_metal_init();
|
||||||
|
|
||||||
|
Device *device_metal_create(const DeviceInfo &info, Stats &stats, Profiler &profiler);
|
||||||
|
|
||||||
|
void device_metal_info(vector<DeviceInfo> &devices);
|
||||||
|
|
||||||
|
string device_metal_capabilities();
|
||||||
|
|
||||||
|
CCL_NAMESPACE_END
|
136
intern/cycles/device/metal/device.mm
Normal file
136
intern/cycles/device/metal/device.mm
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2021 Blender Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef WITH_METAL
|
||||||
|
|
||||||
|
# include "device/metal/device.h"
|
||||||
|
# include "device/metal/device_impl.h"
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "util/debug.h"
|
||||||
|
#include "util/set.h"
|
||||||
|
#include "util/system.h"
|
||||||
|
|
||||||
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
#ifdef WITH_METAL
|
||||||
|
|
||||||
|
/* Allocate a new MetalDevice for `info`; the returned pointer is created with `new`. */
Device *device_metal_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
{
  MetalDevice *metal_device = new MetalDevice(info, stats, profiler);
  return metal_device;
}
|
||||||
|
|
||||||
|
/* Metal backend initialization hook; there is nothing to set up, so it always succeeds. */
bool device_metal_init()
{
  const bool initialized = true;
  return initialized;
}
|
||||||
|
|
||||||
|
/* Store the number of Metal devices reported by the system in `*num_devices`.
 *
 * Always returns 0. */
static int device_metal_get_num_devices_safe(uint32_t *num_devices)
{
  /* MTLCopyAllDevices() hands back an owned array ("Copy" naming rule), so it has to be
   * released here to avoid leaking it on every call. */
  NSArray<id<MTLDevice>> *all_devices = MTLCopyAllDevices();
  *num_devices = (uint32_t)all_devices.count;
  [all_devices release];
  return 0;
}
|
||||||
|
|
||||||
|
/* Append one DeviceInfo per usable Metal device to `devices`.
 *
 * Does nothing when the system reports no Metal devices. Each entry gets a persistent ID of the
 * form "METAL_<device name>", with a per-device suffix appended when that ID was already used. */
void device_metal_info(vector<DeviceInfo> &devices)
{
  uint32_t num_devices = 0;
  device_metal_get_num_devices_safe(&num_devices);
  if (num_devices == 0) {
    return;
  }

  vector<MetalPlatformDevice> usable_devices;
  MetalInfo::get_usable_devices(&usable_devices);
  /* Devices are numbered consecutively across platforms. */
  set<string> unique_ids;
  int device_index = 0;
  for (MetalPlatformDevice &device : usable_devices) {
    /* Compute unique ID for persistent user preferences. */
    const string &device_name = device.device_name;
    string id = string("METAL_") + device_name;

    /* Hardware ID might not be unique, add device number in that case.
     * The suffix must vary per device: the total device count is the same for every entry and
     * would give identical IDs to a third (or later) device with the same name. */
    if (unique_ids.find(id) != unique_ids.end()) {
      id += string_printf("_ID_%d", device_index);
    }
    unique_ids.insert(id);

    /* Create DeviceInfo. */
    DeviceInfo info;
    info.type = DEVICE_METAL;
    info.description = string_remove_trademark(string(device_name));

    /* Ensure unique naming on Apple Silicon / SoC devices which return the same string for CPU
     * and GPU. */
    if (info.description == system_cpu_brand_string()) {
      info.description += " (GPU)";
    }

    info.num = device_index;
    /* We don't know if it's used for display, but assume it is. */
    info.display_device = true;
    info.denoisers = DENOISER_NONE;
    info.id = id;

    devices.push_back(info);
    device_index++;
  }
}
|
||||||
|
|
||||||
|
/* Return a human-readable summary of the Metal devices reported by the system: a count line
 * followed by one line per device name, or a fixed message when there are none. */
string device_metal_capabilities()
{
  /* The device query must not live inside assert(): with NDEBUG the call would be compiled out
   * and the count would stay 0. Query once and reuse the array for both count and listing. */
  NSArray<id<MTLDevice>> *allDevices = MTLCopyAllDevices();
  const uint32_t num_devices = (uint32_t)allDevices.count;
  if (num_devices == 0) {
    [allDevices release];
    return "No Metal devices found\n";
  }

  string result = string_printf("Number of devices: %u\n", num_devices);
  for (id<MTLDevice> device in allDevices) {
    result += string_printf("\t\tDevice: %s\n", [device.name UTF8String]);
  }

  /* The array is owned by this function (returned by a "Copy" function). */
  [allDevices release];
  return result;
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
/* Built without Metal support: no device can be created. */
Device *device_metal_create(const DeviceInfo & /*info*/,
                            Stats & /*stats*/,
                            Profiler & /*profiler*/)
{
  return nullptr;
}
|
||||||
|
|
||||||
|
/* Built without Metal support: initialization always reports failure. */
bool device_metal_init()
{
  const bool initialized = false;
  return initialized;
}
|
||||||
|
|
||||||
|
/* Built without Metal support: leaves the device list untouched. */
void device_metal_info(vector<DeviceInfo> & /*devices*/)
{
}
|
||||||
|
|
||||||
|
/* Built without Metal support: there are no capabilities to report. */
string device_metal_capabilities()
{
  return string("");
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
CCL_NAMESPACE_END
|
166
intern/cycles/device/metal/device_impl.h
Normal file
166
intern/cycles/device/metal/device_impl.h
Normal file
@@ -0,0 +1,166 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2021 Blender Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef WITH_METAL
|
||||||
|
|
||||||
|
# include "bvh/bvh.h"
|
||||||
|
# include "device/device.h"
|
||||||
|
# include "device/metal/bvh.h"
|
||||||
|
# include "device/metal/device.h"
|
||||||
|
# include "device/metal/kernel.h"
|
||||||
|
# include "device/metal/queue.h"
|
||||||
|
# include "device/metal/util.h"
|
||||||
|
|
||||||
|
# include <Metal/Metal.h>
|
||||||
|
|
||||||
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
class DeviceQueue;
|
||||||
|
|
||||||
|
/* Device implementation for the Metal backend. Holds the Metal objects, memory bookkeeping,
 * texture bindings and kernels used by the device, and overrides the Device interface. */
class MetalDevice : public Device {
 public:
  id<MTLDevice> mtlDevice = nil;
  id<MTLLibrary> mtlLibrary[PSO_NUM] = {nil};
  /* Encoder used for fetching device pointers from MTLBuffers. */
  id<MTLArgumentEncoder> mtlBufferKernelParamsEncoder = nil;
  id<MTLCommandQueue> mtlGeneralCommandQueue = nil;
  /* Encoder used for fetching device pointers from MTLBuffers. */
  id<MTLArgumentEncoder> mtlAncillaryArgEncoder = nil;
  string source_used_for_compile[PSO_NUM];

  KernelParamsMetal launch_params = {0};

  /* MetalRT members ----------------------------------*/
  BVHMetal *bvhMetalRT = nullptr;
  bool motion_blur = false;
  /* Encoder used for fetching device pointers from MTLAccelerationStructure. */
  id<MTLArgumentEncoder> mtlASArgEncoder = nil;
  /*---------------------------------------------------*/

  string device_name;
  MetalGPUVendor device_vendor;

  uint kernel_features;
  MTLResourceOptions default_storage_mode;
  int max_threads_per_threadgroup;

  int mtlDevId = 0;
  bool first_error = true;

  /* Per-allocation record stored in `metal_mem_map`, keyed by its device_memory. */
  struct MetalMem {
    device_memory *mem = nullptr;
    int pointer_index = -1;
    id<MTLBuffer> mtlBuffer = nil;
    id<MTLTexture> mtlTexture = nil;
    uint64_t offset = 0;
    uint64_t size = 0;
    void *hostPtr = nullptr;
    /* If true, UMA memory in shared_pointer is being used. */
    bool use_UMA = false;
  };
  using MetalMemMap = map<device_memory *, unique_ptr<MetalMem>>;
  MetalMemMap metal_mem_map;
  std::vector<id<MTLResource>> delayed_free_list;
  std::recursive_mutex metal_mem_map_mutex;

  /* Bindless Textures */
  device_vector<TextureInfo> texture_info;
  bool need_texture_info;
  id<MTLArgumentEncoder> mtlTextureArgEncoder = nil;
  id<MTLBuffer> texture_bindings_2d = nil;
  id<MTLBuffer> texture_bindings_3d = nil;
  std::vector<id<MTLTexture>> texture_slot_map;

  MetalDeviceKernels kernels;
  bool use_metalrt = false;
  bool use_function_specialisation = false;

  BVHLayoutMask get_bvh_layout_mask() const override;

  void set_error(const string &error) override;

  MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);

  virtual ~MetalDevice();

  bool support_device(const uint /*kernel_features*/);

  bool check_peer_access(Device *peer_device) override;

  bool use_adaptive_compilation();

  string get_source(const uint kernel_features);

  string compile_kernel(const uint kernel_features, const char *name);

  bool load_kernels(const uint kernel_features) override;

  void reserve_local_memory(const uint kernel_features);

  void init_host_memory();

  void load_texture_info();

  bool should_use_graphics_interop() override;

  unique_ptr<DeviceQueue> gpu_queue_create() override;

  void build_bvh(BVH *bvh, Progress &progress, bool refit) override;

  /* ------------------------------------------------------------------ */
  /* low-level memory management */

  MetalMem *generic_alloc(device_memory &mem);

  void generic_copy_to(device_memory &mem);

  void generic_free(device_memory &mem);

  void mem_alloc(device_memory &mem) override;

  void mem_copy_to(device_memory &mem) override;

  /* Convenience overload: forwards to the full overload with -1 for every extent argument. */
  void mem_copy_from(device_memory &mem)
  {
    mem_copy_from(mem, -1, -1, -1, -1);
  }
  void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;

  void mem_zero(device_memory &mem) override;

  void mem_free(device_memory &mem) override;

  device_ptr mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/) override;

  void const_copy_to(const char *name, void *host, size_t size) override;

  void global_alloc(device_memory &mem);

  void global_free(device_memory &mem);

  void tex_alloc(device_texture &mem);

  void tex_alloc_as_buffer(device_texture &mem);

  void tex_free(device_texture &mem);

  void flush_delayed_free_list();
};
|
||||||
|
|
||||||
|
CCL_NAMESPACE_END
|
||||||
|
|
||||||
|
#endif
|
1008
intern/cycles/device/metal/device_impl.mm
Normal file
1008
intern/cycles/device/metal/device_impl.mm
Normal file
File diff suppressed because it is too large
Load Diff
168
intern/cycles/device/metal/kernel.h
Normal file
168
intern/cycles/device/metal/kernel.h
Normal file
@@ -0,0 +1,168 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2021 Blender Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef WITH_METAL
|
||||||
|
|
||||||
|
# include "device/kernel.h"
|
||||||
|
# include <Metal/Metal.h>
|
||||||
|
|
||||||
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
class MetalDevice;
|
||||||
|
|
||||||
|
enum {
|
||||||
|
METALRT_FUNC_DEFAULT_TRI,
|
||||||
|
METALRT_FUNC_DEFAULT_BOX,
|
||||||
|
METALRT_FUNC_SHADOW_TRI,
|
||||||
|
METALRT_FUNC_SHADOW_BOX,
|
||||||
|
METALRT_FUNC_LOCAL_TRI,
|
||||||
|
METALRT_FUNC_LOCAL_BOX,
|
||||||
|
METALRT_FUNC_CURVE_RIBBON,
|
||||||
|
METALRT_FUNC_CURVE_RIBBON_SHADOW,
|
||||||
|
METALRT_FUNC_CURVE_ALL,
|
||||||
|
METALRT_FUNC_CURVE_ALL_SHADOW,
|
||||||
|
METALRT_FUNC_NUM
|
||||||
|
};
|
||||||
|
|
||||||
|
enum { METALRT_TABLE_DEFAULT, METALRT_TABLE_SHADOW, METALRT_TABLE_LOCAL, METALRT_TABLE_NUM };
|
||||||
|
|
||||||
|
/* Pipeline State Object types */
|
||||||
|
enum {
|
||||||
|
/* A kernel that can be used with all scenes, supporting all features.
|
||||||
|
* It is slow to compile, but only needs to be compiled once and is then
|
||||||
|
* cached for future render sessions. This allows a render to get underway
|
||||||
|
* on the GPU quickly.
|
||||||
|
*/
|
||||||
|
PSO_GENERIC,
|
||||||
|
|
||||||
|
/* A kernel that is relatively quick to compile, but is specialized for the
 * scene being rendered. It only contains the functionality that is needed and
 * even bakes in constants for some values, which means it needs to be
 * recompiled whenever a dependent setting is changed. The render performance
 * of this kernel is significantly faster though, and justifies the extra
 * compile time.
 */
|
||||||
|
/* METAL_WIP: This isn't used and will require more changes to enable. */
|
||||||
|
PSO_SPECIALISED,
|
||||||
|
|
||||||
|
PSO_NUM
|
||||||
|
};
|
||||||
|
|
||||||
|
const char *kernel_type_as_string(int kernel_type);
|
||||||
|
|
||||||
|
/* The Metal objects that make up one compiled kernel variant: the function, its compute
 * pipeline state and the intersection function tables. */
struct MetalKernelPipeline {
  /* Release every Metal object held by this pipeline and reset the handles to nil.
   *
   * Each handle is cleared right after it is released, so calling release() more than once
   * (e.g. explicitly and then again from an owner's destructor) never releases an object
   * twice. */
  void release()
  {
    if (pipeline) {
      [pipeline release];
      pipeline = nil;
    }
    if (function) {
      [function release];
      function = nil;
    }
    if (@available(macOS 11.0, *)) {
      for (int i = 0; i < METALRT_TABLE_NUM; i++) {
        if (intersection_func_table[i]) {
          [intersection_func_table[i] release];
          intersection_func_table[i] = nil;
        }
      }
    }
  }

  bool loaded = false;
  id<MTLFunction> function = nil;
  id<MTLComputePipelineState> pipeline = nil;

  API_AVAILABLE(macos(11.0))
  id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil};
};
|
||||||
|
|
||||||
|
/* Parameters describing a single kernel load request. */
struct MetalKernelLoadDesc {
  int pso_index = 0;
  const char *function_name = nullptr;
  int kernel_index = 0;
  int threads_per_threadgroup = 0;
  MTLFunctionConstantValues *constant_values = nullptr;
  NSArray *linked_functions = nullptr;

  /* Function arrays for the intersection tables, addressable by METALRT_TABLE_* index. */
  struct IntersectorFunctions {
    NSArray *defaults;
    NSArray *shadow;
    NSArray *local;

    /* Map a table index to its array; any index other than DEFAULT or SHADOW yields `local`. */
    NSArray *operator[](int index) const
    {
      switch (index) {
        case METALRT_TABLE_DEFAULT:
          return defaults;
        case METALRT_TABLE_SHADOW:
          return shadow;
        default:
          return local;
      }
    }
  } intersector_functions = {nullptr};
};
|
||||||
|
|
||||||
|
/* A Metal kernel together with its per-PSO pipelines and its threads-per-block count. */
class MetalDeviceKernel {
 public:
  ~MetalDeviceKernel();

  bool load(MetalDevice *device, MetalKernelLoadDesc const &desc, class MD5Hash const &md5);

  /* Flag the pipeline stored at `pso_index` as loaded. */
  void mark_loaded(int pso_index)
  {
    MetalKernelPipeline &entry = pso[pso_index];
    entry.loaded = true;
  }

  /* Current value of the threads-per-block count (0 until something assigns it). */
  int get_num_threads_per_block() const
  {
    return num_threads_per_block;
  }
  const MetalKernelPipeline &get_pso() const;

  double load_duration = 0.0;

 private:
  /* One pipeline slot per PSO type. */
  MetalKernelPipeline pso[PSO_NUM];

  int num_threads_per_block = 0;
};
|
||||||
|
|
||||||
|
/* Holds one MetalDeviceKernel per DeviceKernel, plus per-PSO intersection functions and the
 * per-PSO `loaded_md5` strings. */
class MetalDeviceKernels {
 public:
  bool load(MetalDevice *device, int kernel_type);
  bool available(DeviceKernel kernel) const;
  const MetalDeviceKernel &get(DeviceKernel kernel) const;

  /* Indexed by DeviceKernel. */
  MetalDeviceKernel kernels_[DEVICE_KERNEL_NUM];

  /* Indexed by PSO type, then by METALRT_FUNC_* value. */
  id<MTLFunction> rt_intersection_funcs[PSO_NUM][METALRT_FUNC_NUM] = {{nil}};

  /* One string per PSO type. */
  string loaded_md5[PSO_NUM];
};
|
||||||
|
|
||||||
|
CCL_NAMESPACE_END
|
||||||
|
|
||||||
|
#endif /* WITH_METAL */
|
525
intern/cycles/device/metal/kernel.mm
Normal file
525
intern/cycles/device/metal/kernel.mm
Normal file
@@ -0,0 +1,525 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2021 Blender Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef WITH_METAL
|
||||||
|
|
||||||
|
# include "device/metal/kernel.h"
|
||||||
|
# include "device/metal/device_impl.h"
|
||||||
|
# include "util/md5.h"
|
||||||
|
# include "util/path.h"
|
||||||
|
# include "util/tbb.h"
|
||||||
|
# include "util/time.h"
|
||||||
|
|
||||||
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
/* limit to 2 MTLCompiler instances */
|
||||||
|
int max_mtlcompiler_threads = 2;
|
||||||
|
|
||||||
|
/* Return the enumerator name for a PSO type.
 *
 * Unknown values trip an assert in debug builds and otherwise yield an empty string. */
const char *kernel_type_as_string(int kernel_type)
{
  if (kernel_type == PSO_GENERIC) {
    return "PSO_GENERIC";
  }
  if (kernel_type == PSO_SPECIALISED) {
    return "PSO_SPECIALISED";
  }

  assert(0);
  return "";
}
|
||||||
|
|
||||||
|
/* Release the Metal objects of every pipeline slot, in index order. */
MetalDeviceKernel::~MetalDeviceKernel()
{
  for (MetalKernelPipeline &pipeline_state : pso) {
    pipeline_state.release();
  }
}
|
||||||
|
|
||||||
|
/* Create the Metal function and compute pipeline state for one kernel.
 *
 * device:  Metal device that owns the library to load from and receives errors.
 * desc_in: describes which kernel / PSO slot to build, its entry point name,
 *          function constants, linked functions and threadgroup size.
 * md5:     hash of the kernel source/constants; combined with the OS version
 *          string to name the on-disk binary archive.
 *
 * Returns false when the entry point function cannot be obtained, true otherwise.
 * PSO_SPECIALISED pipelines are compiled asynchronously on a global dispatch
 * queue; all other pipelines are compiled before this function returns.
 *
 * NOTE(review): a pipeline compile failure is reported through
 * device->set_error() only; this function still returns true in that case. */
bool MetalDeviceKernel::load(MetalDevice *device,
                             MetalKernelLoadDesc const &desc_in,
                             MD5Hash const &md5)
{
  __block MetalKernelLoadDesc const desc(desc_in);
  if (desc.kernel_index == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
    /* skip megakernel */
    return true;
  }

  /* Binary archives are used unless explicitly disabled through the environment. */
  bool use_binary_archive = true;
  if (getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) {
    use_binary_archive = false;
  }

  id<MTLBinaryArchive> archive = nil;
  string metalbin_path;
  if (use_binary_archive) {
    /* The archive file name depends on the entry point, the hash (extended with the
     * OS version string) and the PSO slot. */
    NSProcessInfo *processInfo = [NSProcessInfo processInfo];
    string osVersion = [[processInfo operatingSystemVersionString] UTF8String];
    MD5Hash local_md5(md5);
    local_md5.append(osVersion);
    string metalbin_name = string(desc.function_name) + "." + local_md5.get_hex() +
                           to_string(desc.pso_index) + ".bin";
    metalbin_path = path_cache_get(path_join("kernels", metalbin_name));
    path_create_directories(metalbin_path);

    if (path_exists(metalbin_path)) {
      if (@available(macOS 11.0, *)) {
        MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
        archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())];
        archive = [device->mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil];
        [archiveDesc release];
      }
    }
  }

  NSString *entryPoint = [@(desc.function_name) copy];

  NSError *error = NULL;
  if (@available(macOS 11.0, *)) {
    MTLFunctionDescriptor *func_desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
    func_desc.name = entryPoint;
    if (desc.constant_values) {
      func_desc.constantValues = desc.constant_values;
    }
    pso[desc.pso_index].function = [device->mtlLibrary[desc.pso_index]
        newFunctionWithDescriptor:func_desc
                            error:&error];
  }
  [entryPoint release];

  if (pso[desc.pso_index].function == nil) {
    /* `error` can still be nil here (for example when the availability check above
     * fails), so never build a std::string from a NULL C string. */
    NSString *err = [error localizedDescription];
    string errors = err ? [err UTF8String] : "nil";

    device->set_error(
        string_printf("Error getting function \"%s\": %s", desc.function_name, errors.c_str()));
    return false;
  }

  /* `label` is a copy property: assigning an extra +1 copy would leak it. */
  pso[desc.pso_index].function.label = @(desc.function_name);

  __block MTLComputePipelineDescriptor *computePipelineStateDescriptor =
      [[MTLComputePipelineDescriptor alloc] init];

  computePipelineStateDescriptor.buffers[0].mutability = MTLMutabilityImmutable;
  computePipelineStateDescriptor.buffers[1].mutability = MTLMutabilityImmutable;
  computePipelineStateDescriptor.buffers[2].mutability = MTLMutabilityImmutable;

  if (@available(macos 10.14, *)) {
    computePipelineStateDescriptor.maxTotalThreadsPerThreadgroup = desc.threads_per_threadgroup;
  }
  computePipelineStateDescriptor.threadGroupSizeIsMultipleOfThreadExecutionWidth = true;

  computePipelineStateDescriptor.computeFunction = pso[desc.pso_index].function;
  if (@available(macOS 11.0, *)) {
    /* Attach the additional functions to an MTLLinkedFunctions object */
    if (desc.linked_functions) {
      /* Fill in a local object and release it after assignment: the descriptor
       * property keeps its own copy. */
      MTLLinkedFunctions *linked_functions = [[MTLLinkedFunctions alloc] init];
      linked_functions.functions = desc.linked_functions;
      computePipelineStateDescriptor.linkedFunctions = linked_functions;
      [linked_functions release];
    }

    computePipelineStateDescriptor.maxCallStackDepth = 1;
  }

  /* Create a new Compute pipeline state object */
  MTLPipelineOption pipelineOptions = MTLPipelineOptionNone;

  bool creating_new_archive = false;
  if (@available(macOS 11.0, *)) {
    if (use_binary_archive) {
      if (!archive) {
        /* No archive on disk yet: create an empty one and compile this pipeline into it. */
        MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
        archiveDesc.url = nil;
        archive = [device->mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil];
        [archiveDesc release];
        creating_new_archive = true;

        double starttime = time_dt();

        if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor
                                                          error:&error]) {
          NSString *errStr = [error localizedDescription];
          metal_printf("Failed to add PSO to archive:\n%s\n",
                       errStr ? [errStr UTF8String] : "nil");
        }
        else {
          double duration = time_dt() - starttime;
          metal_printf("%2d | %-55s | %7.2fs\n",
                       desc.kernel_index,
                       device_kernel_as_string((DeviceKernel)desc.kernel_index),
                       duration);

          if (desc.pso_index == PSO_GENERIC) {
            this->load_duration = duration;
          }
        }
      }
      computePipelineStateDescriptor.binaryArchives = [NSArray arrayWithObjects:archive, nil];
      pipelineOptions = MTLPipelineOptionFailOnBinaryArchiveMiss;
    }
  }

  double starttime = time_dt();

  /* Shared by the synchronous and asynchronous paths below: stores the resulting
   * pipeline, serializes the archive when needed and builds the MetalRT tables. */
  MTLNewComputePipelineStateWithReflectionCompletionHandler completionHandler = ^(
      id<MTLComputePipelineState> computePipelineState,
      MTLComputePipelineReflection *reflection,
      NSError *error) {
    bool recreate_archive = false;
    if (computePipelineState == nil && archive && !creating_new_archive) {

      assert(0);

      /* The archive loaded from disk did not contain a usable pipeline: compile
       * from scratch and write the archive out again below. */
      NSString *errStr = [error localizedDescription];
      metal_printf(
          "Failed to create compute pipeline state \"%s\" from archive - attempting recreation... "
          "(error: %s)\n",
          device_kernel_as_string((DeviceKernel)desc.kernel_index),
          errStr ? [errStr UTF8String] : "nil");
      computePipelineState = [device->mtlDevice
          newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
                                        options:MTLPipelineOptionNone
                                     reflection:nullptr
                                          error:&error];
      recreate_archive = true;
    }

    double duration = time_dt() - starttime;

    if (computePipelineState == nil) {
      NSString *errStr = [error localizedDescription];
      device->set_error(string_printf("Failed to create compute pipeline state \"%s\", error: \n",
                                      device_kernel_as_string((DeviceKernel)desc.kernel_index)) +
                        (errStr ? [errStr UTF8String] : "nil"));
      metal_printf("%2d | %-55s | %7.2fs | FAILED!\n",
                   desc.kernel_index,
                   device_kernel_as_string((DeviceKernel)desc.kernel_index),
                   duration);

      /* The descriptor is owned by this function; do not leak it on failure. */
      [computePipelineStateDescriptor release];
      computePipelineStateDescriptor = nil;
      return;
    }

    pso[desc.pso_index].pipeline = computePipelineState;
    num_threads_per_block = round_down(computePipelineState.maxTotalThreadsPerThreadgroup,
                                       computePipelineState.threadExecutionWidth);
    num_threads_per_block = std::max(num_threads_per_block,
                                     (int)computePipelineState.threadExecutionWidth);

    if (!use_binary_archive) {
      metal_printf("%2d | %-55s | %7.2fs\n",
                   desc.kernel_index,
                   device_kernel_as_string((DeviceKernel)desc.kernel_index),
                   duration);

      if (desc.pso_index == PSO_GENERIC) {
        this->load_duration = duration;
      }
    }

    if (@available(macOS 11.0, *)) {
      if (creating_new_archive || recreate_archive) {
        if (![archive serializeToURL:[NSURL fileURLWithPath:@(metalbin_path.c_str())]
                               error:&error]) {
          /* Guard against a missing description: passing NULL to "%s" is undefined. */
          NSString *errStr = [error localizedDescription];
          metal_printf("Failed to save binary archive, error:\n%s\n",
                       errStr ? [errStr UTF8String] : "nil");
        }
      }
    }

    [computePipelineStateDescriptor release];
    computePipelineStateDescriptor = nil;

    if (device->use_metalrt && desc.linked_functions) {
      for (int table = 0; table < METALRT_TABLE_NUM; table++) {
        if (@available(macOS 11.0, *)) {
          MTLIntersectionFunctionTableDescriptor *ift_desc =
              [[MTLIntersectionFunctionTableDescriptor alloc] init];
          ift_desc.functionCount = desc.intersector_functions[table].count;

          pso[desc.pso_index].intersection_func_table[table] = [pso[desc.pso_index].pipeline
              newIntersectionFunctionTableWithDescriptor:ift_desc];

          /* Finally write the function handles into this pipeline's table */
          for (int i = 0; i < 2; i++) {
            id<MTLFunctionHandle> handle = [pso[desc.pso_index].pipeline
                functionHandleWithFunction:desc.intersector_functions[table][i]];
            [pso[desc.pso_index].intersection_func_table[table] setFunction:handle atIndex:i];
          }
        }
      }
    }

    mark_loaded(desc.pso_index);
  };

  if (desc.pso_index == PSO_SPECIALISED) {
    /* Asynchronous load */
    dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
      /* Initialised so the handler never receives an indeterminate pointer. */
      NSError *error = nil;
      id<MTLComputePipelineState> pipeline = [device->mtlDevice
          newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
                                        options:pipelineOptions
                                     reflection:nullptr
                                          error:&error];
      completionHandler(pipeline, nullptr, error);
    });
  }
  else {
    /* Block on load to ensure we continue with a valid kernel function */
    id<MTLComputePipelineState> pipeline = [device->mtlDevice
        newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
                                      options:pipelineOptions
                                   reflection:nullptr
                                        error:&error];
    completionHandler(pipeline, nullptr, error);
  }

  return true;
}
|
||||||
|
|
||||||
|
/* Return the pipeline to use for this kernel: the specialised pipeline once it is
 * marked as loaded, otherwise the generic one (which is asserted to be loaded). */
const MetalKernelPipeline &MetalDeviceKernel::get_pso() const
{
  const MetalKernelPipeline &specialised = pso[PSO_SPECIALISED];
  if (!specialised.loaded) {
    assert(pso[PSO_GENERIC].loaded);
    return pso[PSO_GENERIC];
  }

  return specialised;
}
|
||||||
|
|
||||||
|
/* Build all pipelines of the given type (PSO_GENERIC or PSO_SPECIALISED).
 *
 * Creates the MetalRT intersection functions when device->use_metalrt is set, then
 * loads every device kernel (except the megakernel) in a TBB arena limited to
 * max_mtlcompiler_threads. Returns true when nothing failed, or when the hash of
 * the inputs matches the one recorded by the last successful load for this type.
 *
 * NOTE(review): the intersection functions are (re)created before the hash check,
 * so a repeated call with unchanged inputs still replaces rt_intersection_funcs. */
bool MetalDeviceKernels::load(MetalDevice *device, int kernel_type)
{
  bool any_error = false;

  MD5Hash md5;

  /* Build the function constant table */
  MTLFunctionConstantValues *constant_values = nullptr;
  if (kernel_type == PSO_SPECIALISED) {
    /* Autoreleased so that no return path leaks the table; the function
     * descriptors it is assigned to keep their own copy. */
    constant_values = [[MTLFunctionConstantValues new] autorelease];

#  define KERNEL_FILM(_type, name) \
    [constant_values setConstantValue:&data.film.name \
                                 type:get_MTLDataType_##_type() \
                              atIndex:KernelData_film_##name]; \
    md5.append((uint8_t *)&data.film.name, sizeof(data.film.name));

#  define KERNEL_BACKGROUND(_type, name) \
    [constant_values setConstantValue:&data.background.name \
                                 type:get_MTLDataType_##_type() \
                              atIndex:KernelData_background_##name]; \
    md5.append((uint8_t *)&data.background.name, sizeof(data.background.name));

#  define KERNEL_INTEGRATOR(_type, name) \
    [constant_values setConstantValue:&data.integrator.name \
                                 type:get_MTLDataType_##_type() \
                              atIndex:KernelData_integrator_##name]; \
    md5.append((uint8_t *)&data.integrator.name, sizeof(data.integrator.name));

#  define KERNEL_BVH(_type, name) \
    [constant_values setConstantValue:&data.bvh.name \
                                 type:get_MTLDataType_##_type() \
                              atIndex:KernelData_bvh_##name]; \
    md5.append((uint8_t *)&data.bvh.name, sizeof(data.bvh.name));

    /* METAL_WIP: populate constant_values based on KernelData */
    assert(0);
    /*
        const KernelData &data = device->launch_params.data;
    #    include "kernel/types/background.h"
    #    include "kernel/types/bvh.h"
    #    include "kernel/types/film.h"
    #    include "kernel/types/integrator.h"
    */
  }

  if (device->use_metalrt) {
    if (@available(macOS 11.0, *)) {
      /* create the id<MTLFunction> for each intersection function */
      const char *function_names[] = {
          "__anyhit__cycles_metalrt_visibility_test_tri",
          "__anyhit__cycles_metalrt_visibility_test_box",
          "__anyhit__cycles_metalrt_shadow_all_hit_tri",
          "__anyhit__cycles_metalrt_shadow_all_hit_box",
          "__anyhit__cycles_metalrt_local_hit_tri",
          "__anyhit__cycles_metalrt_local_hit_box",
          "__intersection__curve_ribbon",
          "__intersection__curve_ribbon_shadow",
          "__intersection__curve_all",
          "__intersection__curve_all_shadow",
      };
      assert(sizeof(function_names) / sizeof(function_names[0]) == METALRT_FUNC_NUM);

      MTLFunctionDescriptor *desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
      if (kernel_type == PSO_SPECIALISED) {
        desc.constantValues = constant_values;
      }
      for (int i = 0; i < METALRT_FUNC_NUM; i++) {
        const char *function_name = function_names[i];
        /* `name` and `label` are copy properties: an extra +1 copy would leak. */
        desc.name = @(function_name);

        NSError *error = NULL;
        rt_intersection_funcs[kernel_type][i] = [device->mtlLibrary[kernel_type]
            newFunctionWithDescriptor:desc
                                error:&error];

        if (rt_intersection_funcs[kernel_type][i] == nil) {
          /* Never build a std::string from a NULL C string. */
          NSString *err = [error localizedDescription];
          string errors = err ? [err UTF8String] : "nil";

          device->set_error(string_printf(
              "Error getting intersection function \"%s\": %s", function_name, errors.c_str()));
          any_error = true;
          break;
        }

        rt_intersection_funcs[kernel_type][i].label = @(function_name);
      }
    }
  }
  md5.append(device->source_used_for_compile[kernel_type]);

  /* Skip the pipeline builds when the inputs match the last successful load. */
  string hash = md5.get_hex();
  if (loaded_md5[kernel_type] == hash) {
    return true;
  }

  if (!any_error) {
    NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
    NSArray *function_list = nil;

    if (device->use_metalrt) {
      id<MTLFunction> box_intersect_default = nil;
      id<MTLFunction> box_intersect_shadow = nil;
      if (device->kernel_features & KERNEL_FEATURE_HAIR) {
        /* Add curve intersection programs. */
        if (device->kernel_features & KERNEL_FEATURE_HAIR_THICK) {
          /* Slower programs for thick hair since that also slows down ribbons.
           * Ideally this should not be needed. */
          box_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL];
          box_intersect_shadow = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL_SHADOW];
        }
        else {
          box_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_RIBBON];
          box_intersect_shadow =
              rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_RIBBON_SHADOW];
        }
      }
      table_functions[METALRT_TABLE_DEFAULT] = [NSArray
          arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_TRI],
                           box_intersect_default ?
                               box_intersect_default :
                               rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_BOX],
                           nil];
      table_functions[METALRT_TABLE_SHADOW] = [NSArray
          arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_TRI],
                           box_intersect_shadow ?
                               box_intersect_shadow :
                               rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_BOX],
                           nil];
      table_functions[METALRT_TABLE_LOCAL] = [NSArray
          arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_TRI],
                           rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_BOX],
                           nil];

      /* De-duplicate the functions of all tables, then sort them by label. */
      NSMutableSet *unique_functions = [NSMutableSet
          setWithArray:table_functions[METALRT_TABLE_DEFAULT]];
      [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_SHADOW]];
      [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_LOCAL]];

      function_list = [[NSArray arrayWithArray:[unique_functions allObjects]]
          sortedArrayUsingComparator:^NSComparisonResult(id<MTLFunction> f1, id<MTLFunction> f2) {
            return [f1.label compare:f2.label];
          }];

      unique_functions = nil;
    }

    metal_printf("Starting %s \"cycles_metal_...\" pipeline builds\n",
                 kernel_type_as_string(kernel_type));

    /* One result slot per kernel: each parallel task writes only its own element,
     * so the failure state is collected without a data race on a shared flag. */
    bool kernel_failed[DEVICE_KERNEL_NUM] = {false};

    tbb::task_arena local_arena(max_mtlcompiler_threads);
    local_arena.execute([&]() {
      tbb::parallel_for(int(0), int(DEVICE_KERNEL_NUM), [&](int i) {
        /* skip megakernel */
        if (i == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
          return;
        }

        /* Only specialize kernels where it can make an impact. */
        if (kernel_type == PSO_SPECIALISED) {
          if (i < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
              i > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
            return;
          }
        }

        MetalDeviceKernel &kernel = kernels_[i];

        const std::string function_name = std::string("cycles_metal_") +
                                          device_kernel_as_string((DeviceKernel)i);
        int threads_per_threadgroup = device->max_threads_per_threadgroup;
        if (i > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL && i < DEVICE_KERNEL_INTEGRATOR_RESET) {
          /* Always use 512 for the sorting kernels */
          threads_per_threadgroup = 512;
        }

        NSArray *kernel_function_list = nil;

        /* Only these kernels get the intersection functions linked in. */
        if (i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
            i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
            i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
            i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK ||
            i == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
          kernel_function_list = function_list;
        }

        MetalKernelLoadDesc desc;
        desc.pso_index = kernel_type;
        desc.kernel_index = i;
        desc.linked_functions = kernel_function_list;
        desc.intersector_functions.defaults = table_functions[METALRT_TABLE_DEFAULT];
        desc.intersector_functions.shadow = table_functions[METALRT_TABLE_SHADOW];
        desc.intersector_functions.local = table_functions[METALRT_TABLE_LOCAL];
        desc.constant_values = constant_values;
        desc.threads_per_threadgroup = threads_per_threadgroup;
        desc.function_name = function_name.c_str();

        kernel_failed[i] = !kernel.load(device, desc, md5);
      });
    });

    /* Fold the per-kernel results once all tasks have finished. */
    for (int i = 0; i < DEVICE_KERNEL_NUM; i++) {
      any_error |= kernel_failed[i];
    }
  }

  bool loaded = !any_error;
  if (loaded) {
    loaded_md5[kernel_type] = hash;
  }
  return loaded;
}
|
||||||
|
|
||||||
|
/* Look up the per-kernel state by device kernel enumerator. */
const MetalDeviceKernel &MetalDeviceKernels::get(DeviceKernel kernel) const
{
  const int kernel_index = (int)kernel;
  return kernels_[kernel_index];
}
|
||||||
|
|
||||||
|
/* A kernel counts as available when its active pipeline has a Metal function. */
bool MetalDeviceKernels::available(DeviceKernel kernel) const
{
  const MetalDeviceKernel &device_kernel = kernels_[(int)kernel];
  const MetalKernelPipeline &active_pso = device_kernel.get_pso();
  return active_pso.function != nil;
}
|
||||||
|
|
||||||
|
CCL_NAMESPACE_END
|
||||||
|
|
||||||
|
#endif /* WITH_METAL*/
|
99
intern/cycles/device/metal/queue.h
Normal file
99
intern/cycles/device/metal/queue.h
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2021 Blender Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef WITH_METAL
|
||||||
|
|
||||||
|
# include "device/kernel.h"
|
||||||
|
# include "device/memory.h"
|
||||||
|
# include "device/queue.h"
|
||||||
|
|
||||||
|
# include "device/metal/util.h"
|
||||||
|
# include "kernel/device/metal/globals.h"
|
||||||
|
|
||||||
|
# define metal_printf VLOG(4) << string_printf
|
||||||
|
|
||||||
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
class MetalDevice;
|
||||||
|
|
||||||
|
/* Base class for Metal queues. */
class MetalDeviceQueue : public DeviceQueue {
 public:
  MetalDeviceQueue(MetalDevice *device);
  ~MetalDeviceQueue();

  virtual int num_concurrent_states(const size_t) const override;
  virtual int num_concurrent_busy_states() const override;

  virtual void init_execution() override;

  virtual bool enqueue(DeviceKernel kernel,
                       const int work_size,
                       DeviceKernelArguments const &args) override;

  virtual bool synchronize() override;

  virtual void zero_to_device(device_memory &mem) override;
  virtual void copy_to_device(device_memory &mem) override;
  virtual void copy_from_device(device_memory &mem) override;

  virtual bool kernel_available(DeviceKernel kernel) const override;

 protected:
  void prepare_resources(DeviceKernel kernel);

  id<MTLComputeCommandEncoder> get_compute_encoder(DeviceKernel kernel);
  id<MTLBlitCommandEncoder> get_blit_encoder();

  /* Device this queue was created for. */
  MetalDevice *metal_device;
  MetalBufferPool temp_buffer_pool;

  API_AVAILABLE(macos(11.0), ios(14.0))
  MTLCommandBufferDescriptor *command_buffer_desc = nullptr;
  id<MTLDevice> mtlDevice = nil;
  id<MTLCommandQueue> mtlCommandQueue = nil;
  id<MTLCommandBuffer> mtlCommandBuffer = nil;
  id<MTLComputeCommandEncoder> mtlComputeEncoder = nil;
  id<MTLBlitCommandEncoder> mtlBlitEncoder = nil;
  API_AVAILABLE(macos(10.14), ios(14.0))
  id<MTLSharedEvent> shared_event = nil;
  API_AVAILABLE(macos(10.14), ios(14.0))
  MTLSharedEventListener *shared_event_listener = nil;

  /* Default-initialised like the other members: the constructor only creates
   * event_queue inside an availability check. */
  dispatch_queue_t event_queue = nil;
  dispatch_semaphore_t wait_semaphore = nil;

  struct CopyBack {
    void *host_pointer;
    void *gpu_mem;
    uint64_t size;
  };
  std::vector<CopyBack> copy_back_mem;

  /* Set to 1 by the constructor when shared events are available. */
  uint64_t shared_event_id = 0;
  uint64_t command_buffers_submitted = 0;
  uint64_t command_buffers_completed = 0;
  Stats &stats;

  void close_compute_encoder();
  void close_blit_encoder();
};
|
||||||
|
|
||||||
|
CCL_NAMESPACE_END
|
||||||
|
|
||||||
|
#endif /* WITH_METAL */
|
610
intern/cycles/device/metal/queue.mm
Normal file
610
intern/cycles/device/metal/queue.mm
Normal file
@@ -0,0 +1,610 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2021 Blender Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef WITH_METAL
|
||||||
|
|
||||||
|
# include "device/metal/queue.h"
|
||||||
|
|
||||||
|
# include "device/metal/device_impl.h"
|
||||||
|
# include "device/metal/kernel.h"
|
||||||
|
|
||||||
|
# include "util/path.h"
|
||||||
|
# include "util/string.h"
|
||||||
|
# include "util/time.h"
|
||||||
|
|
||||||
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
/* MetalDeviceQueue */
|
||||||
|
|
||||||
|
/* Set up the Metal objects used by this queue: the command buffer descriptor
 * (macOS 11+), the command queue, the shared event with its listener and dispatch
 * queue (macOS 10.14+), and the semaphore. */
MetalDeviceQueue::MetalDeviceQueue(MetalDevice *device)
    : DeviceQueue(device), metal_device(device), stats(device->stats)
{
  if (@available(macos 11.0, *)) {
    MTLCommandBufferDescriptor *descriptor = [[MTLCommandBufferDescriptor alloc] init];
    [descriptor setErrorOptions:MTLCommandBufferErrorOptionEncoderExecutionStatus];
    command_buffer_desc = descriptor;
  }

  mtlDevice = device->mtlDevice;
  mtlCommandQueue = [mtlDevice newCommandQueue];

  if (@available(macos 10.14, *)) {
    shared_event_id = 1;
    shared_event = [mtlDevice newSharedEvent];

    /* Shareable event listener, delivering on its own dispatch queue. */
    event_queue = dispatch_queue_create("com.cycles.metal.event_queue", NULL);
    shared_event_listener = [[MTLSharedEventListener alloc] initWithDispatchQueue:event_queue];
  }

  /* Starts at zero. */
  wait_semaphore = dispatch_semaphore_create(0);
}
|
||||||
|
|
||||||
|
/* Release the Metal and dispatch objects created by the constructor. */
MetalDeviceQueue::~MetalDeviceQueue()
{
  /* Tidying up here isn't really practical - we should expect and require the work
   * queue to be empty here. */
  assert(mtlCommandBuffer == nil);
  assert(command_buffers_submitted == command_buffers_completed);

  if (@available(macos 10.14, *)) {
    [shared_event_listener release];
    [shared_event release];

    /* The listener's dispatch queue is created under the same availability check
     * in the constructor and was previously never released. */
    if (event_queue) {
      dispatch_release(event_queue);
      event_queue = nil;
    }
  }

  if (@available(macos 11.0, *)) {
    [command_buffer_desc release];
  }
  if (mtlCommandQueue) {
    [mtlCommandQueue release];
    mtlCommandQueue = nil;
  }

  /* Created unconditionally in the constructor with an initial value of zero. */
  if (wait_semaphore) {
    dispatch_release(wait_semaphore);
    wait_semaphore = nil;
  }
}
|
||||||
|
|
||||||
|
/* Number of concurrent states: a fixed base count scaled per GPU vendor
 * (x2 for AMD, x4 for Apple). The state size argument is currently unused. */
int MetalDeviceQueue::num_concurrent_states(const size_t /*state_size*/) const
{
  /* METAL_WIP */
  /* TODO: compute automatically. */
  /* TODO: must have at least num_threads_per_block. */
  const int base_states = 1048576;

  int vendor_scale = 1;
  if (metal_device->device_vendor == METAL_GPU_AMD) {
    vendor_scale = 2;
  }
  else if (metal_device->device_vendor == METAL_GPU_APPLE) {
    vendor_scale = 4;
  }

  return base_states * vendor_scale;
}
|
||||||
|
|
||||||
|
/* Number of concurrent busy states: a fixed base count scaled per GPU vendor
 * (x2 for AMD, x4 for Apple). */
int MetalDeviceQueue::num_concurrent_busy_states() const
{
  /* METAL_WIP */
  /* TODO: compute automatically. */
  const int base_states = 65536;

  int vendor_scale = 1;
  if (metal_device->device_vendor == METAL_GPU_AMD) {
    vendor_scale = 2;
  }
  else if (metal_device->device_vendor == METAL_GPU_APPLE) {
    vendor_scale = 4;
  }

  return base_states * vendor_scale;
}
|
||||||
|
|
||||||
|
/* Prepare for executing a task: load the device's texture info, then
 * synchronize the queue so textures and memory copies are in place. */
void MetalDeviceQueue::init_execution()
{
  MetalDevice *const owner = metal_device;
  owner->load_texture_info();

  synchronize();
}
|
||||||
|
|
||||||
|
bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
|
||||||
|
const int work_size,
|
||||||
|
DeviceKernelArguments const &args)
|
||||||
|
{
|
||||||
|
if (metal_device->have_error()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
VLOG(3) << "Metal queue launch " << device_kernel_as_string(kernel) << ", work_size "
|
||||||
|
<< work_size;
|
||||||
|
|
||||||
|
const MetalDeviceKernel &metal_kernel = metal_device->kernels.get(kernel);
|
||||||
|
const MetalKernelPipeline &metal_kernel_pso = metal_kernel.get_pso();
|
||||||
|
|
||||||
|
id<MTLComputeCommandEncoder> mtlComputeCommandEncoder = get_compute_encoder(kernel);
|
||||||
|
|
||||||
|
/* Determine size requirement for argument buffer. */
|
||||||
|
size_t arg_buffer_length = 0;
|
||||||
|
for (size_t i = 0; i < args.count; i++) {
|
||||||
|
size_t size_in_bytes = args.sizes[i];
|
||||||
|
arg_buffer_length = round_up(arg_buffer_length, size_in_bytes) + size_in_bytes;
|
||||||
|
}
|
||||||
|
/* 256 is the Metal offset alignment for constant address space bindings */
|
||||||
|
arg_buffer_length = round_up(arg_buffer_length, 256);
|
||||||
|
|
||||||
|
/* Globals placed after "vanilla" arguments. */
|
||||||
|
size_t globals_offsets = arg_buffer_length;
|
||||||
|
arg_buffer_length += sizeof(KernelParamsMetal);
|
||||||
|
arg_buffer_length = round_up(arg_buffer_length, 256);
|
||||||
|
|
||||||
|
/* Metal ancillary bindless pointers. */
|
||||||
|
size_t metal_offsets = arg_buffer_length;
|
||||||
|
arg_buffer_length += metal_device->mtlAncillaryArgEncoder.encodedLength;
|
||||||
|
arg_buffer_length = round_up(arg_buffer_length, metal_device->mtlAncillaryArgEncoder.alignment);
|
||||||
|
|
||||||
|
/* Temporary buffer used to prepare arg_buffer */
|
||||||
|
uint8_t *init_arg_buffer = (uint8_t *)alloca(arg_buffer_length);
|
||||||
|
memset(init_arg_buffer, 0, arg_buffer_length);
|
||||||
|
|
||||||
|
/* Prepare the non-pointer "enqueue" arguments */
|
||||||
|
size_t bytes_written = 0;
|
||||||
|
for (size_t i = 0; i < args.count; i++) {
|
||||||
|
size_t size_in_bytes = args.sizes[i];
|
||||||
|
bytes_written = round_up(bytes_written, size_in_bytes);
|
||||||
|
if (args.types[i] != DeviceKernelArguments::POINTER) {
|
||||||
|
memcpy(init_arg_buffer + bytes_written, args.values[i], size_in_bytes);
|
||||||
|
}
|
||||||
|
bytes_written += size_in_bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Prepare any non-pointer (i.e. plain-old-data) KernelParamsMetal data */
|
||||||
|
/* The plain-old-data is contiguous, continuing to the end of KernelParamsMetal */
|
||||||
|
size_t plain_old_launch_data_offset = offsetof(KernelParamsMetal, __integrator_state) +
|
||||||
|
sizeof(IntegratorStateGPU);
|
||||||
|
size_t plain_old_launch_data_size = sizeof(KernelParamsMetal) - plain_old_launch_data_offset;
|
||||||
|
memcpy(init_arg_buffer + globals_offsets + plain_old_launch_data_offset,
|
||||||
|
(uint8_t *)&metal_device->launch_params + plain_old_launch_data_offset,
|
||||||
|
plain_old_launch_data_size);
|
||||||
|
|
||||||
|
/* Allocate an argument buffer. */
|
||||||
|
MTLResourceOptions arg_buffer_options = MTLResourceStorageModeManaged;
|
||||||
|
if (@available(macOS 11.0, *)) {
|
||||||
|
if ([mtlDevice hasUnifiedMemory]) {
|
||||||
|
arg_buffer_options = MTLResourceStorageModeShared;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
id<MTLBuffer> arg_buffer = temp_buffer_pool.get_buffer(
|
||||||
|
mtlDevice, mtlCommandBuffer, arg_buffer_length, arg_buffer_options, init_arg_buffer, stats);
|
||||||
|
|
||||||
|
/* Encode the pointer "enqueue" arguments */
|
||||||
|
bytes_written = 0;
|
||||||
|
for (size_t i = 0; i < args.count; i++) {
|
||||||
|
size_t size_in_bytes = args.sizes[i];
|
||||||
|
bytes_written = round_up(bytes_written, size_in_bytes);
|
||||||
|
if (args.types[i] == DeviceKernelArguments::POINTER) {
|
||||||
|
[metal_device->mtlBufferKernelParamsEncoder setArgumentBuffer:arg_buffer
|
||||||
|
offset:bytes_written];
|
||||||
|
if (MetalDevice::MetalMem *mmem = *(MetalDevice::MetalMem **)args.values[i]) {
|
||||||
|
[mtlComputeCommandEncoder useResource:mmem->mtlBuffer
|
||||||
|
usage:MTLResourceUsageRead | MTLResourceUsageWrite];
|
||||||
|
[metal_device->mtlBufferKernelParamsEncoder setBuffer:mmem->mtlBuffer offset:0 atIndex:0];
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (@available(macos 12.0, *)) {
|
||||||
|
[metal_device->mtlBufferKernelParamsEncoder setBuffer:nil offset:0 atIndex:0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bytes_written += size_in_bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Encode KernelParamsMetal buffers */
|
||||||
|
[metal_device->mtlBufferKernelParamsEncoder setArgumentBuffer:arg_buffer offset:globals_offsets];
|
||||||
|
|
||||||
|
/* this relies on IntegratorStateGPU layout being contiguous device_ptrs */
|
||||||
|
const size_t pointer_block_end = offsetof(KernelParamsMetal, __integrator_state) +
|
||||||
|
sizeof(IntegratorStateGPU);
|
||||||
|
for (size_t offset = 0; offset < pointer_block_end; offset += sizeof(device_ptr)) {
|
||||||
|
int pointer_index = offset / sizeof(device_ptr);
|
||||||
|
MetalDevice::MetalMem *mmem = *(
|
||||||
|
MetalDevice::MetalMem **)((uint8_t *)&metal_device->launch_params + offset);
|
||||||
|
if (mmem && (mmem->mtlBuffer || mmem->mtlTexture)) {
|
||||||
|
[metal_device->mtlBufferKernelParamsEncoder setBuffer:mmem->mtlBuffer
|
||||||
|
offset:0
|
||||||
|
atIndex:pointer_index];
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (@available(macos 12.0, *)) {
|
||||||
|
[metal_device->mtlBufferKernelParamsEncoder setBuffer:nil offset:0 atIndex:pointer_index];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bytes_written = globals_offsets + sizeof(KernelParamsMetal);
|
||||||
|
|
||||||
|
/* Encode ancillaries */
|
||||||
|
[metal_device->mtlAncillaryArgEncoder setArgumentBuffer:arg_buffer offset:metal_offsets];
|
||||||
|
[metal_device->mtlAncillaryArgEncoder setBuffer:metal_device->texture_bindings_2d
|
||||||
|
offset:0
|
||||||
|
atIndex:0];
|
||||||
|
[metal_device->mtlAncillaryArgEncoder setBuffer:metal_device->texture_bindings_3d
|
||||||
|
offset:0
|
||||||
|
atIndex:1];
|
||||||
|
if (@available(macos 12.0, *)) {
|
||||||
|
if (metal_device->use_metalrt) {
|
||||||
|
if (metal_device->bvhMetalRT) {
|
||||||
|
id<MTLAccelerationStructure> accel_struct = metal_device->bvhMetalRT->accel_struct;
|
||||||
|
[metal_device->mtlAncillaryArgEncoder setAccelerationStructure:accel_struct atIndex:2];
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
|
||||||
|
if (metal_kernel_pso.intersection_func_table[table]) {
|
||||||
|
[metal_kernel_pso.intersection_func_table[table] setBuffer:arg_buffer
|
||||||
|
offset:globals_offsets
|
||||||
|
atIndex:1];
|
||||||
|
[metal_device->mtlAncillaryArgEncoder
|
||||||
|
setIntersectionFunctionTable:metal_kernel_pso.intersection_func_table[table]
|
||||||
|
atIndex:3 + table];
|
||||||
|
[mtlComputeCommandEncoder useResource:metal_kernel_pso.intersection_func_table[table]
|
||||||
|
usage:MTLResourceUsageRead];
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
[metal_device->mtlAncillaryArgEncoder setIntersectionFunctionTable:nil
|
||||||
|
atIndex:3 + table];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bytes_written = metal_offsets + metal_device->mtlAncillaryArgEncoder.encodedLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (arg_buffer.storageMode == MTLStorageModeManaged) {
|
||||||
|
[arg_buffer didModifyRange:NSMakeRange(0, bytes_written)];
|
||||||
|
}
|
||||||
|
|
||||||
|
[mtlComputeCommandEncoder setBuffer:arg_buffer offset:0 atIndex:0];
|
||||||
|
[mtlComputeCommandEncoder setBuffer:arg_buffer offset:globals_offsets atIndex:1];
|
||||||
|
[mtlComputeCommandEncoder setBuffer:arg_buffer offset:metal_offsets atIndex:2];
|
||||||
|
|
||||||
|
if (metal_device->use_metalrt) {
|
||||||
|
if (@available(macos 12.0, *)) {
|
||||||
|
|
||||||
|
auto bvhMetalRT = metal_device->bvhMetalRT;
|
||||||
|
switch (kernel) {
|
||||||
|
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
|
||||||
|
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW:
|
||||||
|
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
|
||||||
|
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK:
|
||||||
|
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
bvhMetalRT = nil;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bvhMetalRT) {
|
||||||
|
/* Mark all Accelerations resources as used */
|
||||||
|
[mtlComputeCommandEncoder useResource:bvhMetalRT->accel_struct usage:MTLResourceUsageRead];
|
||||||
|
[mtlComputeCommandEncoder useResources:bvhMetalRT->blas_array.data()
|
||||||
|
count:bvhMetalRT->blas_array.size()
|
||||||
|
usage:MTLResourceUsageRead];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
[mtlComputeCommandEncoder setComputePipelineState:metal_kernel_pso.pipeline];
|
||||||
|
|
||||||
|
/* Compute kernel launch parameters. */
|
||||||
|
const int num_threads_per_block = metal_kernel.get_num_threads_per_block();
|
||||||
|
|
||||||
|
int shared_mem_bytes = 0;
|
||||||
|
|
||||||
|
switch (kernel) {
|
||||||
|
case DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY:
|
||||||
|
case DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY:
|
||||||
|
case DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY:
|
||||||
|
case DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY:
|
||||||
|
case DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY:
|
||||||
|
case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY:
|
||||||
|
case DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY:
|
||||||
|
case DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY:
|
||||||
|
/* See parallel_active_index.h for why this amount of shared memory is needed.
|
||||||
|
* Rounded up to 16 bytes for Metal */
|
||||||
|
shared_mem_bytes = round_up((num_threads_per_block + 1) * sizeof(int), 16);
|
||||||
|
[mtlComputeCommandEncoder setThreadgroupMemoryLength:shared_mem_bytes atIndex:0];
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
MTLSize size_threadgroups_per_dispatch = MTLSizeMake(
|
||||||
|
divide_up(work_size, num_threads_per_block), 1, 1);
|
||||||
|
MTLSize size_threads_per_threadgroup = MTLSizeMake(num_threads_per_block, 1, 1);
|
||||||
|
[mtlComputeCommandEncoder dispatchThreadgroups:size_threadgroups_per_dispatch
|
||||||
|
threadsPerThreadgroup:size_threads_per_threadgroup];
|
||||||
|
|
||||||
|
[mtlCommandBuffer addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
|
||||||
|
NSString *kernel_name = metal_kernel_pso.function.label;
|
||||||
|
|
||||||
|
/* Enhanced command buffer errors are only available in 11.0+ */
|
||||||
|
if (@available(macos 11.0, *)) {
|
||||||
|
if (command_buffer.status == MTLCommandBufferStatusError && command_buffer.error != nil) {
|
||||||
|
printf("CommandBuffer Failed: %s\n", [kernel_name UTF8String]);
|
||||||
|
NSArray<id<MTLCommandBufferEncoderInfo>> *encoderInfos = [command_buffer.error.userInfo
|
||||||
|
valueForKey:MTLCommandBufferEncoderInfoErrorKey];
|
||||||
|
if (encoderInfos != nil) {
|
||||||
|
for (id<MTLCommandBufferEncoderInfo> encoderInfo : encoderInfos) {
|
||||||
|
NSLog(@"%@", encoderInfo);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
id<MTLLogContainer> logs = command_buffer.logs;
|
||||||
|
for (id<MTLFunctionLog> log in logs) {
|
||||||
|
NSLog(@"%@", log);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (command_buffer.error) {
|
||||||
|
printf("CommandBuffer Failed: %s\n", [kernel_name UTF8String]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}];
|
||||||
|
|
||||||
|
return !(metal_device->have_error());
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Submit all work recorded on this queue and block until the GPU has finished it.
 *
 * After the wait, pending host copy-backs are performed, temporary buffers tied to the
 * command buffer are handed back to the pool, and the command buffer is released.
 *
 * Returns false if the device is in an error state before or after the submission. */
bool MetalDeviceQueue::synchronize()
{
  if (metal_device->have_error()) {
    return false;
  }

  /* Encoders must be ended before the command buffer is committed. */
  if (mtlComputeEncoder) {
    close_compute_encoder();
  }
  close_blit_encoder();

  if (mtlCommandBuffer) {
    /* Value the shared event is signaled with once this command buffer completes. */
    const uint64_t signal_value = this->shared_event_id++;

    if (@available(macos 10.14, *)) {
      __block dispatch_semaphore_t block_sema = wait_semaphore;
      [shared_event notifyListener:shared_event_listener
                           atValue:signal_value
                             block:^(id<MTLSharedEvent> sharedEvent, uint64_t value) {
                               dispatch_semaphore_signal(block_sema);
                             }];

      [mtlCommandBuffer encodeSignalEvent:shared_event value:signal_value];
      [mtlCommandBuffer commit];
      dispatch_semaphore_wait(wait_semaphore, DISPATCH_TIME_FOREVER);
    }

    /* Perform the host-side copies that copy_from_device() deferred until completion. */
    for (const CopyBack &mmem : copy_back_mem) {
      memcpy((uchar *)mmem.host_pointer, (uchar *)mmem.gpu_mem, mmem.size);
    }
    copy_back_mem.clear();

    temp_buffer_pool.process_command_buffer_completion(mtlCommandBuffer);

    /* Balance the retain taken when the command buffer was created. This is done only after
     * the last use of the pointer above, so a released object is never passed around. */
    [mtlCommandBuffer release];

    metal_device->flush_delayed_free_list();

    mtlCommandBuffer = nil;
  }

  return !(metal_device->have_error());
}
|
||||||
|
|
||||||
|
/* Fill the device allocation backing `mem` with zeros, allocating it first if needed.
 *
 * When the allocation has an MTLBuffer the fill is recorded on the blit encoder; otherwise
 * the request is forwarded to the device's own mem_zero(). */
void MetalDeviceQueue::zero_to_device(device_memory &mem)
{
  assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);

  /* Nothing to clear. */
  if (mem.memory_size() == 0) {
    return;
  }

  /* Allocate on demand. */
  if (mem.device_pointer == 0) {
    metal_device->mem_alloc(mem);
  }
  assert(mem.device_pointer != 0);

  std::lock_guard<std::recursive_mutex> map_lock(metal_device->metal_mem_map_mutex);
  MetalDevice::MetalMem &metal_mem = *metal_device->metal_mem_map.at(&mem);

  if (!metal_mem.mtlBuffer) {
    metal_device->mem_zero(mem);
    return;
  }

  /* Zero memory on device. */
  id<MTLBlitCommandEncoder> blit = get_blit_encoder();
  [blit fillBuffer:metal_mem.mtlBuffer
             range:NSMakeRange(metal_mem.offset, metal_mem.size)
             value:0];
}
|
||||||
|
|
||||||
|
/* Upload the host contents of `mem` to its device allocation, allocating it first if needed.
 *
 * Memory known to the device's memory map is uploaded through a temporary staging buffer and
 * a blit copy; anything else is forwarded to the device's own mem_copy_to(). */
void MetalDeviceQueue::copy_to_device(device_memory &mem)
{
  if (mem.memory_size() == 0) {
    return;
  }

  /* Allocate on demand. */
  if (mem.device_pointer == 0) {
    metal_device->mem_alloc(mem);
  }

  assert(mem.device_pointer != 0);
  assert(mem.host_pointer != nullptr);

  std::lock_guard<std::recursive_mutex> map_lock(metal_device->metal_mem_map_mutex);

  const auto found = metal_device->metal_mem_map.find(&mem);
  if (found == metal_device->metal_mem_map.end()) {
    metal_device->mem_copy_to(mem);
    return;
  }

  /* Host and shared pointers are the same memory: there is nothing to copy. */
  if (mem.host_pointer == mem.shared_pointer) {
    return;
  }

  MetalDevice::MetalMem &metal_mem = *found->second;

  /* Fetch the blit encoder first: it creates the command buffer on demand, and the staging
   * buffer below is registered against that command buffer. */
  id<MTLBlitCommandEncoder> blit = get_blit_encoder();

  id<MTLBuffer> staging = temp_buffer_pool.get_buffer(
      mtlDevice, mtlCommandBuffer, metal_mem.size, MTLResourceStorageModeShared, mem.host_pointer, stats);

  [blit copyFromBuffer:staging
           sourceOffset:0
               toBuffer:metal_mem.mtlBuffer
      destinationOffset:metal_mem.offset
                   size:metal_mem.size];
}
|
||||||
|
|
||||||
|
/* Make the device contents of `mem` visible in its host pointer.
 *
 * When a command buffer is pending, the host copy is queued in `copy_back_mem` and performed
 * by synchronize(); otherwise it is done immediately. Allocations without an MTLBuffer are
 * forwarded to the device's own mem_copy_from(). */
void MetalDeviceQueue::copy_from_device(device_memory &mem)
{
  assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);

  if (mem.memory_size() == 0) {
    return;
  }

  assert(mem.device_pointer != 0);
  assert(mem.host_pointer != nullptr);

  std::lock_guard<std::recursive_mutex> map_lock(metal_device->metal_mem_map_mutex);
  MetalDevice::MetalMem &metal_mem = *metal_device->metal_mem_map.at(&mem);

  if (!metal_mem.mtlBuffer) {
    metal_device->mem_copy_from(mem);
    return;
  }

  const size_t size = mem.memory_size();

  /* No device allocation: hand back zeros. */
  if (!mem.device_pointer) {
    memset((char *)mem.host_pointer, 0, size);
    return;
  }

  /* Managed buffers need an explicit synchronization before the CPU reads them. */
  if ([metal_mem.mtlBuffer storageMode] == MTLStorageModeManaged) {
    id<MTLBlitCommandEncoder> blit = get_blit_encoder();
    [blit synchronizeResource:metal_mem.mtlBuffer];
  }

  /* The host pointer already aliases the buffer's host memory. */
  if (mem.host_pointer == metal_mem.hostPtr) {
    return;
  }

  if (mtlCommandBuffer) {
    /* Work is still pending: defer the copy until synchronize(). */
    copy_back_mem.push_back({mem.host_pointer, metal_mem.hostPtr, size});
  }
  else {
    memcpy((uchar *)mem.host_pointer, (uchar *)metal_mem.hostPtr, size);
  }
}
|
||||||
|
|
||||||
|
/* Report whether `kernel` is available, as answered by the device's kernel set. */
bool MetalDeviceQueue::kernel_available(DeviceKernel kernel) const
{
  const bool is_available = metal_device->kernels.available(kernel);
  return is_available;
}
|
||||||
|
|
||||||
|
/* Declare to the current compute encoder every resource in the device memory map, plus the
 * 2D/3D texture binding buffers, so they are resident for the upcoming dispatch.
 *
 * The `kernel` argument is currently not used by this function. */
void MetalDeviceQueue::prepare_resources(DeviceKernel kernel)
{
  std::lock_guard<std::recursive_mutex> map_lock(metal_device->metal_mem_map_mutex);

  /* declare resource usage */
  for (auto &entry : metal_device->metal_mem_map) {
    device_memory *mem = entry.first;

    /* Global, read-only and texture memory is only read; everything else is also written. */
    const bool read_only = (mem->type == MEM_GLOBAL || mem->type == MEM_READ_ONLY ||
                            mem->type == MEM_TEXTURE);

    MTLResourceUsage usage = MTLResourceUsageRead;
    if (!read_only) {
      usage |= MTLResourceUsageWrite;
    }

    /* METAL_WIP - use array version (i.e. useResources) */
    if (entry.second->mtlBuffer) {
      [mtlComputeEncoder useResource:entry.second->mtlBuffer usage:usage];
    }
    else if (entry.second->mtlTexture) {
      [mtlComputeEncoder useResource:entry.second->mtlTexture
                               usage:usage | MTLResourceUsageSample];
    }
  }

  /* ancillaries */
  [mtlComputeEncoder useResource:metal_device->texture_bindings_2d usage:MTLResourceUsageRead];
  [mtlComputeEncoder useResource:metal_device->texture_bindings_3d usage:MTLResourceUsageRead];
}
|
||||||
|
|
||||||
|
/* Return a compute encoder suitable for `kernel`, creating one if necessary.
 *
 * Kernels below DEVICE_KERNEL_INTEGRATOR_NUM use a concurrent-dispatch encoder, all others a
 * serial one. An already open encoder is reused when its dispatch type matches; otherwise it
 * is closed and a new encoder is created on the (possibly new) command buffer. Resource usage
 * is declared on the encoder before it is returned. */
id<MTLComputeCommandEncoder> MetalDeviceQueue::get_compute_encoder(DeviceKernel kernel)
{
  const bool concurrent = (kernel < DEVICE_KERNEL_INTEGRATOR_NUM);

  if (@available(macos 10.14, *)) {
    /* Compute the wanted dispatch type once. The previous form,
     * `dispatchType == concurrent ? Concurrent : Serial`, parsed as
     * `(dispatchType == concurrent) ? ...` and only gave the intended answer because of the
     * numeric values of the enum. */
    const MTLDispatchType dispatch_type = concurrent ? MTLDispatchTypeConcurrent :
                                                       MTLDispatchTypeSerial;

    if (mtlComputeEncoder) {
      if (mtlComputeEncoder.dispatchType == dispatch_type) {
        /* declare usage of MTLBuffers etc */
        prepare_resources(kernel);

        return mtlComputeEncoder;
      }
      /* Wrong dispatch type: end this encoder and start a new one below. */
      close_compute_encoder();
    }

    /* Only one encoder may be open on a command buffer at a time. */
    close_blit_encoder();

    if (!mtlCommandBuffer) {
      mtlCommandBuffer = [mtlCommandQueue commandBuffer];
      /* Kept alive until synchronize() releases it. */
      [mtlCommandBuffer retain];
    }

    mtlComputeEncoder = [mtlCommandBuffer computeCommandEncoderWithDispatchType:dispatch_type];

    /* declare usage of MTLBuffers etc */
    prepare_resources(kernel);
  }

  return mtlComputeEncoder;
}
|
||||||
|
|
||||||
|
/* Return the open blit encoder, creating it (and the command buffer) on demand.
 *
 * Any open compute encoder is closed first. */
id<MTLBlitCommandEncoder> MetalDeviceQueue::get_blit_encoder()
{
  if (!mtlBlitEncoder) {
    if (mtlComputeEncoder) {
      close_compute_encoder();
    }

    if (!mtlCommandBuffer) {
      mtlCommandBuffer = [mtlCommandQueue commandBuffer];
      /* Kept alive until synchronize() releases it. */
      [mtlCommandBuffer retain];
    }

    mtlBlitEncoder = [mtlCommandBuffer blitCommandEncoder];
  }

  return mtlBlitEncoder;
}
|
||||||
|
|
||||||
|
/* End encoding on the compute encoder, if one is open, and forget it. */
void MetalDeviceQueue::close_compute_encoder()
{
  /* Messaging nil is a no-op in Objective-C, so the guard only makes the intent explicit. */
  if (mtlComputeEncoder) {
    [mtlComputeEncoder endEncoding];
  }
  mtlComputeEncoder = nil;
}
|
||||||
|
|
||||||
|
/* End encoding on the blit encoder, if one is open, and forget it. */
void MetalDeviceQueue::close_blit_encoder()
{
  if (!mtlBlitEncoder) {
    return;
  }

  [mtlBlitEncoder endEncoding];
  mtlBlitEncoder = nil;
}
|
||||||
|
|
||||||
|
CCL_NAMESPACE_END
|
||||||
|
|
||||||
|
#endif /* WITH_METAL */
|
101
intern/cycles/device/metal/util.h
Normal file
101
intern/cycles/device/metal/util.h
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2021 Blender Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef WITH_METAL
|
||||||
|
|
||||||
|
# include <Metal/Metal.h>
|
||||||
|
# include <string>
|
||||||
|
|
||||||
|
# include "device/metal/device.h"
|
||||||
|
# include "device/metal/kernel.h"
|
||||||
|
# include "device/queue.h"
|
||||||
|
|
||||||
|
# include "util/thread.h"
|
||||||
|
|
||||||
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
/* GPU vendor of a Metal device. Enumerators are numbered consecutively starting at 0. */
enum MetalGPUVendor {
  METAL_GPU_UNKNOWN = 0,
  METAL_GPU_APPLE,
  METAL_GPU_AMD,
  METAL_GPU_INTEL,
};
|
||||||
|
|
||||||
|
/* Retains a named MTLDevice for device enumeration.
 *
 * The struct owns one reference on `device_id`: taken on construction, dropped on
 * destruction. Copies take their own reference, so instances can be stored in containers
 * (which copy elements on insertion and on growth) without over-releasing the device. */
struct MetalPlatformDevice {
  MetalPlatformDevice(id<MTLDevice> device, const string &device_name)
      : device_id(device), device_name(device_name)
  {
    [device_id retain];
  }

  /* Each copy owns its own reference. */
  MetalPlatformDevice(const MetalPlatformDevice &other)
      : device_id(other.device_id), device_name(other.device_name)
  {
    [device_id retain];
  }

  MetalPlatformDevice &operator=(const MetalPlatformDevice &other)
  {
    if (this != &other) {
      /* Retain before release so the sequence is safe even if both refer to one device. */
      [other.device_id retain];
      [device_id release];
      device_id = other.device_id;
      device_name = other.device_name;
    }
    return *this;
  }

  ~MetalPlatformDevice()
  {
    [device_id release];
  }

  id<MTLDevice> device_id;
  string device_name;
};
|
||||||
|
|
||||||
|
/* Contains static Metal helper functions. */
struct MetalInfo {
  /* Whether `device` can be used for rendering on the running system. */
  static bool device_version_check(id<MTLDevice> device);

  /* Clear `usable_devices` and fill it with every device that passes the checks. */
  static void get_usable_devices(vector<MetalPlatformDevice> *usable_devices);

  /* Classify a device by substrings of its name; METAL_GPU_UNKNOWN when none match. */
  static MetalGPUVendor get_vendor_from_device_name(string const &device_name);

  /* Platform information. */

  /* Store the number of Metal devices in `*num_devices`; returns whether that succeeded. */
  static bool get_num_devices(uint32_t *num_devices);
  /* Number of Metal devices, or 0 when it could not be determined. */
  static uint32_t get_num_devices();

  /* Store the name of `device` in `*device_name`; returns whether that succeeded. */
  static bool get_device_name(id<MTLDevice> device, string *device_name);
  /* Name of `device`, or an empty string when it could not be determined. */
  static string get_device_name(id<MTLDevice> device);
};
|
||||||
|
|
||||||
|
/* Pool of MTLBuffers whose lifetime is linked to a single MTLCommandBuffer.
 *
 * Buffers handed out by get_buffer() sit on the in-use list, tagged with the command buffer
 * they were requested for, until process_command_buffer_completion() moves them to the free
 * list, from where get_buffer() may hand them out again. */
class MetalBufferPool {
 private:
  /* A pooled buffer and the command buffer it is currently associated with. */
  struct MetalBufferListEntry {
    MetalBufferListEntry() = delete;

    MetalBufferListEntry(id<MTLBuffer> pooled_buffer, id<MTLCommandBuffer> owner)
        : buffer(pooled_buffer), command_buffer(owner)
    {
    }

    id<MTLBuffer> buffer;
    id<MTLCommandBuffer> command_buffer;
  };

  /* Buffers available for reuse. */
  std::vector<MetalBufferListEntry> buffer_free_list;
  /* Buffers handed out and not yet returned. */
  std::vector<MetalBufferListEntry> buffer_in_use_list;
  /* Guards both lists. */
  thread_mutex buffer_mutex;
  /* Sum of the allocated sizes of the buffers created by this pool. */
  size_t total_temp_mem_size = 0;

 public:
  MetalBufferPool() = default;
  ~MetalBufferPool();

  /* Get a buffer of exactly `length` bytes with the given resource `options`, associated
   * with `command_buffer`. When `pointer` is non-null its contents are copied in. */
  id<MTLBuffer> get_buffer(id<MTLDevice> device,
                           id<MTLCommandBuffer> command_buffer,
                           NSUInteger length,
                           MTLResourceOptions options,
                           const void *pointer,
                           Stats &stats);

  /* Return every buffer associated with `command_buffer` to the free list. */
  void process_command_buffer_completion(id<MTLCommandBuffer> command_buffer);
};
|
||||||
|
|
||||||
|
CCL_NAMESPACE_END
|
||||||
|
|
||||||
|
#endif /* WITH_METAL */
|
218
intern/cycles/device/metal/util.mm
Normal file
218
intern/cycles/device/metal/util.mm
Normal file
@@ -0,0 +1,218 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2021 Blender Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef WITH_METAL
|
||||||
|
|
||||||
|
# include "device/metal/util.h"
|
||||||
|
# include "device/metal/device_impl.h"
|
||||||
|
# include "util/md5.h"
|
||||||
|
# include "util/path.h"
|
||||||
|
# include "util/string.h"
|
||||||
|
# include "util/time.h"
|
||||||
|
|
||||||
|
# include <pwd.h>
|
||||||
|
# include <sys/shm.h>
|
||||||
|
# include <time.h>
|
||||||
|
|
||||||
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
/* Classify a GPU by looking for a vendor name inside `device_name`.
 *
 * Returns METAL_GPU_UNKNOWN when no known vendor name is found. */
MetalGPUVendor MetalInfo::get_vendor_from_device_name(string const &device_name)
{
  /* Checked in order; the first substring that occurs in the name wins. */
  static const struct {
    const char *substring;
    MetalGPUVendor vendor;
  } known_vendors[] = {
      {"Intel", METAL_GPU_INTEL},
      {"AMD", METAL_GPU_AMD},
      {"Apple", METAL_GPU_APPLE},
  };

  for (const auto &candidate : known_vendors) {
    if (device_name.find(candidate.substring) != string::npos) {
      return candidate.vendor;
    }
  }

  return METAL_GPU_UNKNOWN;
}
|
||||||
|
|
||||||
|
/* Decide whether `device` may be used: requires macOS 12.0 or newer and a device whose name
 * identifies it as an Apple GPU. */
bool MetalInfo::device_version_check(id<MTLDevice> device)
{
  /* Metal Cycles doesn't work correctly on macOS versions older than 12.0 */
  if (@available(macos 12.0, *)) {
    const string name = [[device name] UTF8String];

    /* Metal Cycles works on Apple Silicon GPUs at present */
    const bool is_apple_gpu = (get_vendor_from_device_name(name) == METAL_GPU_APPLE);
    return is_apple_gpu;
  }

  return false;
}
|
||||||
|
|
||||||
|
/* Replace the contents of `usable_devices` with every Metal device that has a name, is not
 * excluded by the CYCLES_METAL_FORCE_INTEL environment variable, and passes
 * device_version_check().
 *
 * Diagnostics are only logged on the first call. */
void MetalInfo::get_usable_devices(vector<MetalPlatformDevice> *usable_devices)
{
  static bool first_time = true;
#  define FIRST_VLOG(severity) \
    if (first_time) \
    VLOG(severity)

  usable_devices->clear();

  /* The environment is read once; a non-zero value restricts the list to Intel devices. */
  static const char *forceIntelStr = getenv("CYCLES_METAL_FORCE_INTEL");
  const bool forceIntel = forceIntelStr ? (atoi(forceIntelStr) != 0) : false;

  /* NOTE(review): MTLCopyAllDevices() returns an array owned by the caller and it is never
   * released here. Confirm the reference counting of MetalPlatformDevice copies before
   * adding the release. */
  NSArray<id<MTLDevice>> *allDevices = MTLCopyAllDevices();
  for (id<MTLDevice> device in allDevices) {
    string device_name;
    if (!get_device_name(device, &device_name)) {
      FIRST_VLOG(2) << "Failed to get device name, ignoring.";
      continue;
    }

    if (forceIntel && device_name.find("Intel") == string::npos) {
      FIRST_VLOG(2) << "CYCLES_METAL_FORCE_INTEL causing non-Intel device " << device_name
                    << " to be ignored.";
      continue;
    }

    if (!device_version_check(device)) {
      FIRST_VLOG(2) << "Ignoring device " << device_name << " due to too old compiler version.";
      continue;
    }

    FIRST_VLOG(2) << "Adding new device " << device_name << ".";
    usable_devices->push_back(MetalPlatformDevice(device, device_name));
  }
  first_time = false;

  /* The macro is local to this function; do not leak it into the rest of the file. */
#  undef FIRST_VLOG
}
|
||||||
|
|
||||||
|
/* Store the number of Metal devices reported by the system in `*num_devices`.
 *
 * Always returns true. */
bool MetalInfo::get_num_devices(uint32_t *num_devices)
{
  /* The array returned by MTLCopyAllDevices() is owned by the caller, so it must be
   * released once the count has been read; otherwise every call leaks one array. */
  NSArray<id<MTLDevice>> *all_devices = MTLCopyAllDevices();
  *num_devices = (uint32_t)all_devices.count;
  [all_devices release];
  return true;
}
|
||||||
|
|
||||||
|
/* Convenience overload: the number of Metal devices, or 0 if the query fails. */
uint32_t MetalInfo::get_num_devices()
{
  uint32_t device_count;
  const bool ok = get_num_devices(&device_count);
  return ok ? device_count : 0;
}
|
||||||
|
|
||||||
|
/* Store the name of `device` in `*platform_name`.
 *
 * Returns false, leaving `*platform_name` untouched, when no name is available (for example
 * when `device` is nil); true otherwise. */
bool MetalInfo::get_device_name(id<MTLDevice> device, string *platform_name)
{
  /* Messaging nil yields NULL, and constructing a std::string from NULL is undefined
   * behavior, so check before assigning. */
  const char *name = [device.name UTF8String];
  if (name == nullptr) {
    return false;
  }

  *platform_name = name;
  return true;
}
|
||||||
|
|
||||||
|
/* Convenience overload: the name of `device`, or an empty string if the query fails. */
string MetalInfo::get_device_name(id<MTLDevice> device)
{
  string name;
  if (get_device_name(device, &name)) {
    return name;
  }
  return string();
}
|
||||||
|
|
||||||
|
/* Hand out a buffer of exactly `length` bytes with the storage and CPU cache mode encoded in
 * `options`, associated with `command_buffer`.
 *
 * A buffer from the free list is reused when its length, storage mode and CPU cache mode all
 * match; otherwise a new buffer is created on `device` and accounted for in `stats`. When
 * `pointer` is non-null, `length` bytes are copied from it into the buffer.
 *
 * The buffer stays on the in-use list until process_command_buffer_completion() is called
 * with the same command buffer. */
id<MTLBuffer> MetalBufferPool::get_buffer(id<MTLDevice> device,
                                          id<MTLCommandBuffer> command_buffer,
                                          NSUInteger length,
                                          MTLResourceOptions options,
                                          const void *pointer,
                                          Stats &stats)
{
  const MTLStorageMode storageMode = MTLStorageMode((options & MTLResourceStorageModeMask) >>
                                                    MTLResourceStorageModeShift);
  const MTLCPUCacheMode cpuCacheMode = MTLCPUCacheMode((options & MTLResourceCPUCacheModeMask) >>
                                                       MTLResourceCPUCacheModeShift);

  id<MTLBuffer> buffer = nil;
  bool reused = false;

  {
    /* Scoped lock: released on every exit path, including exceptions from the containers. */
    thread_scoped_lock lock(buffer_mutex);

    for (auto entry = buffer_free_list.begin(); entry != buffer_free_list.end(); entry++) {
      /* Check if buffer matches size, storage mode and CPU cache mode. */
      if (entry->buffer.length == length && storageMode == entry->buffer.storageMode &&
          cpuCacheMode == entry->buffer.cpuCacheMode) {
        /* Copy the handle out before erasing, which invalidates the iterator. */
        buffer = entry->buffer;
        buffer_free_list.erase(entry);
        buffer_in_use_list.push_back(MetalBufferListEntry(buffer, command_buffer));
        reused = true;
        break;
      }
    }

    if (!reused) {
      if (pointer) {
        buffer = [device newBufferWithBytes:pointer length:length options:options];
      }
      else {
        buffer = [device newBufferWithLength:length options:options];
      }

      stats.mem_alloc(buffer.allocatedSize);
      total_temp_mem_size += buffer.allocatedSize;
      buffer_in_use_list.push_back(MetalBufferListEntry(buffer, command_buffer));
    }
  }

  /* A reused buffer still holds old contents: copy the data over, outside the lock. */
  if (reused && pointer) {
    memcpy(buffer.contents, pointer, length);
    if (buffer.storageMode == MTLStorageModeManaged) {
      /* Tell Metal that the CPU modified a managed buffer. */
      [buffer didModifyRange:NSMakeRange(0, length)];
    }
  }

  return buffer;
}
|
||||||
|
|
||||||
|
/* Move every in-use buffer associated with `command_buffer` to the free list, clearing its
 * command buffer association, so get_buffer() can hand it out again. */
void MetalBufferPool::process_command_buffer_completion(id<MTLCommandBuffer> command_buffer)
{
  assert(command_buffer);
  thread_scoped_lock lock(buffer_mutex);

  auto it = buffer_in_use_list.begin();
  while (it != buffer_in_use_list.end()) {
    if (it->command_buffer != command_buffer) {
      /* Belongs to another command buffer: leave it in use. */
      ++it;
      continue;
    }

    /* Copy the entry out before erasing it from the in-use list. */
    MetalBufferListEntry released = *it;
    it = buffer_in_use_list.erase(it);
    released.command_buffer = nil;
    buffer_free_list.push_back(released);
  }
}
|
||||||
|
|
||||||
|
/* Release every buffer on the free list and subtract its size from the pool total.
 *
 * Buffers still on the in-use list are not touched here.
 * NOTE(review): presumably the owner has completed all command buffers before the pool is
 * destroyed, so the in-use list is empty at this point; confirm against the caller. */
MetalBufferPool::~MetalBufferPool()
{
  thread_scoped_lock lock(buffer_mutex);

  /* One pass plus clear(), rather than erasing from the front of the vector per element. */
  for (MetalBufferListEntry &entry : buffer_free_list) {
    total_temp_mem_size -= entry.buffer.allocatedSize;
    [entry.buffer release];
  }
  buffer_free_list.clear();
}
|
||||||
|
|
||||||
|
CCL_NAMESPACE_END
|
||||||
|
|
||||||
|
#endif /* WITH_METAL */
|
@@ -124,11 +124,20 @@ class MultiDevice : public Device {
|
|||||||
return BVH_LAYOUT_MULTI_OPTIX;
|
return BVH_LAYOUT_MULTI_OPTIX;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* With multiple Metal devices, every device needs its own acceleration structure */
|
||||||
|
if (bvh_layout_mask == BVH_LAYOUT_METAL) {
|
||||||
|
return BVH_LAYOUT_MULTI_METAL;
|
||||||
|
}
|
||||||
|
|
||||||
/* When devices do not share a common BVH layout, fall back to creating one for each */
|
/* When devices do not share a common BVH layout, fall back to creating one for each */
|
||||||
const BVHLayoutMask BVH_LAYOUT_OPTIX_EMBREE = (BVH_LAYOUT_OPTIX | BVH_LAYOUT_EMBREE);
|
const BVHLayoutMask BVH_LAYOUT_OPTIX_EMBREE = (BVH_LAYOUT_OPTIX | BVH_LAYOUT_EMBREE);
|
||||||
if ((bvh_layout_mask_all & BVH_LAYOUT_OPTIX_EMBREE) == BVH_LAYOUT_OPTIX_EMBREE) {
|
if ((bvh_layout_mask_all & BVH_LAYOUT_OPTIX_EMBREE) == BVH_LAYOUT_OPTIX_EMBREE) {
|
||||||
return BVH_LAYOUT_MULTI_OPTIX_EMBREE;
|
return BVH_LAYOUT_MULTI_OPTIX_EMBREE;
|
||||||
}
|
}
|
||||||
|
const BVHLayoutMask BVH_LAYOUT_METAL_EMBREE = (BVH_LAYOUT_METAL | BVH_LAYOUT_EMBREE);
|
||||||
|
if ((bvh_layout_mask_all & BVH_LAYOUT_METAL_EMBREE) == BVH_LAYOUT_METAL_EMBREE) {
|
||||||
|
return BVH_LAYOUT_MULTI_METAL_EMBREE;
|
||||||
|
}
|
||||||
|
|
||||||
return bvh_layout_mask;
|
return bvh_layout_mask;
|
||||||
}
|
}
|
||||||
@@ -151,7 +160,9 @@ class MultiDevice : public Device {
|
|||||||
}
|
}
|
||||||
|
|
||||||
assert(bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX ||
|
assert(bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX ||
|
||||||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE);
|
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL ||
|
||||||
|
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE ||
|
||||||
|
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE);
|
||||||
|
|
||||||
BVHMulti *const bvh_multi = static_cast<BVHMulti *>(bvh);
|
BVHMulti *const bvh_multi = static_cast<BVHMulti *>(bvh);
|
||||||
bvh_multi->sub_bvhs.resize(devices.size());
|
bvh_multi->sub_bvhs.resize(devices.size());
|
||||||
@@ -174,9 +185,14 @@ class MultiDevice : public Device {
|
|||||||
BVHParams params = bvh->params;
|
BVHParams params = bvh->params;
|
||||||
if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX)
|
if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX)
|
||||||
params.bvh_layout = BVH_LAYOUT_OPTIX;
|
params.bvh_layout = BVH_LAYOUT_OPTIX;
|
||||||
|
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL)
|
||||||
|
params.bvh_layout = BVH_LAYOUT_METAL;
|
||||||
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE)
|
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE)
|
||||||
params.bvh_layout = sub.device->info.type == DEVICE_OPTIX ? BVH_LAYOUT_OPTIX :
|
params.bvh_layout = sub.device->info.type == DEVICE_OPTIX ? BVH_LAYOUT_OPTIX :
|
||||||
BVH_LAYOUT_EMBREE;
|
BVH_LAYOUT_EMBREE;
|
||||||
|
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE)
|
||||||
|
params.bvh_layout = sub.device->info.type == DEVICE_METAL ? BVH_LAYOUT_METAL :
|
||||||
|
BVH_LAYOUT_EMBREE;
|
||||||
|
|
||||||
/* Skip building a bottom level acceleration structure for non-instanced geometry on Embree
|
/* Skip building a bottom level acceleration structure for non-instanced geometry on Embree
|
||||||
* (since they are put into the top level directly, see bvh_embree.cpp) */
|
* (since they are put into the top level directly, see bvh_embree.cpp) */
|
||||||
|
@@ -28,6 +28,7 @@
|
|||||||
# include "scene/mesh.h"
|
# include "scene/mesh.h"
|
||||||
# include "scene/object.h"
|
# include "scene/object.h"
|
||||||
# include "scene/pass.h"
|
# include "scene/pass.h"
|
||||||
|
# include "scene/pointcloud.h"
|
||||||
# include "scene/scene.h"
|
# include "scene/scene.h"
|
||||||
|
|
||||||
# include "util/debug.h"
|
# include "util/debug.h"
|
||||||
@@ -41,17 +42,19 @@
|
|||||||
# define __KERNEL_OPTIX__
|
# define __KERNEL_OPTIX__
|
||||||
# include "kernel/device/optix/globals.h"
|
# include "kernel/device/optix/globals.h"
|
||||||
|
|
||||||
|
# include <optix_denoiser_tiling.h>
|
||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
|
OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
|
||||||
: device(device), queue(device), state(device, "__denoiser_state")
|
: device(device), queue(device), state(device, "__denoiser_state", true)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||||
: CUDADevice(info, stats, profiler),
|
: CUDADevice(info, stats, profiler),
|
||||||
sbt_data(this, "__sbt", MEM_READ_ONLY),
|
sbt_data(this, "__sbt", MEM_READ_ONLY),
|
||||||
launch_params(this, "__params"),
|
launch_params(this, "__params", false),
|
||||||
denoiser_(this)
|
denoiser_(this)
|
||||||
{
|
{
|
||||||
/* Make the CUDA context current. */
|
/* Make the CUDA context current. */
|
||||||
@@ -208,11 +211,15 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
|||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3;
|
module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3;
|
||||||
module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
|
module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
module_options.boundValues = nullptr;
|
module_options.boundValues = nullptr;
|
||||||
module_options.numBoundValues = 0;
|
module_options.numBoundValues = 0;
|
||||||
|
# if OPTIX_ABI_VERSION >= 55
|
||||||
|
module_options.payloadTypes = nullptr;
|
||||||
|
module_options.numPayloadTypes = 0;
|
||||||
|
# endif
|
||||||
|
|
||||||
OptixPipelineCompileOptions pipeline_options = {};
|
OptixPipelineCompileOptions pipeline_options = {};
|
||||||
/* Default to no motion blur and two-level graph, since it is the fastest option. */
|
/* Default to no motion blur and two-level graph, since it is the fastest option. */
|
||||||
@@ -227,11 +234,18 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
|||||||
pipeline_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE;
|
pipeline_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE;
|
||||||
if (kernel_features & KERNEL_FEATURE_HAIR) {
|
if (kernel_features & KERNEL_FEATURE_HAIR) {
|
||||||
if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
|
if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
|
||||||
|
# if OPTIX_ABI_VERSION >= 55
|
||||||
|
pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM;
|
||||||
|
# else
|
||||||
pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE;
|
pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE;
|
||||||
|
# endif
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
|
pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
|
||||||
}
|
}
|
||||||
|
if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
|
||||||
|
pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
|
||||||
|
}
|
||||||
|
|
||||||
/* Keep track of whether motion blur is enabled, so to enable/disable motion in BVH builds
|
/* Keep track of whether motion blur is enabled, so to enable/disable motion in BVH builds
|
||||||
* This is necessary since objects may be reported to have motion if the Vector pass is
|
* This is necessary since objects may be reported to have motion if the Vector pass is
|
||||||
@@ -324,7 +338,13 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
|||||||
if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
|
if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
|
||||||
/* Built-in thick curve intersection. */
|
/* Built-in thick curve intersection. */
|
||||||
OptixBuiltinISOptions builtin_options = {};
|
OptixBuiltinISOptions builtin_options = {};
|
||||||
|
# if OPTIX_ABI_VERSION >= 55
|
||||||
|
builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
|
||||||
|
builtin_options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE;
|
||||||
|
builtin_options.curveEndcapFlags = OPTIX_CURVE_ENDCAP_DEFAULT; /* Disable end-caps. */
|
||||||
|
# else
|
||||||
builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
|
builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
|
||||||
|
# endif
|
||||||
builtin_options.usesMotionBlur = false;
|
builtin_options.usesMotionBlur = false;
|
||||||
|
|
||||||
optix_assert(optixBuiltinISModuleGet(
|
optix_assert(optixBuiltinISModuleGet(
|
||||||
@@ -356,6 +376,18 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Pointclouds */
|
||||||
|
if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
|
||||||
|
group_descs[PG_HITD_POINTCLOUD] = group_descs[PG_HITD];
|
||||||
|
group_descs[PG_HITD_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
|
||||||
|
group_descs[PG_HITD_POINTCLOUD].hitgroup.moduleIS = optix_module;
|
||||||
|
group_descs[PG_HITD_POINTCLOUD].hitgroup.entryFunctionNameIS = "__intersection__point";
|
||||||
|
group_descs[PG_HITS_POINTCLOUD] = group_descs[PG_HITS];
|
||||||
|
group_descs[PG_HITS_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
|
||||||
|
group_descs[PG_HITS_POINTCLOUD].hitgroup.moduleIS = optix_module;
|
||||||
|
group_descs[PG_HITS_POINTCLOUD].hitgroup.entryFunctionNameIS = "__intersection__point";
|
||||||
|
}
|
||||||
|
|
||||||
if (kernel_features & (KERNEL_FEATURE_SUBSURFACE | KERNEL_FEATURE_NODE_RAYTRACE)) {
|
if (kernel_features & (KERNEL_FEATURE_SUBSURFACE | KERNEL_FEATURE_NODE_RAYTRACE)) {
|
||||||
/* Add hit group for local intersections. */
|
/* Add hit group for local intersections. */
|
||||||
group_descs[PG_HITL].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
|
group_descs[PG_HITL].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
|
||||||
@@ -403,6 +435,10 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
|||||||
stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH);
|
stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH);
|
||||||
trace_css = std::max(trace_css,
|
trace_css = std::max(trace_css,
|
||||||
stack_size[PG_HITS_MOTION].cssIS + stack_size[PG_HITS_MOTION].cssAH);
|
stack_size[PG_HITS_MOTION].cssIS + stack_size[PG_HITS_MOTION].cssAH);
|
||||||
|
trace_css = std::max(
|
||||||
|
trace_css, stack_size[PG_HITD_POINTCLOUD].cssIS + stack_size[PG_HITD_POINTCLOUD].cssAH);
|
||||||
|
trace_css = std::max(
|
||||||
|
trace_css, stack_size[PG_HITS_POINTCLOUD].cssIS + stack_size[PG_HITS_POINTCLOUD].cssAH);
|
||||||
|
|
||||||
OptixPipelineLinkOptions link_options = {};
|
OptixPipelineLinkOptions link_options = {};
|
||||||
link_options.maxTraceDepth = 1;
|
link_options.maxTraceDepth = 1;
|
||||||
@@ -411,7 +447,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
|||||||
link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
|
link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
|
link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
|
if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
|
||||||
@@ -428,6 +464,10 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
|||||||
pipeline_groups.push_back(groups[PG_HITD_MOTION]);
|
pipeline_groups.push_back(groups[PG_HITD_MOTION]);
|
||||||
pipeline_groups.push_back(groups[PG_HITS_MOTION]);
|
pipeline_groups.push_back(groups[PG_HITS_MOTION]);
|
||||||
}
|
}
|
||||||
|
if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
|
||||||
|
pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
|
||||||
|
pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
|
||||||
|
}
|
||||||
pipeline_groups.push_back(groups[PG_CALL_SVM_AO]);
|
pipeline_groups.push_back(groups[PG_CALL_SVM_AO]);
|
||||||
pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]);
|
pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]);
|
||||||
|
|
||||||
@@ -467,6 +507,10 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
|||||||
pipeline_groups.push_back(groups[PG_HITD_MOTION]);
|
pipeline_groups.push_back(groups[PG_HITD_MOTION]);
|
||||||
pipeline_groups.push_back(groups[PG_HITS_MOTION]);
|
pipeline_groups.push_back(groups[PG_HITS_MOTION]);
|
||||||
}
|
}
|
||||||
|
if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
|
||||||
|
pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
|
||||||
|
pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
|
||||||
|
}
|
||||||
|
|
||||||
optix_assert(optixPipelineCreate(context,
|
optix_assert(optixPipelineCreate(context,
|
||||||
&pipeline_options,
|
&pipeline_options,
|
||||||
@@ -507,7 +551,7 @@ class OptiXDevice::DenoiseContext {
|
|||||||
: denoise_params(task.params),
|
: denoise_params(task.params),
|
||||||
render_buffers(task.render_buffers),
|
render_buffers(task.render_buffers),
|
||||||
buffer_params(task.buffer_params),
|
buffer_params(task.buffer_params),
|
||||||
guiding_buffer(device, "denoiser guiding passes buffer"),
|
guiding_buffer(device, "denoiser guiding passes buffer", true),
|
||||||
num_samples(task.num_samples)
|
num_samples(task.num_samples)
|
||||||
{
|
{
|
||||||
num_input_passes = 1;
|
num_input_passes = 1;
|
||||||
@@ -522,9 +566,9 @@ class OptiXDevice::DenoiseContext {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const int num_guiding_passes = num_input_passes - 1;
|
use_guiding_passes = (num_input_passes - 1) > 0;
|
||||||
|
|
||||||
if (num_guiding_passes) {
|
if (use_guiding_passes) {
|
||||||
if (task.allow_inplace_modification) {
|
if (task.allow_inplace_modification) {
|
||||||
guiding_params.device_pointer = render_buffers->buffer.device_pointer;
|
guiding_params.device_pointer = render_buffers->buffer.device_pointer;
|
||||||
|
|
||||||
@@ -577,6 +621,7 @@ class OptiXDevice::DenoiseContext {
|
|||||||
|
|
||||||
/* Number of input passes. Including the color and extra auxiliary passes. */
|
/* Number of input passes. Including the color and extra auxiliary passes. */
|
||||||
int num_input_passes = 0;
|
int num_input_passes = 0;
|
||||||
|
bool use_guiding_passes = false;
|
||||||
bool use_pass_albedo = false;
|
bool use_pass_albedo = false;
|
||||||
bool use_pass_normal = false;
|
bool use_pass_normal = false;
|
||||||
|
|
||||||
@@ -653,22 +698,22 @@ bool OptiXDevice::denoise_filter_guiding_preprocess(DenoiseContext &context)
|
|||||||
|
|
||||||
const int work_size = buffer_params.width * buffer_params.height;
|
const int work_size = buffer_params.width * buffer_params.height;
|
||||||
|
|
||||||
void *args[] = {const_cast<device_ptr *>(&context.guiding_params.device_pointer),
|
DeviceKernelArguments args(&context.guiding_params.device_pointer,
|
||||||
const_cast<int *>(&context.guiding_params.pass_stride),
|
&context.guiding_params.pass_stride,
|
||||||
const_cast<int *>(&context.guiding_params.pass_albedo),
|
&context.guiding_params.pass_albedo,
|
||||||
const_cast<int *>(&context.guiding_params.pass_normal),
|
&context.guiding_params.pass_normal,
|
||||||
&context.render_buffers->buffer.device_pointer,
|
&context.render_buffers->buffer.device_pointer,
|
||||||
const_cast<int *>(&buffer_params.offset),
|
&buffer_params.offset,
|
||||||
const_cast<int *>(&buffer_params.stride),
|
&buffer_params.stride,
|
||||||
const_cast<int *>(&buffer_params.pass_stride),
|
&buffer_params.pass_stride,
|
||||||
const_cast<int *>(&context.pass_sample_count),
|
&context.pass_sample_count,
|
||||||
const_cast<int *>(&context.pass_denoising_albedo),
|
&context.pass_denoising_albedo,
|
||||||
const_cast<int *>(&context.pass_denoising_normal),
|
&context.pass_denoising_normal,
|
||||||
const_cast<int *>(&buffer_params.full_x),
|
&buffer_params.full_x,
|
||||||
const_cast<int *>(&buffer_params.full_y),
|
&buffer_params.full_y,
|
||||||
const_cast<int *>(&buffer_params.width),
|
&buffer_params.width,
|
||||||
const_cast<int *>(&buffer_params.height),
|
&buffer_params.height,
|
||||||
const_cast<int *>(&context.num_samples)};
|
&context.num_samples);
|
||||||
|
|
||||||
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, work_size, args);
|
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, work_size, args);
|
||||||
}
|
}
|
||||||
@@ -679,11 +724,11 @@ bool OptiXDevice::denoise_filter_guiding_set_fake_albedo(DenoiseContext &context
|
|||||||
|
|
||||||
const int work_size = buffer_params.width * buffer_params.height;
|
const int work_size = buffer_params.width * buffer_params.height;
|
||||||
|
|
||||||
void *args[] = {const_cast<device_ptr *>(&context.guiding_params.device_pointer),
|
DeviceKernelArguments args(&context.guiding_params.device_pointer,
|
||||||
const_cast<int *>(&context.guiding_params.pass_stride),
|
&context.guiding_params.pass_stride,
|
||||||
const_cast<int *>(&context.guiding_params.pass_albedo),
|
&context.guiding_params.pass_albedo,
|
||||||
const_cast<int *>(&buffer_params.width),
|
&buffer_params.width,
|
||||||
const_cast<int *>(&buffer_params.height)};
|
&buffer_params.height);
|
||||||
|
|
||||||
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO, work_size, args);
|
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO, work_size, args);
|
||||||
}
|
}
|
||||||
@@ -708,7 +753,7 @@ void OptiXDevice::denoise_pass(DenoiseContext &context, PassType pass_type)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (!context.albedo_replaced_with_fake) {
|
else if (context.use_guiding_passes && !context.albedo_replaced_with_fake) {
|
||||||
context.albedo_replaced_with_fake = true;
|
context.albedo_replaced_with_fake = true;
|
||||||
if (!denoise_filter_guiding_set_fake_albedo(context)) {
|
if (!denoise_filter_guiding_set_fake_albedo(context)) {
|
||||||
LOG(ERROR) << "Error replacing real albedo with the fake one.";
|
LOG(ERROR) << "Error replacing real albedo with the fake one.";
|
||||||
@@ -779,15 +824,15 @@ bool OptiXDevice::denoise_filter_color_preprocess(DenoiseContext &context, const
|
|||||||
|
|
||||||
const int work_size = buffer_params.width * buffer_params.height;
|
const int work_size = buffer_params.width * buffer_params.height;
|
||||||
|
|
||||||
void *args[] = {&context.render_buffers->buffer.device_pointer,
|
DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
|
||||||
const_cast<int *>(&buffer_params.full_x),
|
&buffer_params.full_x,
|
||||||
const_cast<int *>(&buffer_params.full_y),
|
&buffer_params.full_y,
|
||||||
const_cast<int *>(&buffer_params.width),
|
&buffer_params.width,
|
||||||
const_cast<int *>(&buffer_params.height),
|
&buffer_params.height,
|
||||||
const_cast<int *>(&buffer_params.offset),
|
&buffer_params.offset,
|
||||||
const_cast<int *>(&buffer_params.stride),
|
&buffer_params.stride,
|
||||||
const_cast<int *>(&buffer_params.pass_stride),
|
&buffer_params.pass_stride,
|
||||||
const_cast<int *>(&pass.denoised_offset)};
|
&pass.denoised_offset);
|
||||||
|
|
||||||
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, work_size, args);
|
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, work_size, args);
|
||||||
}
|
}
|
||||||
@@ -799,20 +844,20 @@ bool OptiXDevice::denoise_filter_color_postprocess(DenoiseContext &context,
|
|||||||
|
|
||||||
const int work_size = buffer_params.width * buffer_params.height;
|
const int work_size = buffer_params.width * buffer_params.height;
|
||||||
|
|
||||||
void *args[] = {&context.render_buffers->buffer.device_pointer,
|
DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
|
||||||
const_cast<int *>(&buffer_params.full_x),
|
&buffer_params.full_x,
|
||||||
const_cast<int *>(&buffer_params.full_y),
|
&buffer_params.full_y,
|
||||||
const_cast<int *>(&buffer_params.width),
|
&buffer_params.width,
|
||||||
const_cast<int *>(&buffer_params.height),
|
&buffer_params.height,
|
||||||
const_cast<int *>(&buffer_params.offset),
|
&buffer_params.offset,
|
||||||
const_cast<int *>(&buffer_params.stride),
|
&buffer_params.stride,
|
||||||
const_cast<int *>(&buffer_params.pass_stride),
|
&buffer_params.pass_stride,
|
||||||
const_cast<int *>(&context.num_samples),
|
&context.num_samples,
|
||||||
const_cast<int *>(&pass.noisy_offset),
|
&pass.noisy_offset,
|
||||||
const_cast<int *>(&pass.denoised_offset),
|
&pass.denoised_offset,
|
||||||
const_cast<int *>(&context.pass_sample_count),
|
&context.pass_sample_count,
|
||||||
const_cast<int *>(&pass.num_components),
|
&pass.num_components,
|
||||||
const_cast<bool *>(&pass.use_compositing)};
|
&pass.use_compositing);
|
||||||
|
|
||||||
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, work_size, args);
|
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, work_size, args);
|
||||||
}
|
}
|
||||||
@@ -870,35 +915,33 @@ bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context)
|
|||||||
|
|
||||||
bool OptiXDevice::denoise_configure_if_needed(DenoiseContext &context)
|
bool OptiXDevice::denoise_configure_if_needed(DenoiseContext &context)
|
||||||
{
|
{
|
||||||
if (denoiser_.is_configured && (denoiser_.configured_size.x == context.buffer_params.width &&
|
/* Limit maximum tile size denoiser can be invoked with. */
|
||||||
denoiser_.configured_size.y == context.buffer_params.height)) {
|
const int2 tile_size = make_int2(min(context.buffer_params.width, 4096),
|
||||||
|
min(context.buffer_params.height, 4096));
|
||||||
|
|
||||||
|
if (denoiser_.is_configured &&
|
||||||
|
(denoiser_.configured_size.x == tile_size.x && denoiser_.configured_size.y == tile_size.y)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
const BufferParams &buffer_params = context.buffer_params;
|
|
||||||
|
|
||||||
OptixDenoiserSizes sizes = {};
|
|
||||||
optix_assert(optixDenoiserComputeMemoryResources(
|
optix_assert(optixDenoiserComputeMemoryResources(
|
||||||
denoiser_.optix_denoiser, buffer_params.width, buffer_params.height, &sizes));
|
denoiser_.optix_denoiser, tile_size.x, tile_size.y, &denoiser_.sizes));
|
||||||
|
|
||||||
/* Denoiser is invoked on whole images only, so no overlap needed (would be used for tiling). */
|
|
||||||
denoiser_.scratch_size = sizes.withoutOverlapScratchSizeInBytes;
|
|
||||||
denoiser_.scratch_offset = sizes.stateSizeInBytes;
|
|
||||||
|
|
||||||
/* Allocate denoiser state if tile size has changed since last setup. */
|
/* Allocate denoiser state if tile size has changed since last setup. */
|
||||||
denoiser_.state.alloc_to_device(denoiser_.scratch_offset + denoiser_.scratch_size);
|
denoiser_.state.alloc_to_device(denoiser_.sizes.stateSizeInBytes +
|
||||||
|
denoiser_.sizes.withOverlapScratchSizeInBytes);
|
||||||
|
|
||||||
/* Initialize denoiser state for the current tile size. */
|
/* Initialize denoiser state for the current tile size. */
|
||||||
const OptixResult result = optixDenoiserSetup(
|
const OptixResult result = optixDenoiserSetup(
|
||||||
denoiser_.optix_denoiser,
|
denoiser_.optix_denoiser,
|
||||||
0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called
|
0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called
|
||||||
on a stream that is not the default stream */
|
on a stream that is not the default stream */
|
||||||
buffer_params.width,
|
tile_size.x + denoiser_.sizes.overlapWindowSizeInPixels * 2,
|
||||||
buffer_params.height,
|
tile_size.y + denoiser_.sizes.overlapWindowSizeInPixels * 2,
|
||||||
denoiser_.state.device_pointer,
|
denoiser_.state.device_pointer,
|
||||||
denoiser_.scratch_offset,
|
denoiser_.sizes.stateSizeInBytes,
|
||||||
denoiser_.state.device_pointer + denoiser_.scratch_offset,
|
denoiser_.state.device_pointer + denoiser_.sizes.stateSizeInBytes,
|
||||||
denoiser_.scratch_size);
|
denoiser_.sizes.withOverlapScratchSizeInBytes);
|
||||||
if (result != OPTIX_SUCCESS) {
|
if (result != OPTIX_SUCCESS) {
|
||||||
set_error("Failed to set up OptiX denoiser");
|
set_error("Failed to set up OptiX denoiser");
|
||||||
return false;
|
return false;
|
||||||
@@ -907,8 +950,7 @@ bool OptiXDevice::denoise_configure_if_needed(DenoiseContext &context)
|
|||||||
cuda_assert(cuCtxSynchronize());
|
cuda_assert(cuCtxSynchronize());
|
||||||
|
|
||||||
denoiser_.is_configured = true;
|
denoiser_.is_configured = true;
|
||||||
denoiser_.configured_size.x = buffer_params.width;
|
denoiser_.configured_size = tile_size;
|
||||||
denoiser_.configured_size.y = buffer_params.height;
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -979,18 +1021,20 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
|
|||||||
guide_layers.albedo = albedo_layer;
|
guide_layers.albedo = albedo_layer;
|
||||||
guide_layers.normal = normal_layer;
|
guide_layers.normal = normal_layer;
|
||||||
|
|
||||||
optix_assert(optixDenoiserInvoke(denoiser_.optix_denoiser,
|
optix_assert(optixUtilDenoiserInvokeTiled(denoiser_.optix_denoiser,
|
||||||
denoiser_.queue.stream(),
|
denoiser_.queue.stream(),
|
||||||
&params,
|
&params,
|
||||||
denoiser_.state.device_pointer,
|
denoiser_.state.device_pointer,
|
||||||
denoiser_.scratch_offset,
|
denoiser_.sizes.stateSizeInBytes,
|
||||||
&guide_layers,
|
&guide_layers,
|
||||||
&image_layers,
|
&image_layers,
|
||||||
1,
|
1,
|
||||||
0,
|
denoiser_.state.device_pointer +
|
||||||
0,
|
denoiser_.sizes.stateSizeInBytes,
|
||||||
denoiser_.state.device_pointer + denoiser_.scratch_offset,
|
denoiser_.sizes.withOverlapScratchSizeInBytes,
|
||||||
denoiser_.scratch_size));
|
denoiser_.sizes.overlapWindowSizeInPixels,
|
||||||
|
denoiser_.configured_size.x,
|
||||||
|
denoiser_.configured_size.y));
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -1000,6 +1044,13 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
|
|||||||
const OptixBuildInput &build_input,
|
const OptixBuildInput &build_input,
|
||||||
uint16_t num_motion_steps)
|
uint16_t num_motion_steps)
|
||||||
{
|
{
|
||||||
|
/* Allocate and build acceleration structures only one at a time, to prevent parallel builds
|
||||||
|
* from running out of memory (since both original and compacted acceleration structure memory
|
||||||
|
* may be allocated at the same time for the duration of this function). The builds would
|
||||||
|
* otherwise happen on the same CUDA stream anyway. */
|
||||||
|
static thread_mutex mutex;
|
||||||
|
thread_scoped_lock lock(mutex);
|
||||||
|
|
||||||
const CUDAContextScope scope(this);
|
const CUDAContextScope scope(this);
|
||||||
|
|
||||||
const bool use_fast_trace_bvh = (bvh->params.bvh_type == BVH_TYPE_STATIC);
|
const bool use_fast_trace_bvh = (bvh->params.bvh_type == BVH_TYPE_STATIC);
|
||||||
@@ -1025,13 +1076,14 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
|
|||||||
optix_assert(optixAccelComputeMemoryUsage(context, &options, &build_input, 1, &sizes));
|
optix_assert(optixAccelComputeMemoryUsage(context, &options, &build_input, 1, &sizes));
|
||||||
|
|
||||||
/* Allocate required output buffers. */
|
/* Allocate required output buffers. */
|
||||||
device_only_memory<char> temp_mem(this, "optix temp as build mem");
|
device_only_memory<char> temp_mem(this, "optix temp as build mem", true);
|
||||||
temp_mem.alloc_to_device(align_up(sizes.tempSizeInBytes, 8) + 8);
|
temp_mem.alloc_to_device(align_up(sizes.tempSizeInBytes, 8) + 8);
|
||||||
if (!temp_mem.device_pointer) {
|
if (!temp_mem.device_pointer) {
|
||||||
/* Make sure temporary memory allocation succeeded. */
|
/* Make sure temporary memory allocation succeeded. */
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Acceleration structure memory has to be allocated on the device (not allowed on the host). */
|
||||||
device_only_memory<char> &out_data = *bvh->as_data;
|
device_only_memory<char> &out_data = *bvh->as_data;
|
||||||
if (operation == OPTIX_BUILD_OPERATION_BUILD) {
|
if (operation == OPTIX_BUILD_OPERATION_BUILD) {
|
||||||
assert(out_data.device == this);
|
assert(out_data.device == this);
|
||||||
@@ -1080,12 +1132,13 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
|
|||||||
|
|
||||||
/* There is no point compacting if the size does not change. */
|
/* There is no point compacting if the size does not change. */
|
||||||
if (compacted_size < sizes.outputSizeInBytes) {
|
if (compacted_size < sizes.outputSizeInBytes) {
|
||||||
device_only_memory<char> compacted_data(this, "optix compacted as");
|
device_only_memory<char> compacted_data(this, "optix compacted as", false);
|
||||||
compacted_data.alloc_to_device(compacted_size);
|
compacted_data.alloc_to_device(compacted_size);
|
||||||
if (!compacted_data.device_pointer)
|
if (!compacted_data.device_pointer) {
|
||||||
/* Do not compact if memory allocation for compacted acceleration structure fails.
|
/* Do not compact if memory allocation for compacted acceleration structure fails.
|
||||||
* Can just use the uncompacted one then, so succeed here regardless. */
|
* Can just use the uncompacted one then, so succeed here regardless. */
|
||||||
return !have_error();
|
return !have_error();
|
||||||
|
}
|
||||||
|
|
||||||
optix_assert(optixAccelCompact(
|
optix_assert(optixAccelCompact(
|
||||||
context, NULL, out_handle, compacted_data.device_pointer, compacted_size, &out_handle));
|
context, NULL, out_handle, compacted_data.device_pointer, compacted_size, &out_handle));
|
||||||
@@ -1096,6 +1149,8 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
|
|||||||
|
|
||||||
std::swap(out_data.device_size, compacted_data.device_size);
|
std::swap(out_data.device_size, compacted_data.device_size);
|
||||||
std::swap(out_data.device_pointer, compacted_data.device_pointer);
|
std::swap(out_data.device_pointer, compacted_data.device_pointer);
|
||||||
|
/* Original acceleration structure memory is freed when 'compacted_data' goes out of scope.
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1178,20 +1233,27 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
|||||||
int ka = max(k0 - 1, curve.first_key);
|
int ka = max(k0 - 1, curve.first_key);
|
||||||
int kb = min(k1 + 1, curve.first_key + curve.num_keys - 1);
|
int kb = min(k1 + 1, curve.first_key + curve.num_keys - 1);
|
||||||
|
|
||||||
|
index_data[i] = i * 4;
|
||||||
|
float4 *const v = vertex_data.data() + step * num_vertices + index_data[i];
|
||||||
|
|
||||||
|
# if OPTIX_ABI_VERSION >= 55
|
||||||
|
v[0] = make_float4(keys[ka].x, keys[ka].y, keys[ka].z, curve_radius[ka]);
|
||||||
|
v[1] = make_float4(keys[k0].x, keys[k0].y, keys[k0].z, curve_radius[k0]);
|
||||||
|
v[2] = make_float4(keys[k1].x, keys[k1].y, keys[k1].z, curve_radius[k1]);
|
||||||
|
v[3] = make_float4(keys[kb].x, keys[kb].y, keys[kb].z, curve_radius[kb]);
|
||||||
|
# else
|
||||||
const float4 px = make_float4(keys[ka].x, keys[k0].x, keys[k1].x, keys[kb].x);
|
const float4 px = make_float4(keys[ka].x, keys[k0].x, keys[k1].x, keys[kb].x);
|
||||||
const float4 py = make_float4(keys[ka].y, keys[k0].y, keys[k1].y, keys[kb].y);
|
const float4 py = make_float4(keys[ka].y, keys[k0].y, keys[k1].y, keys[kb].y);
|
||||||
const float4 pz = make_float4(keys[ka].z, keys[k0].z, keys[k1].z, keys[kb].z);
|
const float4 pz = make_float4(keys[ka].z, keys[k0].z, keys[k1].z, keys[kb].z);
|
||||||
const float4 pw = make_float4(
|
const float4 pw = make_float4(
|
||||||
curve_radius[ka], curve_radius[k0], curve_radius[k1], curve_radius[kb]);
|
curve_radius[ka], curve_radius[k0], curve_radius[k1], curve_radius[kb]);
|
||||||
|
|
||||||
/* Convert Catmull-Rom data to Bezier spline. */
|
/* Convert Catmull-Rom data to B-spline. */
|
||||||
static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f;
|
static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f;
|
||||||
static const float4 cr2bsp1 = make_float4(-2, 11, -4, +1) / 6.f;
|
static const float4 cr2bsp1 = make_float4(-2, 11, -4, +1) / 6.f;
|
||||||
static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f;
|
static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f;
|
||||||
static const float4 cr2bsp3 = make_float4(-2, +5, -4, +7) / 6.f;
|
static const float4 cr2bsp3 = make_float4(-2, +5, -4, +7) / 6.f;
|
||||||
|
|
||||||
index_data[i] = i * 4;
|
|
||||||
float4 *const v = vertex_data.data() + step * num_vertices + index_data[i];
|
|
||||||
v[0] = make_float4(
|
v[0] = make_float4(
|
||||||
dot(cr2bsp0, px), dot(cr2bsp0, py), dot(cr2bsp0, pz), dot(cr2bsp0, pw));
|
dot(cr2bsp0, px), dot(cr2bsp0, py), dot(cr2bsp0, pz), dot(cr2bsp0, pw));
|
||||||
v[1] = make_float4(
|
v[1] = make_float4(
|
||||||
@@ -1200,6 +1262,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
|||||||
dot(cr2bsp2, px), dot(cr2bsp2, py), dot(cr2bsp2, pz), dot(cr2bsp2, pw));
|
dot(cr2bsp2, px), dot(cr2bsp2, py), dot(cr2bsp2, pz), dot(cr2bsp2, pw));
|
||||||
v[3] = make_float4(
|
v[3] = make_float4(
|
||||||
dot(cr2bsp3, px), dot(cr2bsp3, py), dot(cr2bsp3, pz), dot(cr2bsp3, pw));
|
dot(cr2bsp3, px), dot(cr2bsp3, py), dot(cr2bsp3, pz), dot(cr2bsp3, pw));
|
||||||
|
# endif
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
BoundBox bounds = BoundBox::empty;
|
BoundBox bounds = BoundBox::empty;
|
||||||
@@ -1241,7 +1304,11 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
|||||||
OptixBuildInput build_input = {};
|
OptixBuildInput build_input = {};
|
||||||
if (hair->curve_shape == CURVE_THICK) {
|
if (hair->curve_shape == CURVE_THICK) {
|
||||||
build_input.type = OPTIX_BUILD_INPUT_TYPE_CURVES;
|
build_input.type = OPTIX_BUILD_INPUT_TYPE_CURVES;
|
||||||
|
# if OPTIX_ABI_VERSION >= 55
|
||||||
|
build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
|
||||||
|
# else
|
||||||
build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
|
build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
|
||||||
|
# endif
|
||||||
build_input.curveArray.numPrimitives = num_segments;
|
build_input.curveArray.numPrimitives = num_segments;
|
||||||
build_input.curveArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
|
build_input.curveArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
|
||||||
build_input.curveArray.numVertices = num_vertices;
|
build_input.curveArray.numVertices = num_vertices;
|
||||||
@@ -1255,7 +1322,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
|||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
/* Disable visibility test any-hit program, since it is already checked during
|
/* Disable visibility test any-hit program, since it is already checked during
|
||||||
* intersection. Those trace calls that require anyhit can force it with a ray flag. */
|
* intersection. Those trace calls that require any-hit can force it with a ray flag. */
|
||||||
build_flags |= OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT;
|
build_flags |= OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT;
|
||||||
|
|
||||||
build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
|
build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
|
||||||
@@ -1335,6 +1402,86 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
|||||||
build_input.triangleArray.numSbtRecords = 1;
|
build_input.triangleArray.numSbtRecords = 1;
|
||||||
build_input.triangleArray.primitiveIndexOffset = mesh->prim_offset;
|
build_input.triangleArray.primitiveIndexOffset = mesh->prim_offset;
|
||||||
|
|
||||||
|
if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
|
||||||
|
progress.set_error("Failed to build OptiX acceleration structure");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (geom->geometry_type == Geometry::POINTCLOUD) {
|
||||||
|
/* Build BLAS for points primitives. */
|
||||||
|
PointCloud *const pointcloud = static_cast<PointCloud *const>(geom);
|
||||||
|
const size_t num_points = pointcloud->num_points();
|
||||||
|
if (num_points == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t num_motion_steps = 1;
|
||||||
|
Attribute *motion_points = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||||
|
if (motion_blur && pointcloud->get_use_motion_blur() && motion_points) {
|
||||||
|
num_motion_steps = pointcloud->get_motion_steps();
|
||||||
|
}
|
||||||
|
|
||||||
|
device_vector<OptixAabb> aabb_data(this, "optix temp aabb data", MEM_READ_ONLY);
|
||||||
|
aabb_data.alloc(num_points * num_motion_steps);
|
||||||
|
|
||||||
|
/* Get AABBs for each motion step. */
|
||||||
|
for (size_t step = 0; step < num_motion_steps; ++step) {
|
||||||
|
/* The center step for motion vertices is not stored in the attribute. */
|
||||||
|
const float3 *points = pointcloud->get_points().data();
|
||||||
|
const float *radius = pointcloud->get_radius().data();
|
||||||
|
size_t center_step = (num_motion_steps - 1) / 2;
|
||||||
|
if (step != center_step) {
|
||||||
|
size_t attr_offset = (step > center_step) ? step - 1 : step;
|
||||||
|
/* Technically this is a float4 array, but sizeof(float3) == sizeof(float4). */
|
||||||
|
points = motion_points->data_float3() + attr_offset * num_points;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < num_points; ++i) {
|
||||||
|
const PointCloud::Point point = pointcloud->get_point(i);
|
||||||
|
BoundBox bounds = BoundBox::empty;
|
||||||
|
point.bounds_grow(points, radius, bounds);
|
||||||
|
|
||||||
|
const size_t index = step * num_points + i;
|
||||||
|
aabb_data[index].minX = bounds.min.x;
|
||||||
|
aabb_data[index].minY = bounds.min.y;
|
||||||
|
aabb_data[index].minZ = bounds.min.z;
|
||||||
|
aabb_data[index].maxX = bounds.max.x;
|
||||||
|
aabb_data[index].maxY = bounds.max.y;
|
||||||
|
aabb_data[index].maxZ = bounds.max.z;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Upload AABB data to GPU. */
|
||||||
|
aabb_data.copy_to_device();
|
||||||
|
|
||||||
|
vector<device_ptr> aabb_ptrs;
|
||||||
|
aabb_ptrs.reserve(num_motion_steps);
|
||||||
|
for (size_t step = 0; step < num_motion_steps; ++step) {
|
||||||
|
aabb_ptrs.push_back(aabb_data.device_pointer + step * num_points * sizeof(OptixAabb));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Disable visibility test any-hit program, since it is already checked during
|
||||||
|
* intersection. Those trace calls that require any-hit can force it with a ray flag.
|
||||||
|
* For those, force a single any-hit call, so shadow record-all behavior works correctly. */
|
||||||
|
unsigned int build_flags = OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT |
|
||||||
|
OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
|
||||||
|
OptixBuildInput build_input = {};
|
||||||
|
build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
|
||||||
|
# if OPTIX_ABI_VERSION < 23
|
||||||
|
build_input.aabbArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
|
||||||
|
build_input.aabbArray.numPrimitives = num_points;
|
||||||
|
build_input.aabbArray.strideInBytes = sizeof(OptixAabb);
|
||||||
|
build_input.aabbArray.flags = &build_flags;
|
||||||
|
build_input.aabbArray.numSbtRecords = 1;
|
||||||
|
build_input.aabbArray.primitiveIndexOffset = pointcloud->prim_offset;
|
||||||
|
# else
|
||||||
|
build_input.customPrimitiveArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
|
||||||
|
build_input.customPrimitiveArray.numPrimitives = num_points;
|
||||||
|
build_input.customPrimitiveArray.strideInBytes = sizeof(OptixAabb);
|
||||||
|
build_input.customPrimitiveArray.flags = &build_flags;
|
||||||
|
build_input.customPrimitiveArray.numSbtRecords = 1;
|
||||||
|
build_input.customPrimitiveArray.primitiveIndexOffset = pointcloud->prim_offset;
|
||||||
|
# endif
|
||||||
|
|
||||||
if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
|
if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
|
||||||
progress.set_error("Failed to build OptiX acceleration structure");
|
progress.set_error("Failed to build OptiX acceleration structure");
|
||||||
}
|
}
|
||||||
@@ -1422,9 +1569,22 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
|||||||
instance.sbtOffset = PG_HITD_MOTION - PG_HITD;
|
instance.sbtOffset = PG_HITD_MOTION - PG_HITD;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else if (ob->get_geometry()->geometry_type == Geometry::POINTCLOUD) {
|
||||||
|
/* Use the hit group that has an intersection program for point clouds. */
|
||||||
|
instance.sbtOffset = PG_HITD_POINTCLOUD - PG_HITD;
|
||||||
|
|
||||||
|
/* Also skip point clouds in local trace calls. */
|
||||||
|
instance.visibilityMask |= 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
# if OPTIX_ABI_VERSION < 55
|
||||||
|
/* Cannot disable any-hit program for thick curves, since it needs to filter out end-caps. */
|
||||||
|
else
|
||||||
|
# endif
|
||||||
|
{
|
||||||
/* Can disable __anyhit__kernel_optix_visibility_test by default (except for thick curves,
|
/* Can disable __anyhit__kernel_optix_visibility_test by default (except for thick curves,
|
||||||
* since it needs to filter out end-caps there).
|
* since it needs to filter out end-caps there).
|
||||||
|
|
||||||
* It is enabled where necessary (visibility mask exceeds 8 bits or the other any-hit
|
* It is enabled where necessary (visibility mask exceeds 8 bits or the other any-hit
|
||||||
* programs like __anyhit__kernel_optix_shadow_all_hit) via OPTIX_RAY_FLAG_ENFORCE_ANYHIT.
|
* programs like __anyhit__kernel_optix_shadow_all_hit) via OPTIX_RAY_FLAG_ENFORCE_ANYHIT.
|
||||||
*/
|
*/
|
||||||
@@ -1494,9 +1654,6 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
|||||||
cuMemcpyHtoD(motion_transform_gpu, &motion_transform, motion_transform_size);
|
cuMemcpyHtoD(motion_transform_gpu, &motion_transform, motion_transform_size);
|
||||||
delete[] reinterpret_cast<uint8_t *>(&motion_transform);
|
delete[] reinterpret_cast<uint8_t *>(&motion_transform);
|
||||||
|
|
||||||
/* Disable instance transform if object uses motion transform already. */
|
|
||||||
instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
|
|
||||||
|
|
||||||
/* Get traversable handle to motion transform. */
|
/* Get traversable handle to motion transform. */
|
||||||
optixConvertPointerToTraversableHandle(context,
|
optixConvertPointerToTraversableHandle(context,
|
||||||
motion_transform_gpu,
|
motion_transform_gpu,
|
||||||
@@ -1510,10 +1667,6 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
|||||||
/* Set transform matrix. */
|
/* Set transform matrix. */
|
||||||
memcpy(instance.transform, &ob->get_tfm(), sizeof(instance.transform));
|
memcpy(instance.transform, &ob->get_tfm(), sizeof(instance.transform));
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
/* Disable instance transform if geometry already has it applied to vertex data. */
|
|
||||||
instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -44,6 +44,8 @@ enum {
|
|||||||
PG_HITV, /* __VOLUME__ hit group. */
|
PG_HITV, /* __VOLUME__ hit group. */
|
||||||
PG_HITD_MOTION,
|
PG_HITD_MOTION,
|
||||||
PG_HITS_MOTION,
|
PG_HITS_MOTION,
|
||||||
|
PG_HITD_POINTCLOUD,
|
||||||
|
PG_HITS_POINTCLOUD,
|
||||||
PG_CALL_SVM_AO,
|
PG_CALL_SVM_AO,
|
||||||
PG_CALL_SVM_BEVEL,
|
PG_CALL_SVM_BEVEL,
|
||||||
NUM_PROGRAM_GROUPS
|
NUM_PROGRAM_GROUPS
|
||||||
@@ -52,9 +54,9 @@ enum {
|
|||||||
static const int MISS_PROGRAM_GROUP_OFFSET = PG_MISS;
|
static const int MISS_PROGRAM_GROUP_OFFSET = PG_MISS;
|
||||||
static const int NUM_MIS_PROGRAM_GROUPS = 1;
|
static const int NUM_MIS_PROGRAM_GROUPS = 1;
|
||||||
static const int HIT_PROGAM_GROUP_OFFSET = PG_HITD;
|
static const int HIT_PROGAM_GROUP_OFFSET = PG_HITD;
|
||||||
static const int NUM_HIT_PROGRAM_GROUPS = 6;
|
static const int NUM_HIT_PROGRAM_GROUPS = 8;
|
||||||
static const int CALLABLE_PROGRAM_GROUPS_BASE = PG_CALL_SVM_AO;
|
static const int CALLABLE_PROGRAM_GROUPS_BASE = PG_CALL_SVM_AO;
|
||||||
static const int NUM_CALLABLE_PROGRAM_GROUPS = 3;
|
static const int NUM_CALLABLE_PROGRAM_GROUPS = 2;
|
||||||
|
|
||||||
/* List of OptiX pipelines. */
|
/* List of OptiX pipelines. */
|
||||||
enum { PIP_SHADE_RAYTRACE, PIP_INTERSECT, NUM_PIPELINES };
|
enum { PIP_SHADE_RAYTRACE, PIP_INTERSECT, NUM_PIPELINES };
|
||||||
@@ -98,8 +100,7 @@ class OptiXDevice : public CUDADevice {
|
|||||||
/* OptiX denoiser state and scratch buffers, stored in a single memory buffer.
|
/* OptiX denoiser state and scratch buffers, stored in a single memory buffer.
|
||||||
* The memory layout is as follows: [denoiser state][scratch buffer]. */
|
* The memory layout is as follows: [denoiser state][scratch buffer]. */
|
||||||
device_only_memory<unsigned char> state;
|
device_only_memory<unsigned char> state;
|
||||||
size_t scratch_offset = 0;
|
OptixDenoiserSizes sizes = {};
|
||||||
size_t scratch_size = 0;
|
|
||||||
|
|
||||||
bool use_pass_albedo = false;
|
bool use_pass_albedo = false;
|
||||||
bool use_pass_normal = false;
|
bool use_pass_normal = false;
|
||||||
|
@@ -47,7 +47,9 @@ static bool is_optix_specific_kernel(DeviceKernel kernel)
|
|||||||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
|
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[])
|
bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
|
||||||
|
const int work_size,
|
||||||
|
DeviceKernelArguments const &args)
|
||||||
{
|
{
|
||||||
if (!is_optix_specific_kernel(kernel)) {
|
if (!is_optix_specific_kernel(kernel)) {
|
||||||
return CUDADeviceQueue::enqueue(kernel, work_size, args);
|
return CUDADeviceQueue::enqueue(kernel, work_size, args);
|
||||||
@@ -69,7 +71,7 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *a
|
|||||||
cuda_device_assert(
|
cuda_device_assert(
|
||||||
cuda_device_,
|
cuda_device_,
|
||||||
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, path_index_array),
|
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, path_index_array),
|
||||||
args[0], // &d_path_index
|
args.values[0], // &d_path_index
|
||||||
sizeof(device_ptr),
|
sizeof(device_ptr),
|
||||||
cuda_stream_));
|
cuda_stream_));
|
||||||
|
|
||||||
@@ -78,7 +80,7 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *a
|
|||||||
cuda_device_assert(
|
cuda_device_assert(
|
||||||
cuda_device_,
|
cuda_device_,
|
||||||
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
|
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
|
||||||
args[1], // &d_render_buffer
|
args.values[1], // &d_render_buffer
|
||||||
sizeof(device_ptr),
|
sizeof(device_ptr),
|
||||||
cuda_stream_));
|
cuda_stream_));
|
||||||
}
|
}
|
||||||
|
@@ -31,7 +31,9 @@ class OptiXDeviceQueue : public CUDADeviceQueue {
|
|||||||
|
|
||||||
virtual void init_execution() override;
|
virtual void init_execution() override;
|
||||||
|
|
||||||
virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) override;
|
virtual bool enqueue(DeviceKernel kernel,
|
||||||
|
const int work_size,
|
||||||
|
DeviceKernelArguments const &args) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
CCL_NAMESPACE_END
|
||||||
|
@@ -31,6 +31,72 @@ class device_memory;
|
|||||||
|
|
||||||
struct KernelWorkTile;
|
struct KernelWorkTile;
|
||||||
|
|
||||||
|
/* Container for device kernel arguments with type correctness ensured by API. */
|
||||||
|
struct DeviceKernelArguments {
|
||||||
|
|
||||||
|
enum Type {
|
||||||
|
POINTER,
|
||||||
|
INT32,
|
||||||
|
FLOAT32,
|
||||||
|
BOOLEAN,
|
||||||
|
KERNEL_FILM_CONVERT,
|
||||||
|
};
|
||||||
|
|
||||||
|
static const int MAX_ARGS = 16;
|
||||||
|
Type types[MAX_ARGS];
|
||||||
|
void *values[MAX_ARGS];
|
||||||
|
size_t sizes[MAX_ARGS];
|
||||||
|
size_t count = 0;
|
||||||
|
|
||||||
|
DeviceKernelArguments()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T> DeviceKernelArguments(const T *arg)
|
||||||
|
{
|
||||||
|
add(arg);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T, class... Args> DeviceKernelArguments(const T *first, Args... args)
|
||||||
|
{
|
||||||
|
add(first);
|
||||||
|
add(args...);
|
||||||
|
}
|
||||||
|
|
||||||
|
void add(const KernelFilmConvert *value)
|
||||||
|
{
|
||||||
|
add(KERNEL_FILM_CONVERT, value, sizeof(KernelFilmConvert));
|
||||||
|
}
|
||||||
|
void add(const device_ptr *value)
|
||||||
|
{
|
||||||
|
add(POINTER, value, sizeof(device_ptr));
|
||||||
|
}
|
||||||
|
void add(const int32_t *value)
|
||||||
|
{
|
||||||
|
add(INT32, value, sizeof(int32_t));
|
||||||
|
}
|
||||||
|
void add(const float *value)
|
||||||
|
{
|
||||||
|
add(FLOAT32, value, sizeof(float));
|
||||||
|
}
|
||||||
|
void add(const bool *value)
|
||||||
|
{
|
||||||
|
add(BOOLEAN, value, 4);
|
||||||
|
}
|
||||||
|
void add(const Type type, const void *value, size_t size)
|
||||||
|
{
|
||||||
|
types[count] = type;
|
||||||
|
values[count] = (void *)value;
|
||||||
|
sizes[count] = size;
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
template<typename T, typename... Args> void add(const T *first, Args... args)
|
||||||
|
{
|
||||||
|
add(first);
|
||||||
|
add(args...);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
/* Abstraction of a command queue for a device.
|
/* Abstraction of a command queue for a device.
|
||||||
* Provides API to schedule kernel execution in a specific queue with minimal possible overhead
|
* Provides API to schedule kernel execution in a specific queue with minimal possible overhead
|
||||||
* from driver side.
|
* from driver side.
|
||||||
@@ -66,7 +132,9 @@ class DeviceQueue {
|
|||||||
* - int: pass pointer to the int
|
* - int: pass pointer to the int
|
||||||
* - device memory: pass pointer to device_memory.device_pointer
|
* - device memory: pass pointer to device_memory.device_pointer
|
||||||
* Return false if there was an error executing this or a previous kernel. */
|
* Return false if there was an error executing this or a previous kernel. */
|
||||||
virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) = 0;
|
virtual bool enqueue(DeviceKernel kernel,
|
||||||
|
const int work_size,
|
||||||
|
DeviceKernelArguments const &args) = 0;
|
||||||
|
|
||||||
/* Wait until all enqueued kernels have finished execution.
|
/* Wait until all enqueued kernels have finished execution.
|
||||||
* Return false if there was an error executing any of the enqueued kernels. */
|
* Return false if there was an error executing any of the enqueued kernels. */
|
||||||
|
@@ -31,7 +31,7 @@ struct Node;
|
|||||||
struct NodeType;
|
struct NodeType;
|
||||||
struct Transform;
|
struct Transform;
|
||||||
|
|
||||||
/* Note: in the following macros we use "type const &" instead of "const type &"
|
/* NOTE: in the following macros we use "type const &" instead of "const type &"
|
||||||
* to avoid issues when pasting a pointer type. */
|
* to avoid issues when pasting a pointer type. */
|
||||||
#define NODE_SOCKET_API_BASE_METHODS(type_, name, string_name) \
|
#define NODE_SOCKET_API_BASE_METHODS(type_, name, string_name) \
|
||||||
const SocketType *get_##name##_socket() const \
|
const SocketType *get_##name##_socket() const \
|
||||||
|
@@ -54,30 +54,30 @@ void PassAccessorGPU::run_film_convert_kernels(DeviceKernel kernel,
|
|||||||
if (destination.d_pixels) {
|
if (destination.d_pixels) {
|
||||||
DCHECK_EQ(destination.stride, 0) << "Custom stride for float destination is not implemented.";
|
DCHECK_EQ(destination.stride, 0) << "Custom stride for float destination is not implemented.";
|
||||||
|
|
||||||
void *args[] = {const_cast<KernelFilmConvert *>(&kfilm_convert),
|
DeviceKernelArguments args(&kfilm_convert,
|
||||||
const_cast<device_ptr *>(&destination.d_pixels),
|
&destination.d_pixels,
|
||||||
const_cast<device_ptr *>(&render_buffers->buffer.device_pointer),
|
&render_buffers->buffer.device_pointer,
|
||||||
const_cast<int *>(&work_size),
|
&work_size,
|
||||||
const_cast<int *>(&buffer_params.window_width),
|
&buffer_params.window_width,
|
||||||
const_cast<int *>(&offset),
|
&offset,
|
||||||
const_cast<int *>(&buffer_params.stride),
|
&buffer_params.stride,
|
||||||
const_cast<int *>(&destination.offset),
|
&destination.offset,
|
||||||
const_cast<int *>(&destination_stride)};
|
&destination_stride);
|
||||||
|
|
||||||
queue_->enqueue(kernel, work_size, args);
|
queue_->enqueue(kernel, work_size, args);
|
||||||
}
|
}
|
||||||
if (destination.d_pixels_half_rgba) {
|
if (destination.d_pixels_half_rgba) {
|
||||||
const DeviceKernel kernel_half_float = static_cast<DeviceKernel>(kernel + 1);
|
const DeviceKernel kernel_half_float = static_cast<DeviceKernel>(kernel + 1);
|
||||||
|
|
||||||
void *args[] = {const_cast<KernelFilmConvert *>(&kfilm_convert),
|
DeviceKernelArguments args(&kfilm_convert,
|
||||||
const_cast<device_ptr *>(&destination.d_pixels_half_rgba),
|
&destination.d_pixels_half_rgba,
|
||||||
const_cast<device_ptr *>(&render_buffers->buffer.device_pointer),
|
&render_buffers->buffer.device_pointer,
|
||||||
const_cast<int *>(&work_size),
|
&work_size,
|
||||||
const_cast<int *>(&buffer_params.window_width),
|
&buffer_params.window_width,
|
||||||
const_cast<int *>(&offset),
|
&offset,
|
||||||
const_cast<int *>(&buffer_params.stride),
|
&buffer_params.stride,
|
||||||
const_cast<int *>(&destination.offset),
|
&destination.offset,
|
||||||
const_cast<int *>(&destination_stride)};
|
&destination_stride);
|
||||||
|
|
||||||
queue_->enqueue(kernel_half_float, work_size, args);
|
queue_->enqueue(kernel_half_float, work_size, args);
|
||||||
}
|
}
|
||||||
|
@@ -482,7 +482,11 @@ void PathTrace::set_denoiser_params(const DenoiseParams ¶ms)
|
|||||||
}
|
}
|
||||||
|
|
||||||
denoiser_ = Denoiser::create(device_, params);
|
denoiser_ = Denoiser::create(device_, params);
|
||||||
denoiser_->is_cancelled_cb = [this]() { return is_cancel_requested(); };
|
|
||||||
|
/* Only take into account the "immediate" cancel to have interactive rendering responding to
|
||||||
|
* navigation as quickly as possible, but allow to run denoiser after user hit Esc button while
|
||||||
|
* doing offline rendering. */
|
||||||
|
denoiser_->is_cancelled_cb = [this]() { return render_cancel_.is_requested; };
|
||||||
}
|
}
|
||||||
|
|
||||||
void PathTrace::set_adaptive_sampling(const AdaptiveSampling &adaptive_sampling)
|
void PathTrace::set_adaptive_sampling(const AdaptiveSampling &adaptive_sampling)
|
||||||
@@ -1089,6 +1093,8 @@ static const char *device_type_for_description(const DeviceType type)
|
|||||||
return "Dummy";
|
return "Dummy";
|
||||||
case DEVICE_MULTI:
|
case DEVICE_MULTI:
|
||||||
return "Multi";
|
return "Multi";
|
||||||
|
case DEVICE_METAL:
|
||||||
|
return "Metal";
|
||||||
}
|
}
|
||||||
|
|
||||||
return "UNKNOWN";
|
return "UNKNOWN";
|
||||||
|
@@ -258,7 +258,8 @@ void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
|
|||||||
* become busy after adding new tiles). This is especially important for the shadow catcher which
|
* become busy after adding new tiles). This is especially important for the shadow catcher which
|
||||||
* schedules work in halves of available number of paths. */
|
* schedules work in halves of available number of paths. */
|
||||||
work_tile_scheduler_.set_max_num_path_states(max_num_paths_ / 8);
|
work_tile_scheduler_.set_max_num_path_states(max_num_paths_ / 8);
|
||||||
|
work_tile_scheduler_.set_accelerated_rt((device_->get_bvh_layout_mask() & BVH_LAYOUT_OPTIX) !=
|
||||||
|
0);
|
||||||
work_tile_scheduler_.reset(effective_buffer_params_,
|
work_tile_scheduler_.reset(effective_buffer_params_,
|
||||||
start_sample,
|
start_sample,
|
||||||
samples_num,
|
samples_num,
|
||||||
@@ -333,7 +334,8 @@ DeviceKernel PathTraceWorkGPU::get_most_queued_kernel() const
|
|||||||
|
|
||||||
void PathTraceWorkGPU::enqueue_reset()
|
void PathTraceWorkGPU::enqueue_reset()
|
||||||
{
|
{
|
||||||
void *args[] = {&max_num_paths_};
|
DeviceKernelArguments args(&max_num_paths_);
|
||||||
|
|
||||||
queue_->enqueue(DEVICE_KERNEL_INTEGRATOR_RESET, max_num_paths_, args);
|
queue_->enqueue(DEVICE_KERNEL_INTEGRATOR_RESET, max_num_paths_, args);
|
||||||
queue_->zero_to_device(integrator_queue_counter_);
|
queue_->zero_to_device(integrator_queue_counter_);
|
||||||
queue_->zero_to_device(integrator_shader_sort_counter_);
|
queue_->zero_to_device(integrator_shader_sort_counter_);
|
||||||
@@ -404,7 +406,7 @@ bool PathTraceWorkGPU::enqueue_path_iteration()
|
|||||||
|
|
||||||
void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num_paths_limit)
|
void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num_paths_limit)
|
||||||
{
|
{
|
||||||
void *d_path_index = (void *)NULL;
|
device_ptr d_path_index = 0;
|
||||||
|
|
||||||
/* Create array of path indices for which this kernel is queued to be executed. */
|
/* Create array of path indices for which this kernel is queued to be executed. */
|
||||||
int work_size = kernel_max_active_main_path_index(kernel);
|
int work_size = kernel_max_active_main_path_index(kernel);
|
||||||
@@ -415,14 +417,14 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
|
|||||||
if (kernel_uses_sorting(kernel)) {
|
if (kernel_uses_sorting(kernel)) {
|
||||||
/* Compute array of active paths, sorted by shader. */
|
/* Compute array of active paths, sorted by shader. */
|
||||||
work_size = num_queued;
|
work_size = num_queued;
|
||||||
d_path_index = (void *)queued_paths_.device_pointer;
|
d_path_index = queued_paths_.device_pointer;
|
||||||
|
|
||||||
compute_sorted_queued_paths(
|
compute_sorted_queued_paths(
|
||||||
DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY, kernel, num_paths_limit);
|
DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY, kernel, num_paths_limit);
|
||||||
}
|
}
|
||||||
else if (num_queued < work_size) {
|
else if (num_queued < work_size) {
|
||||||
work_size = num_queued;
|
work_size = num_queued;
|
||||||
d_path_index = (void *)queued_paths_.device_pointer;
|
d_path_index = queued_paths_.device_pointer;
|
||||||
|
|
||||||
if (kernel_is_shadow_path(kernel)) {
|
if (kernel_is_shadow_path(kernel)) {
|
||||||
/* Compute array of active shadow paths for specific kernel. */
|
/* Compute array of active shadow paths for specific kernel. */
|
||||||
@@ -441,8 +443,7 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
|
|||||||
switch (kernel) {
|
switch (kernel) {
|
||||||
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: {
|
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: {
|
||||||
/* Closest ray intersection kernels with integrator state and render buffer. */
|
/* Closest ray intersection kernels with integrator state and render buffer. */
|
||||||
void *d_render_buffer = (void *)buffers_->buffer.device_pointer;
|
DeviceKernelArguments args(&d_path_index, &buffers_->buffer.device_pointer, &work_size);
|
||||||
void *args[] = {&d_path_index, &d_render_buffer, const_cast<int *>(&work_size)};
|
|
||||||
|
|
||||||
queue_->enqueue(kernel, work_size, args);
|
queue_->enqueue(kernel, work_size, args);
|
||||||
break;
|
break;
|
||||||
@@ -452,7 +453,7 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
|
|||||||
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
|
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
|
||||||
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: {
|
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: {
|
||||||
/* Ray intersection kernels with integrator state. */
|
/* Ray intersection kernels with integrator state. */
|
||||||
void *args[] = {&d_path_index, const_cast<int *>(&work_size)};
|
DeviceKernelArguments args(&d_path_index, &work_size);
|
||||||
|
|
||||||
queue_->enqueue(kernel, work_size, args);
|
queue_->enqueue(kernel, work_size, args);
|
||||||
break;
|
break;
|
||||||
@@ -464,8 +465,7 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
|
|||||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
|
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
|
||||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME: {
|
case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME: {
|
||||||
/* Shading kernels with integrator state and render buffer. */
|
/* Shading kernels with integrator state and render buffer. */
|
||||||
void *d_render_buffer = (void *)buffers_->buffer.device_pointer;
|
DeviceKernelArguments args(&d_path_index, &buffers_->buffer.device_pointer, &work_size);
|
||||||
void *args[] = {&d_path_index, &d_render_buffer, const_cast<int *>(&work_size)};
|
|
||||||
|
|
||||||
queue_->enqueue(kernel, work_size, args);
|
queue_->enqueue(kernel, work_size, args);
|
||||||
break;
|
break;
|
||||||
@@ -483,15 +483,17 @@ void PathTraceWorkGPU::compute_sorted_queued_paths(DeviceKernel kernel,
|
|||||||
const int num_paths_limit)
|
const int num_paths_limit)
|
||||||
{
|
{
|
||||||
int d_queued_kernel = queued_kernel;
|
int d_queued_kernel = queued_kernel;
|
||||||
void *d_counter = integrator_state_gpu_.sort_key_counter[d_queued_kernel];
|
device_ptr d_counter = (device_ptr)integrator_state_gpu_.sort_key_counter[d_queued_kernel];
|
||||||
void *d_prefix_sum = (void *)integrator_shader_sort_prefix_sum_.device_pointer;
|
device_ptr d_prefix_sum = integrator_shader_sort_prefix_sum_.device_pointer;
|
||||||
assert(d_counter != nullptr && d_prefix_sum != nullptr);
|
assert(d_counter != 0 && d_prefix_sum != 0);
|
||||||
|
|
||||||
/* Compute prefix sum of number of active paths with each shader. */
|
/* Compute prefix sum of number of active paths with each shader. */
|
||||||
{
|
{
|
||||||
const int work_size = 1;
|
const int work_size = 1;
|
||||||
int max_shaders = device_scene_->data.max_shaders;
|
int max_shaders = device_scene_->data.max_shaders;
|
||||||
void *args[] = {&d_counter, &d_prefix_sum, &max_shaders};
|
|
||||||
|
DeviceKernelArguments args(&d_counter, &d_prefix_sum, &max_shaders);
|
||||||
|
|
||||||
queue_->enqueue(DEVICE_KERNEL_PREFIX_SUM, work_size, args);
|
queue_->enqueue(DEVICE_KERNEL_PREFIX_SUM, work_size, args);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -506,15 +508,16 @@ void PathTraceWorkGPU::compute_sorted_queued_paths(DeviceKernel kernel,
|
|||||||
* end of the array since compaction would need to do less work. */
|
* end of the array since compaction would need to do less work. */
|
||||||
const int work_size = kernel_max_active_main_path_index(queued_kernel);
|
const int work_size = kernel_max_active_main_path_index(queued_kernel);
|
||||||
|
|
||||||
void *d_queued_paths = (void *)queued_paths_.device_pointer;
|
device_ptr d_queued_paths = queued_paths_.device_pointer;
|
||||||
void *d_num_queued_paths = (void *)num_queued_paths_.device_pointer;
|
device_ptr d_num_queued_paths = num_queued_paths_.device_pointer;
|
||||||
void *args[] = {const_cast<int *>(&work_size),
|
|
||||||
const_cast<int *>(&num_paths_limit),
|
DeviceKernelArguments args(&work_size,
|
||||||
|
&num_paths_limit,
|
||||||
&d_queued_paths,
|
&d_queued_paths,
|
||||||
&d_num_queued_paths,
|
&d_num_queued_paths,
|
||||||
&d_counter,
|
&d_counter,
|
||||||
&d_prefix_sum,
|
&d_prefix_sum,
|
||||||
&d_queued_kernel};
|
&d_queued_kernel);
|
||||||
|
|
||||||
queue_->enqueue(kernel, work_size, args);
|
queue_->enqueue(kernel, work_size, args);
|
||||||
}
|
}
|
||||||
@@ -526,10 +529,10 @@ void PathTraceWorkGPU::compute_queued_paths(DeviceKernel kernel, DeviceKernel qu
|
|||||||
|
|
||||||
/* Launch kernel to fill the active paths arrays. */
|
/* Launch kernel to fill the active paths arrays. */
|
||||||
const int work_size = kernel_max_active_main_path_index(queued_kernel);
|
const int work_size = kernel_max_active_main_path_index(queued_kernel);
|
||||||
void *d_queued_paths = (void *)queued_paths_.device_pointer;
|
device_ptr d_queued_paths = queued_paths_.device_pointer;
|
||||||
void *d_num_queued_paths = (void *)num_queued_paths_.device_pointer;
|
device_ptr d_num_queued_paths = num_queued_paths_.device_pointer;
|
||||||
void *args[] = {
|
|
||||||
const_cast<int *>(&work_size), &d_queued_paths, &d_num_queued_paths, &d_queued_kernel};
|
DeviceKernelArguments args(&work_size, &d_queued_paths, &d_num_queued_paths, &d_queued_kernel);
|
||||||
|
|
||||||
queue_->zero_to_device(num_queued_paths_);
|
queue_->zero_to_device(num_queued_paths_);
|
||||||
queue_->enqueue(kernel, work_size, args);
|
queue_->enqueue(kernel, work_size, args);
|
||||||
@@ -605,15 +608,17 @@ void PathTraceWorkGPU::compact_paths(const int num_active_paths,
|
|||||||
{
|
{
|
||||||
/* Compact fragmented path states into the start of the array, moving any paths
|
/* Compact fragmented path states into the start of the array, moving any paths
|
||||||
* with index higher than the number of active paths into the gaps. */
|
* with index higher than the number of active paths into the gaps. */
|
||||||
void *d_compact_paths = (void *)queued_paths_.device_pointer;
|
device_ptr d_compact_paths = queued_paths_.device_pointer;
|
||||||
void *d_num_queued_paths = (void *)num_queued_paths_.device_pointer;
|
device_ptr d_num_queued_paths = num_queued_paths_.device_pointer;
|
||||||
|
|
||||||
/* Create array with terminated paths that we can write to. */
|
/* Create array with terminated paths that we can write to. */
|
||||||
{
|
{
|
||||||
/* TODO: can the work size be reduced here? */
|
/* TODO: can the work size be reduced here? */
|
||||||
int offset = num_active_paths;
|
int offset = num_active_paths;
|
||||||
int work_size = num_active_paths;
|
int work_size = num_active_paths;
|
||||||
void *args[] = {&work_size, &d_compact_paths, &d_num_queued_paths, &offset};
|
|
||||||
|
DeviceKernelArguments args(&work_size, &d_compact_paths, &d_num_queued_paths, &offset);
|
||||||
|
|
||||||
queue_->zero_to_device(num_queued_paths_);
|
queue_->zero_to_device(num_queued_paths_);
|
||||||
queue_->enqueue(terminated_paths_kernel, work_size, args);
|
queue_->enqueue(terminated_paths_kernel, work_size, args);
|
||||||
}
|
}
|
||||||
@@ -622,8 +627,10 @@ void PathTraceWorkGPU::compact_paths(const int num_active_paths,
|
|||||||
* than the number of active paths. */
|
* than the number of active paths. */
|
||||||
{
|
{
|
||||||
int work_size = max_active_path_index;
|
int work_size = max_active_path_index;
|
||||||
void *args[] = {
|
|
||||||
&work_size, &d_compact_paths, &d_num_queued_paths, const_cast<int *>(&num_active_paths)};
|
DeviceKernelArguments args(
|
||||||
|
&work_size, &d_compact_paths, &d_num_queued_paths, &num_active_paths);
|
||||||
|
|
||||||
queue_->zero_to_device(num_queued_paths_);
|
queue_->zero_to_device(num_queued_paths_);
|
||||||
queue_->enqueue(compact_paths_kernel, work_size, args);
|
queue_->enqueue(compact_paths_kernel, work_size, args);
|
||||||
}
|
}
|
||||||
@@ -638,8 +645,10 @@ void PathTraceWorkGPU::compact_paths(const int num_active_paths,
|
|||||||
int work_size = num_compact_paths;
|
int work_size = num_compact_paths;
|
||||||
int active_states_offset = 0;
|
int active_states_offset = 0;
|
||||||
int terminated_states_offset = num_active_paths;
|
int terminated_states_offset = num_active_paths;
|
||||||
void *args[] = {
|
|
||||||
&d_compact_paths, &active_states_offset, &terminated_states_offset, &work_size};
|
DeviceKernelArguments args(
|
||||||
|
&d_compact_paths, &active_states_offset, &terminated_states_offset, &work_size);
|
||||||
|
|
||||||
queue_->enqueue(compact_kernel, work_size, args);
|
queue_->enqueue(compact_kernel, work_size, args);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -768,14 +777,12 @@ void PathTraceWorkGPU::enqueue_work_tiles(DeviceKernel kernel,
|
|||||||
|
|
||||||
queue_->copy_to_device(work_tiles_);
|
queue_->copy_to_device(work_tiles_);
|
||||||
|
|
||||||
void *d_work_tiles = (void *)work_tiles_.device_pointer;
|
device_ptr d_work_tiles = work_tiles_.device_pointer;
|
||||||
void *d_render_buffer = (void *)buffers_->buffer.device_pointer;
|
device_ptr d_render_buffer = buffers_->buffer.device_pointer;
|
||||||
|
|
||||||
/* Launch kernel. */
|
/* Launch kernel. */
|
||||||
void *args[] = {&d_work_tiles,
|
DeviceKernelArguments args(
|
||||||
const_cast<int *>(&num_work_tiles),
|
&d_work_tiles, &num_work_tiles, &d_render_buffer, &max_tile_work_size);
|
||||||
&d_render_buffer,
|
|
||||||
const_cast<int *>(&max_tile_work_size)};
|
|
||||||
|
|
||||||
queue_->enqueue(kernel, max_tile_work_size * num_work_tiles, args);
|
queue_->enqueue(kernel, max_tile_work_size * num_work_tiles, args);
|
||||||
|
|
||||||
@@ -965,16 +972,16 @@ int PathTraceWorkGPU::adaptive_sampling_convergence_check_count_active(float thr
|
|||||||
|
|
||||||
const int work_size = effective_buffer_params_.width * effective_buffer_params_.height;
|
const int work_size = effective_buffer_params_.width * effective_buffer_params_.height;
|
||||||
|
|
||||||
void *args[] = {&buffers_->buffer.device_pointer,
|
DeviceKernelArguments args(&buffers_->buffer.device_pointer,
|
||||||
const_cast<int *>(&effective_buffer_params_.full_x),
|
&effective_buffer_params_.full_x,
|
||||||
const_cast<int *>(&effective_buffer_params_.full_y),
|
&effective_buffer_params_.full_y,
|
||||||
const_cast<int *>(&effective_buffer_params_.width),
|
&effective_buffer_params_.width,
|
||||||
const_cast<int *>(&effective_buffer_params_.height),
|
&effective_buffer_params_.height,
|
||||||
&threshold,
|
&threshold,
|
||||||
&reset,
|
&reset,
|
||||||
&effective_buffer_params_.offset,
|
&effective_buffer_params_.offset,
|
||||||
&effective_buffer_params_.stride,
|
&effective_buffer_params_.stride,
|
||||||
&num_active_pixels.device_pointer};
|
&num_active_pixels.device_pointer);
|
||||||
|
|
||||||
queue_->enqueue(DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_CHECK, work_size, args);
|
queue_->enqueue(DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_CHECK, work_size, args);
|
||||||
|
|
||||||
@@ -988,13 +995,13 @@ void PathTraceWorkGPU::enqueue_adaptive_sampling_filter_x()
|
|||||||
{
|
{
|
||||||
const int work_size = effective_buffer_params_.height;
|
const int work_size = effective_buffer_params_.height;
|
||||||
|
|
||||||
void *args[] = {&buffers_->buffer.device_pointer,
|
DeviceKernelArguments args(&buffers_->buffer.device_pointer,
|
||||||
&effective_buffer_params_.full_x,
|
&effective_buffer_params_.full_x,
|
||||||
&effective_buffer_params_.full_y,
|
&effective_buffer_params_.full_y,
|
||||||
&effective_buffer_params_.width,
|
&effective_buffer_params_.width,
|
||||||
&effective_buffer_params_.height,
|
&effective_buffer_params_.height,
|
||||||
&effective_buffer_params_.offset,
|
&effective_buffer_params_.offset,
|
||||||
&effective_buffer_params_.stride};
|
&effective_buffer_params_.stride);
|
||||||
|
|
||||||
queue_->enqueue(DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_X, work_size, args);
|
queue_->enqueue(DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_X, work_size, args);
|
||||||
}
|
}
|
||||||
@@ -1003,13 +1010,13 @@ void PathTraceWorkGPU::enqueue_adaptive_sampling_filter_y()
|
|||||||
{
|
{
|
||||||
const int work_size = effective_buffer_params_.width;
|
const int work_size = effective_buffer_params_.width;
|
||||||
|
|
||||||
void *args[] = {&buffers_->buffer.device_pointer,
|
DeviceKernelArguments args(&buffers_->buffer.device_pointer,
|
||||||
&effective_buffer_params_.full_x,
|
&effective_buffer_params_.full_x,
|
||||||
&effective_buffer_params_.full_y,
|
&effective_buffer_params_.full_y,
|
||||||
&effective_buffer_params_.width,
|
&effective_buffer_params_.width,
|
||||||
&effective_buffer_params_.height,
|
&effective_buffer_params_.height,
|
||||||
&effective_buffer_params_.offset,
|
&effective_buffer_params_.offset,
|
||||||
&effective_buffer_params_.stride};
|
&effective_buffer_params_.stride);
|
||||||
|
|
||||||
queue_->enqueue(DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_Y, work_size, args);
|
queue_->enqueue(DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_Y, work_size, args);
|
||||||
}
|
}
|
||||||
@@ -1018,10 +1025,10 @@ void PathTraceWorkGPU::cryptomatte_postproces()
|
|||||||
{
|
{
|
||||||
const int work_size = effective_buffer_params_.width * effective_buffer_params_.height;
|
const int work_size = effective_buffer_params_.width * effective_buffer_params_.height;
|
||||||
|
|
||||||
void *args[] = {&buffers_->buffer.device_pointer,
|
DeviceKernelArguments args(&buffers_->buffer.device_pointer,
|
||||||
const_cast<int *>(&work_size),
|
&work_size,
|
||||||
&effective_buffer_params_.offset,
|
&effective_buffer_params_.offset,
|
||||||
&effective_buffer_params_.stride};
|
&effective_buffer_params_.stride);
|
||||||
|
|
||||||
queue_->enqueue(DEVICE_KERNEL_CRYPTOMATTE_POSTPROCESS, work_size, args);
|
queue_->enqueue(DEVICE_KERNEL_CRYPTOMATTE_POSTPROCESS, work_size, args);
|
||||||
}
|
}
|
||||||
@@ -1070,8 +1077,9 @@ int PathTraceWorkGPU::shadow_catcher_count_possible_splits()
|
|||||||
queue_->zero_to_device(num_queued_paths_);
|
queue_->zero_to_device(num_queued_paths_);
|
||||||
|
|
||||||
const int work_size = max_active_main_path_index_;
|
const int work_size = max_active_main_path_index_;
|
||||||
void *d_num_queued_paths = (void *)num_queued_paths_.device_pointer;
|
device_ptr d_num_queued_paths = num_queued_paths_.device_pointer;
|
||||||
void *args[] = {const_cast<int *>(&work_size), &d_num_queued_paths};
|
|
||||||
|
DeviceKernelArguments args(&work_size, &d_num_queued_paths);
|
||||||
|
|
||||||
queue_->enqueue(DEVICE_KERNEL_INTEGRATOR_SHADOW_CATCHER_COUNT_POSSIBLE_SPLITS, work_size, args);
|
queue_->enqueue(DEVICE_KERNEL_INTEGRATOR_SHADOW_CATCHER_COUNT_POSSIBLE_SPLITS, work_size, args);
|
||||||
queue_->copy_from_device(num_queued_paths_);
|
queue_->copy_from_device(num_queued_paths_);
|
||||||
|
@@ -112,7 +112,7 @@ int RenderScheduler::get_rendered_sample() const
|
|||||||
{
|
{
|
||||||
DCHECK_GT(get_num_rendered_samples(), 0);
|
DCHECK_GT(get_num_rendered_samples(), 0);
|
||||||
|
|
||||||
return start_sample_ + get_num_rendered_samples() - 1;
|
return start_sample_ + get_num_rendered_samples() - 1 - sample_offset_;
|
||||||
}
|
}
|
||||||
|
|
||||||
int RenderScheduler::get_num_rendered_samples() const
|
int RenderScheduler::get_num_rendered_samples() const
|
||||||
@@ -406,6 +406,9 @@ bool RenderScheduler::set_postprocess_render_work(RenderWork *render_work)
|
|||||||
any_scheduled = true;
|
any_scheduled = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Force update. */
|
||||||
|
any_scheduled = true;
|
||||||
|
|
||||||
if (any_scheduled) {
|
if (any_scheduled) {
|
||||||
render_work->display.update = true;
|
render_work->display.update = true;
|
||||||
}
|
}
|
||||||
@@ -874,7 +877,8 @@ int RenderScheduler::get_num_samples_to_path_trace() const
|
|||||||
* is to ensure that the final render is pixel-matched regardless of how many samples per second
|
* is to ensure that the final render is pixel-matched regardless of how many samples per second
|
||||||
* compute device can do. */
|
* compute device can do. */
|
||||||
|
|
||||||
return adaptive_sampling_.align_samples(path_trace_start_sample, num_samples_to_render);
|
return adaptive_sampling_.align_samples(path_trace_start_sample - sample_offset_,
|
||||||
|
num_samples_to_render);
|
||||||
}
|
}
|
||||||
|
|
||||||
int RenderScheduler::get_num_samples_during_navigation(int resolution_divider) const
|
int RenderScheduler::get_num_samples_during_navigation(int resolution_divider) const
|
||||||
|
@@ -158,14 +158,16 @@ bool ShaderEval::eval_gpu(Device *device,
|
|||||||
|
|
||||||
/* Execute work on GPU in chunk, so we can cancel.
|
/* Execute work on GPU in chunk, so we can cancel.
|
||||||
* TODO : query appropriate size from device.*/
|
* TODO : query appropriate size from device.*/
|
||||||
const int64_t chunk_size = 65536;
|
const int32_t chunk_size = 65536;
|
||||||
|
|
||||||
void *d_input = (void *)input.device_pointer;
|
device_ptr d_input = input.device_pointer;
|
||||||
void *d_output = (void *)output.device_pointer;
|
device_ptr d_output = output.device_pointer;
|
||||||
|
|
||||||
for (int64_t d_offset = 0; d_offset < work_size; d_offset += chunk_size) {
|
assert(work_size <= 0x7fffffff);
|
||||||
int64_t d_work_size = std::min(chunk_size, work_size - d_offset);
|
for (int32_t d_offset = 0; d_offset < int32_t(work_size); d_offset += chunk_size) {
|
||||||
void *args[] = {&d_input, &d_output, &d_offset, &d_work_size};
|
int32_t d_work_size = std::min(chunk_size, int32_t(work_size) - d_offset);
|
||||||
|
|
||||||
|
DeviceKernelArguments args(&d_input, &d_output, &d_offset, &d_work_size);
|
||||||
|
|
||||||
queue->enqueue(kernel, d_work_size, args);
|
queue->enqueue(kernel, d_work_size, args);
|
||||||
queue->synchronize();
|
queue->synchronize();
|
||||||
|
@@ -46,7 +46,8 @@ ccl_device_inline uint round_up_to_power_of_two(uint x)
|
|||||||
return next_power_of_two(x);
|
return next_power_of_two(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
TileSize tile_calculate_best_size(const int2 &image_size,
|
TileSize tile_calculate_best_size(const bool accel_rt,
|
||||||
|
const int2 &image_size,
|
||||||
const int num_samples,
|
const int num_samples,
|
||||||
const int max_num_path_states,
|
const int max_num_path_states,
|
||||||
const float scrambling_distance)
|
const float scrambling_distance)
|
||||||
@@ -73,7 +74,7 @@ TileSize tile_calculate_best_size(const int2 &image_size,
|
|||||||
|
|
||||||
TileSize tile_size;
|
TileSize tile_size;
|
||||||
const int num_path_states_per_sample = max_num_path_states / num_samples;
|
const int num_path_states_per_sample = max_num_path_states / num_samples;
|
||||||
if (scrambling_distance < 0.9f) {
|
if (scrambling_distance < 0.9f && accel_rt) {
|
||||||
/* Prefer large tiles for scrambling distance, bounded by max num path states. */
|
/* Prefer large tiles for scrambling distance, bounded by max num path states. */
|
||||||
tile_size.width = min(image_size.x, max_num_path_states);
|
tile_size.width = min(image_size.x, max_num_path_states);
|
||||||
tile_size.height = min(image_size.y, max(max_num_path_states / tile_size.width, 1));
|
tile_size.height = min(image_size.y, max(max_num_path_states / tile_size.width, 1));
|
||||||
|
@@ -49,7 +49,8 @@ std::ostream &operator<<(std::ostream &os, const TileSize &tile_size);
|
|||||||
* of active path states.
|
* of active path states.
|
||||||
* Will attempt to provide best guess to keep path tracing threads of a device as localized as
|
* Will attempt to provide best guess to keep path tracing threads of a device as localized as
|
||||||
* possible, and have as many threads active for every tile as possible. */
|
* possible, and have as many threads active for every tile as possible. */
|
||||||
TileSize tile_calculate_best_size(const int2 &image_size,
|
TileSize tile_calculate_best_size(const bool accel_rt,
|
||||||
|
const int2 &image_size,
|
||||||
const int num_samples,
|
const int num_samples,
|
||||||
const int max_num_path_states,
|
const int max_num_path_states,
|
||||||
const float scrambling_distance);
|
const float scrambling_distance);
|
||||||
|
@@ -28,6 +28,11 @@ WorkTileScheduler::WorkTileScheduler()
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void WorkTileScheduler::set_accelerated_rt(bool accelerated_rt)
|
||||||
|
{
|
||||||
|
accelerated_rt_ = accelerated_rt;
|
||||||
|
}
|
||||||
|
|
||||||
void WorkTileScheduler::set_max_num_path_states(int max_num_path_states)
|
void WorkTileScheduler::set_max_num_path_states(int max_num_path_states)
|
||||||
{
|
{
|
||||||
max_num_path_states_ = max_num_path_states;
|
max_num_path_states_ = max_num_path_states;
|
||||||
@@ -61,7 +66,7 @@ void WorkTileScheduler::reset(const BufferParams &buffer_params,
|
|||||||
void WorkTileScheduler::reset_scheduler_state()
|
void WorkTileScheduler::reset_scheduler_state()
|
||||||
{
|
{
|
||||||
tile_size_ = tile_calculate_best_size(
|
tile_size_ = tile_calculate_best_size(
|
||||||
image_size_px_, samples_num_, max_num_path_states_, scrambling_distance_);
|
accelerated_rt_, image_size_px_, samples_num_, max_num_path_states_, scrambling_distance_);
|
||||||
|
|
||||||
VLOG(3) << "Will schedule tiles of size " << tile_size_;
|
VLOG(3) << "Will schedule tiles of size " << tile_size_;
|
||||||
|
|
||||||
|
@@ -31,6 +31,9 @@ class WorkTileScheduler {
|
|||||||
public:
|
public:
|
||||||
WorkTileScheduler();
|
WorkTileScheduler();
|
||||||
|
|
||||||
|
/* To indicate if there is accelerated RT support. */
|
||||||
|
void set_accelerated_rt(bool state);
|
||||||
|
|
||||||
/* MAximum path states which are allowed to be used by a single scheduled work tile.
|
/* MAximum path states which are allowed to be used by a single scheduled work tile.
|
||||||
*
|
*
|
||||||
* Affects the scheduled work size: the work size will be as big as possible, but will not exceed
|
* Affects the scheduled work size: the work size will be as big as possible, but will not exceed
|
||||||
@@ -55,6 +58,9 @@ class WorkTileScheduler {
|
|||||||
protected:
|
protected:
|
||||||
void reset_scheduler_state();
|
void reset_scheduler_state();
|
||||||
|
|
||||||
|
/* Used to indicate if there is accelerated ray tracing. */
|
||||||
|
bool accelerated_rt_ = false;
|
||||||
|
|
||||||
/* Maximum allowed path states to be used.
|
/* Maximum allowed path states to be used.
|
||||||
*
|
*
|
||||||
* TODO(sergey): Naming can be improved. The fact that this is a limiting factor based on the
|
* TODO(sergey): Naming can be improved. The fact that this is a limiting factor based on the
|
||||||
|
@@ -179,11 +179,14 @@ set(SRC_KERNEL_GEOM_HEADERS
|
|||||||
geom/curve.h
|
geom/curve.h
|
||||||
geom/curve_intersect.h
|
geom/curve_intersect.h
|
||||||
geom/motion_curve.h
|
geom/motion_curve.h
|
||||||
|
geom/motion_point.h
|
||||||
geom/motion_triangle.h
|
geom/motion_triangle.h
|
||||||
geom/motion_triangle_intersect.h
|
geom/motion_triangle_intersect.h
|
||||||
geom/motion_triangle_shader.h
|
geom/motion_triangle_shader.h
|
||||||
geom/object.h
|
geom/object.h
|
||||||
geom/patch.h
|
geom/patch.h
|
||||||
|
geom/point.h
|
||||||
|
geom/point_intersect.h
|
||||||
geom/primitive.h
|
geom/primitive.h
|
||||||
geom/shader_data.h
|
geom/shader_data.h
|
||||||
geom/subd_triangle.h
|
geom/subd_triangle.h
|
||||||
@@ -207,6 +210,7 @@ set(SRC_KERNEL_BVH_HEADERS
|
|||||||
bvh/volume.h
|
bvh/volume.h
|
||||||
bvh/volume_all.h
|
bvh/volume_all.h
|
||||||
bvh/embree.h
|
bvh/embree.h
|
||||||
|
bvh/metal.h
|
||||||
)
|
)
|
||||||
|
|
||||||
set(SRC_KERNEL_CAMERA_HEADERS
|
set(SRC_KERNEL_CAMERA_HEADERS
|
||||||
|
@@ -31,6 +31,10 @@
|
|||||||
# include "kernel/bvh/embree.h"
|
# include "kernel/bvh/embree.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __METALRT__
|
||||||
|
# include "kernel/bvh/metal.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "kernel/bvh/types.h"
|
#include "kernel/bvh/types.h"
|
||||||
#include "kernel/bvh/util.h"
|
#include "kernel/bvh/util.h"
|
||||||
|
|
||||||
@@ -38,31 +42,31 @@
|
|||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
#ifndef __KERNEL_OPTIX__
|
#if !defined(__KERNEL_GPU_RAYTRACING__)
|
||||||
|
|
||||||
/* Regular BVH traversal */
|
/* Regular BVH traversal */
|
||||||
|
|
||||||
# include "kernel/bvh/nodes.h"
|
# include "kernel/bvh/nodes.h"
|
||||||
|
|
||||||
# define BVH_FUNCTION_NAME bvh_intersect
|
# define BVH_FUNCTION_NAME bvh_intersect
|
||||||
# define BVH_FUNCTION_FEATURES 0
|
# define BVH_FUNCTION_FEATURES BVH_POINTCLOUD
|
||||||
# include "kernel/bvh/traversal.h"
|
# include "kernel/bvh/traversal.h"
|
||||||
|
|
||||||
# if defined(__HAIR__)
|
# if defined(__HAIR__)
|
||||||
# define BVH_FUNCTION_NAME bvh_intersect_hair
|
# define BVH_FUNCTION_NAME bvh_intersect_hair
|
||||||
# define BVH_FUNCTION_FEATURES BVH_HAIR
|
# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_POINTCLOUD
|
||||||
# include "kernel/bvh/traversal.h"
|
# include "kernel/bvh/traversal.h"
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
# if defined(__OBJECT_MOTION__)
|
# if defined(__OBJECT_MOTION__)
|
||||||
# define BVH_FUNCTION_NAME bvh_intersect_motion
|
# define BVH_FUNCTION_NAME bvh_intersect_motion
|
||||||
# define BVH_FUNCTION_FEATURES BVH_MOTION
|
# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_POINTCLOUD
|
||||||
# include "kernel/bvh/traversal.h"
|
# include "kernel/bvh/traversal.h"
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
# if defined(__HAIR__) && defined(__OBJECT_MOTION__)
|
# if defined(__HAIR__) && defined(__OBJECT_MOTION__)
|
||||||
# define BVH_FUNCTION_NAME bvh_intersect_hair_motion
|
# define BVH_FUNCTION_NAME bvh_intersect_hair_motion
|
||||||
# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_MOTION
|
# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_MOTION | BVH_POINTCLOUD
|
||||||
# include "kernel/bvh/traversal.h"
|
# include "kernel/bvh/traversal.h"
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
@@ -98,26 +102,27 @@ CCL_NAMESPACE_BEGIN
|
|||||||
|
|
||||||
# if defined(__SHADOW_RECORD_ALL__)
|
# if defined(__SHADOW_RECORD_ALL__)
|
||||||
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all
|
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all
|
||||||
# define BVH_FUNCTION_FEATURES 0
|
# define BVH_FUNCTION_FEATURES BVH_POINTCLOUD
|
||||||
# include "kernel/bvh/shadow_all.h"
|
# include "kernel/bvh/shadow_all.h"
|
||||||
|
|
||||||
# if defined(__HAIR__)
|
# if defined(__HAIR__)
|
||||||
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
|
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
|
||||||
# define BVH_FUNCTION_FEATURES BVH_HAIR
|
# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_POINTCLOUD
|
||||||
# include "kernel/bvh/shadow_all.h"
|
# include "kernel/bvh/shadow_all.h"
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
# if defined(__OBJECT_MOTION__)
|
# if defined(__OBJECT_MOTION__)
|
||||||
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
|
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
|
||||||
# define BVH_FUNCTION_FEATURES BVH_MOTION
|
# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_POINTCLOUD
|
||||||
# include "kernel/bvh/shadow_all.h"
|
# include "kernel/bvh/shadow_all.h"
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
# if defined(__HAIR__) && defined(__OBJECT_MOTION__)
|
# if defined(__HAIR__) && defined(__OBJECT_MOTION__)
|
||||||
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
|
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
|
||||||
# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_MOTION
|
# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_MOTION | BVH_POINTCLOUD
|
||||||
# include "kernel/bvh/shadow_all.h"
|
# include "kernel/bvh/shadow_all.h"
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
# endif /* __SHADOW_RECORD_ALL__ */
|
# endif /* __SHADOW_RECORD_ALL__ */
|
||||||
|
|
||||||
/* Record all intersections - Volume BVH traversal. */
|
/* Record all intersections - Volume BVH traversal. */
|
||||||
@@ -139,7 +144,7 @@ CCL_NAMESPACE_BEGIN
|
|||||||
# undef BVH_NAME_EVAL
|
# undef BVH_NAME_EVAL
|
||||||
# undef BVH_FUNCTION_FULL_NAME
|
# undef BVH_FUNCTION_FULL_NAME
|
||||||
|
|
||||||
#endif /* __KERNEL_OPTIX__ */
|
#endif /* !defined(__KERNEL_GPU_RAYTRACING__) */
|
||||||
|
|
||||||
ccl_device_inline bool scene_intersect_valid(ccl_private const Ray *ray)
|
ccl_device_inline bool scene_intersect_valid(ccl_private const Ray *ray)
|
||||||
{
|
{
|
||||||
@@ -205,7 +210,95 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
|
|||||||
isect->type = p5;
|
isect->type = p5;
|
||||||
|
|
||||||
return p5 != PRIMITIVE_NONE;
|
return p5 != PRIMITIVE_NONE;
|
||||||
#else /* __KERNEL_OPTIX__ */
|
#elif defined(__METALRT__)
|
||||||
|
|
||||||
|
if (!scene_intersect_valid(ray)) {
|
||||||
|
isect->t = ray->t;
|
||||||
|
isect->type = PRIMITIVE_NONE;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
# if defined(__KERNEL_DEBUG__)
|
||||||
|
if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
|
||||||
|
isect->t = ray->t;
|
||||||
|
isect->type = PRIMITIVE_NONE;
|
||||||
|
kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_null_intersection_function_table(metal_ancillaries->ift_default)) {
|
||||||
|
isect->t = ray->t;
|
||||||
|
isect->type = PRIMITIVE_NONE;
|
||||||
|
kernel_assert(!"Invalid ift_default");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
|
||||||
|
metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
|
||||||
|
metalrt_intersector_type metalrt_intersect;
|
||||||
|
|
||||||
|
if (!kernel_data.bvh.have_curves) {
|
||||||
|
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
|
||||||
|
}
|
||||||
|
|
||||||
|
MetalRTIntersectionPayload payload;
|
||||||
|
payload.u = 0.0f;
|
||||||
|
payload.v = 0.0f;
|
||||||
|
payload.visibility = visibility;
|
||||||
|
|
||||||
|
typename metalrt_intersector_type::result_type intersection;
|
||||||
|
|
||||||
|
uint ray_mask = visibility & 0xFF;
|
||||||
|
if (0 == ray_mask && (visibility & ~0xFF) != 0) {
|
||||||
|
ray_mask = 0xFF;
|
||||||
|
/* No further intersector setup required: Default MetalRT behavior is any-hit. */
|
||||||
|
}
|
||||||
|
else if (visibility & PATH_RAY_SHADOW_OPAQUE) {
|
||||||
|
/* No further intersector setup required: Shadow ray early termination is controlled by the
|
||||||
|
* intersection handler */
|
||||||
|
}
|
||||||
|
|
||||||
|
# if defined(__METALRT_MOTION__)
|
||||||
|
payload.time = ray->time;
|
||||||
|
intersection = metalrt_intersect.intersect(r,
|
||||||
|
metal_ancillaries->accel_struct,
|
||||||
|
ray_mask,
|
||||||
|
ray->time,
|
||||||
|
metal_ancillaries->ift_default,
|
||||||
|
payload);
|
||||||
|
# else
|
||||||
|
intersection = metalrt_intersect.intersect(
|
||||||
|
r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload);
|
||||||
|
# endif
|
||||||
|
|
||||||
|
if (intersection.type == intersection_type::none) {
|
||||||
|
isect->t = ray->t;
|
||||||
|
isect->type = PRIMITIVE_NONE;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
isect->t = intersection.distance;
|
||||||
|
|
||||||
|
isect->prim = payload.prim;
|
||||||
|
isect->type = payload.type;
|
||||||
|
isect->object = intersection.user_instance_id;
|
||||||
|
|
||||||
|
isect->t = intersection.distance;
|
||||||
|
if (intersection.type == intersection_type::triangle) {
|
||||||
|
isect->u = 1.0f - intersection.triangle_barycentric_coord.y -
|
||||||
|
intersection.triangle_barycentric_coord.x;
|
||||||
|
isect->v = intersection.triangle_barycentric_coord.x;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
isect->u = payload.u;
|
||||||
|
isect->v = payload.v;
|
||||||
|
}
|
||||||
|
|
||||||
|
return isect->type != PRIMITIVE_NONE;
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
if (!scene_intersect_valid(ray)) {
|
if (!scene_intersect_valid(ray)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -289,7 +382,69 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
|
|||||||
p5);
|
p5);
|
||||||
|
|
||||||
return p5;
|
return p5;
|
||||||
# else /* __KERNEL_OPTIX__ */
|
# elif defined(__METALRT__)
|
||||||
|
if (!scene_intersect_valid(ray)) {
|
||||||
|
if (local_isect) {
|
||||||
|
local_isect->num_hits = 0;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
# if defined(__KERNEL_DEBUG__)
|
||||||
|
if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
|
||||||
|
if (local_isect) {
|
||||||
|
local_isect->num_hits = 0;
|
||||||
|
}
|
||||||
|
kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_null_intersection_function_table(metal_ancillaries->ift_local)) {
|
||||||
|
if (local_isect) {
|
||||||
|
local_isect->num_hits = 0;
|
||||||
|
}
|
||||||
|
kernel_assert(!"Invalid ift_local");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
|
||||||
|
metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
|
||||||
|
metalrt_intersector_type metalrt_intersect;
|
||||||
|
|
||||||
|
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
|
||||||
|
if (!kernel_data.bvh.have_curves) {
|
||||||
|
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
|
||||||
|
}
|
||||||
|
|
||||||
|
MetalRTIntersectionLocalPayload payload;
|
||||||
|
payload.local_object = local_object;
|
||||||
|
payload.max_hits = max_hits;
|
||||||
|
payload.local_isect.num_hits = 0;
|
||||||
|
if (lcg_state) {
|
||||||
|
payload.has_lcg_state = true;
|
||||||
|
payload.lcg_state = *lcg_state;
|
||||||
|
}
|
||||||
|
payload.result = false;
|
||||||
|
|
||||||
|
typename metalrt_intersector_type::result_type intersection;
|
||||||
|
|
||||||
|
# if defined(__METALRT_MOTION__)
|
||||||
|
intersection = metalrt_intersect.intersect(
|
||||||
|
r, metal_ancillaries->accel_struct, 0xFF, ray->time, metal_ancillaries->ift_local, payload);
|
||||||
|
# else
|
||||||
|
intersection = metalrt_intersect.intersect(
|
||||||
|
r, metal_ancillaries->accel_struct, 0xFF, metal_ancillaries->ift_local, payload);
|
||||||
|
# endif
|
||||||
|
|
||||||
|
if (lcg_state) {
|
||||||
|
*lcg_state = payload.lcg_state;
|
||||||
|
}
|
||||||
|
*local_isect = payload.local_isect;
|
||||||
|
|
||||||
|
return payload.result;
|
||||||
|
|
||||||
|
# else
|
||||||
|
|
||||||
if (!scene_intersect_valid(ray)) {
|
if (!scene_intersect_valid(ray)) {
|
||||||
if (local_isect) {
|
if (local_isect) {
|
||||||
local_isect->num_hits = 0;
|
local_isect->num_hits = 0;
|
||||||
@@ -406,7 +561,67 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
|
|||||||
*throughput = __uint_as_float(p1);
|
*throughput = __uint_as_float(p1);
|
||||||
|
|
||||||
return p5;
|
return p5;
|
||||||
# else /* __KERNEL_OPTIX__ */
|
# elif defined(__METALRT__)
|
||||||
|
|
||||||
|
if (!scene_intersect_valid(ray)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
# if defined(__KERNEL_DEBUG__)
|
||||||
|
if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
|
||||||
|
kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_null_intersection_function_table(metal_ancillaries->ift_shadow)) {
|
||||||
|
kernel_assert(!"Invalid ift_shadow");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
|
||||||
|
metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
|
||||||
|
metalrt_intersector_type metalrt_intersect;
|
||||||
|
|
||||||
|
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
|
||||||
|
if (!kernel_data.bvh.have_curves) {
|
||||||
|
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
|
||||||
|
}
|
||||||
|
|
||||||
|
MetalRTIntersectionShadowPayload payload;
|
||||||
|
payload.visibility = visibility;
|
||||||
|
payload.max_hits = max_hits;
|
||||||
|
payload.num_hits = 0;
|
||||||
|
payload.num_recorded_hits = 0;
|
||||||
|
payload.throughput = 1.0f;
|
||||||
|
payload.result = false;
|
||||||
|
payload.state = state;
|
||||||
|
|
||||||
|
uint ray_mask = visibility & 0xFF;
|
||||||
|
if (0 == ray_mask && (visibility & ~0xFF) != 0) {
|
||||||
|
ray_mask = 0xFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
typename metalrt_intersector_type::result_type intersection;
|
||||||
|
|
||||||
|
# if defined(__METALRT_MOTION__)
|
||||||
|
payload.time = ray->time;
|
||||||
|
intersection = metalrt_intersect.intersect(r,
|
||||||
|
metal_ancillaries->accel_struct,
|
||||||
|
ray_mask,
|
||||||
|
ray->time,
|
||||||
|
metal_ancillaries->ift_shadow,
|
||||||
|
payload);
|
||||||
|
# else
|
||||||
|
intersection = metalrt_intersect.intersect(
|
||||||
|
r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_shadow, payload);
|
||||||
|
# endif
|
||||||
|
|
||||||
|
*num_recorded_hits = payload.num_recorded_hits;
|
||||||
|
*throughput = payload.throughput;
|
||||||
|
|
||||||
|
return payload.result;
|
||||||
|
|
||||||
|
# else
|
||||||
if (!scene_intersect_valid(ray)) {
|
if (!scene_intersect_valid(ray)) {
|
||||||
*num_recorded_hits = 0;
|
*num_recorded_hits = 0;
|
||||||
*throughput = 1.0f;
|
*throughput = 1.0f;
|
||||||
@@ -503,7 +718,76 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
|
|||||||
isect->type = p5;
|
isect->type = p5;
|
||||||
|
|
||||||
return p5 != PRIMITIVE_NONE;
|
return p5 != PRIMITIVE_NONE;
|
||||||
# else /* __KERNEL_OPTIX__ */
|
# elif defined(__METALRT__)
|
||||||
|
|
||||||
|
if (!scene_intersect_valid(ray)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
# if defined(__KERNEL_DEBUG__)
|
||||||
|
if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
|
||||||
|
kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_null_intersection_function_table(metal_ancillaries->ift_default)) {
|
||||||
|
kernel_assert(!"Invalid ift_default");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
|
||||||
|
metal::raytracing::ray r(ray->P, ray->D, 0.0f, ray->t);
|
||||||
|
metalrt_intersector_type metalrt_intersect;
|
||||||
|
|
||||||
|
metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
|
||||||
|
if (!kernel_data.bvh.have_curves) {
|
||||||
|
metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
|
||||||
|
}
|
||||||
|
|
||||||
|
MetalRTIntersectionPayload payload;
|
||||||
|
payload.visibility = visibility;
|
||||||
|
|
||||||
|
typename metalrt_intersector_type::result_type intersection;
|
||||||
|
|
||||||
|
uint ray_mask = visibility & 0xFF;
|
||||||
|
if (0 == ray_mask && (visibility & ~0xFF) != 0) {
|
||||||
|
ray_mask = 0xFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
# if defined(__METALRT_MOTION__)
|
||||||
|
payload.time = ray->time;
|
||||||
|
intersection = metalrt_intersect.intersect(r,
|
||||||
|
metal_ancillaries->accel_struct,
|
||||||
|
ray_mask,
|
||||||
|
ray->time,
|
||||||
|
metal_ancillaries->ift_default,
|
||||||
|
payload);
|
||||||
|
# else
|
||||||
|
intersection = metalrt_intersect.intersect(
|
||||||
|
r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload);
|
||||||
|
# endif
|
||||||
|
|
||||||
|
if (intersection.type == intersection_type::none) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
isect->prim = payload.prim;
|
||||||
|
isect->type = payload.type;
|
||||||
|
isect->object = intersection.user_instance_id;
|
||||||
|
|
||||||
|
isect->t = intersection.distance;
|
||||||
|
if (intersection.type == intersection_type::triangle) {
|
||||||
|
isect->u = 1.0f - intersection.triangle_barycentric_coord.y -
|
||||||
|
intersection.triangle_barycentric_coord.x;
|
||||||
|
isect->v = intersection.triangle_barycentric_coord.x;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
isect->u = payload.u;
|
||||||
|
isect->v = payload.v;
|
||||||
|
}
|
||||||
|
|
||||||
|
return isect->type != PRIMITIVE_NONE;
|
||||||
|
|
||||||
|
# else
|
||||||
if (!scene_intersect_valid(ray)) {
|
if (!scene_intersect_valid(ray)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
47
intern/cycles/kernel/bvh/metal.h
Normal file
47
intern/cycles/kernel/bvh/metal.h
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2021 Blender Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
struct MetalRTIntersectionPayload {
|
||||||
|
uint visibility;
|
||||||
|
float u, v;
|
||||||
|
int prim;
|
||||||
|
int type;
|
||||||
|
#if defined(__METALRT_MOTION__)
|
||||||
|
float time;
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
struct MetalRTIntersectionLocalPayload {
|
||||||
|
uint local_object;
|
||||||
|
uint lcg_state;
|
||||||
|
short max_hits;
|
||||||
|
bool has_lcg_state;
|
||||||
|
bool result;
|
||||||
|
LocalIntersection local_isect;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct MetalRTIntersectionShadowPayload {
|
||||||
|
uint visibility;
|
||||||
|
#if defined(__METALRT_MOTION__)
|
||||||
|
float time;
|
||||||
|
#endif
|
||||||
|
int state;
|
||||||
|
float throughput;
|
||||||
|
short max_hits;
|
||||||
|
short num_hits;
|
||||||
|
short num_recorded_hits;
|
||||||
|
bool result;
|
||||||
|
};
|
@@ -28,6 +28,7 @@
|
|||||||
* without new features slowing things down.
|
* without new features slowing things down.
|
||||||
*
|
*
|
||||||
* BVH_HAIR: hair curve rendering
|
* BVH_HAIR: hair curve rendering
|
||||||
|
* BVH_POINTCLOUD: point cloud rendering
|
||||||
* BVH_MOTION: motion blur rendering
|
* BVH_MOTION: motion blur rendering
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@@ -173,7 +174,7 @@ ccl_device_inline
|
|||||||
case PRIMITIVE_MOTION_CURVE_THICK:
|
case PRIMITIVE_MOTION_CURVE_THICK:
|
||||||
case PRIMITIVE_CURVE_RIBBON:
|
case PRIMITIVE_CURVE_RIBBON:
|
||||||
case PRIMITIVE_MOTION_CURVE_RIBBON: {
|
case PRIMITIVE_MOTION_CURVE_RIBBON: {
|
||||||
if ((type & PRIMITIVE_ALL_MOTION) && kernel_data.bvh.use_bvh_steps) {
|
if ((type & PRIMITIVE_MOTION) && kernel_data.bvh.use_bvh_steps) {
|
||||||
const float2 prim_time = kernel_tex_fetch(__prim_time, prim_addr);
|
const float2 prim_time = kernel_tex_fetch(__prim_time, prim_addr);
|
||||||
if (ray->time < prim_time.x || ray->time > prim_time.y) {
|
if (ray->time < prim_time.x || ray->time > prim_time.y) {
|
||||||
hit = false;
|
hit = false;
|
||||||
@@ -199,6 +200,34 @@ ccl_device_inline
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if BVH_FEATURE(BVH_POINTCLOUD)
|
||||||
|
case PRIMITIVE_POINT:
|
||||||
|
case PRIMITIVE_MOTION_POINT: {
|
||||||
|
if ((type & PRIMITIVE_MOTION) && kernel_data.bvh.use_bvh_steps) {
|
||||||
|
const float2 prim_time = kernel_tex_fetch(__prim_time, prim_addr);
|
||||||
|
if (ray->time < prim_time.x || ray->time > prim_time.y) {
|
||||||
|
hit = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const int point_object = (object == OBJECT_NONE) ?
|
||||||
|
kernel_tex_fetch(__prim_object, prim_addr) :
|
||||||
|
object;
|
||||||
|
const int point_prim = kernel_tex_fetch(__prim_index, prim_addr);
|
||||||
|
const int point_type = kernel_tex_fetch(__prim_type, prim_addr);
|
||||||
|
hit = point_intersect(kg,
|
||||||
|
&isect,
|
||||||
|
P,
|
||||||
|
dir,
|
||||||
|
t_max_current,
|
||||||
|
point_object,
|
||||||
|
point_prim,
|
||||||
|
ray->time,
|
||||||
|
point_type);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
#endif /* BVH_FEATURE(BVH_POINTCLOUD) */
|
||||||
default: {
|
default: {
|
||||||
hit = false;
|
hit = false;
|
||||||
break;
|
break;
|
||||||
@@ -226,7 +255,7 @@ ccl_device_inline
|
|||||||
bool record_intersection = true;
|
bool record_intersection = true;
|
||||||
|
|
||||||
/* Always use baked shadow transparency for curves. */
|
/* Always use baked shadow transparency for curves. */
|
||||||
if (isect.type & PRIMITIVE_ALL_CURVE) {
|
if (isect.type & PRIMITIVE_CURVE) {
|
||||||
*throughput *= intersection_curve_shadow_transparency(
|
*throughput *= intersection_curve_shadow_transparency(
|
||||||
kg, isect.object, isect.prim, isect.u);
|
kg, isect.object, isect.prim, isect.u);
|
||||||
|
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user